From f964e2fd37290823fe2292b23a73739e210b5fb8 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Tue, 12 May 2026 02:11:10 -0500 Subject: [PATCH] fix(snapshot): cap workspace size at 2 GB before first side-repo init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users reported running `deepseek-tui` inside project directories with hundreds of GB of content — ML datasets, model weights (`.safetensors`, `.gguf`, `.pt`, `.onnx`), Docker image dumps, parquet / arrow caches, anything that falls outside the snapshot built-in excludes. The pre/post-turn snapshot path called `SnapshotRepo::open_or_init` which initialized the side git repo and then ran `git add -A` — which walked the entire workspace indexing every file. On a 100-300 GB directory this hung the TUI for minutes-to-hours while git churned through the index. The pre-existing v0.8.27 fixes (#1112: retention cap, mid-session prune, expanded built-in excludes) addressed the orthogonal "snapshots grow unbounded over many turns" angle but did nothing to prevent the first snapshot from being impossible to take. This change adds `estimate_workspace_size_bounded()` — a bounded `ignore::WalkBuilder` walk that respects `.gitignore` and the snapshot module's existing skip list (`node_modules/`, `target/`, `.next/`, `.venv/`, `__pycache__/`, etc.). The walk early-exits at either the byte cap or 200,000 file entries, returning `None` to signal "too big to snapshot." `SnapshotRepo::open_or_init_with_cap(workspace, cap_bytes)` calls the estimator *before* the side `git init`, and returns `Err(InvalidInput)` with a "workspace too large" reason — which `turn::snapshot_with_label` already logs at WARN and continues past, so a too-large workspace silently disables snapshots without blocking any turn. The check runs only while the side repo does not exist yet: once a workspace passes and the repo is initialized, subsequent snapshots skip it, whereas a workspace that trips the cap never gets a side repo and so repeats the bounded walk on every snapshot attempt. 
Plumbing: - `SnapshotsConfig.max_workspace_gb` (default 2, `0` disables) - `EngineConfig.snapshots_max_workspace_bytes` resolved at engine construction from `config.snapshots_config().max_workspace_gb` - `pre_turn_snapshot` / `post_turn_snapshot` / `pre_tool_snapshot` take a `cap_bytes: u64` argument threaded from the engine - `SnapshotRepo::open_or_init` retains its v0.8.31 signature as a thin wrapper over `open_or_init_with_cap` using the default cap - `config.example.toml` documents the new `max_workspace_gb` knob with the "set to 0 to disable" escape hatch for users with legitimate large monorepos Six new tests pin both the estimator (under-cap returns Some, over-cap returns None, builtin-excluded dirs skipped, cap=0 disables the bound) and the `open_or_init_with_cap` integration (oversized workspace fails with the right error and references the config knob; cap=0 succeeds even on oversized content). Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 20 ++ config.example.toml | 9 + crates/tui/src/config.rs | 13 ++ crates/tui/src/core/engine.rs | 16 +- crates/tui/src/core/engine/turn_loop.rs | 3 +- crates/tui/src/core/turn.rs | 20 +- crates/tui/src/main.rs | 4 + crates/tui/src/runtime_threads.rs | 5 + crates/tui/src/snapshot/mod.rs | 5 +- crates/tui/src/snapshot/repo.rs | 235 ++++++++++++++++++++++++ crates/tui/src/tui/ui.rs | 4 + 11 files changed, 321 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a40e711c..6706dfad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,26 @@ real world uses." 
### Fixed +- **Snapshots no longer try to index a multi-hundred-GB workspace + on first turn.** Reported by users running `deepseek-tui` inside + project directories with hundreds of GB of content — datasets, + model weights (`.safetensors`, `.gguf`, `.pt`), Docker image + dumps, parquet / arrow caches — where the side-git snapshot + initialization would hang the TUI for minutes or hours while + `git add -A` walked the workspace. v0.8.32 adds a default + 2 GB ceiling on non-excluded workspace content (measured before + any git work, walking the same excludes the snapshot path + already honors). When the cap is exceeded the side repo isn't + initialized; subsequent snapshots are skipped with a clear + WARN-level log line referencing the new + `[snapshots] max_workspace_gb` config knob users can raise (or + set to `0` to disable the cap entirely and restore v0.8.31 + behaviour). The bounded estimator also early-exits past 200k + file entries, so a workspace full of tiny files trips the cap + before paying for a full walk. Pre-existing v0.8.27 fixes for + the growth-over-time angle (#1112: retention cap, mid-session + prune, expanded built-in excludes) continue to apply; this + closes the orthogonal "snapshots-too-big-to-start" path. - **Toast stack overlay no longer renders on top of the composer input** (harvested from PR #1485 by **@MeAiRobot**). When a deferred tool's schema auto-loaded after the model requested diff --git a/config.example.toml b/config.example.toml index ebe710cb..ccd89ac8 100644 --- a/config.example.toml +++ b/config.example.toml @@ -452,6 +452,15 @@ default_text_model = "deepseek-ai/deepseek-v4-pro" # [snapshots] # enabled = true # Snapshot workspace pre/post each turn for /restore # max_age_days = 7 # Older snapshots pruned at session start +# max_workspace_gb = 2 # Snapshots self-disable on first init when the +# # non-excluded workspace exceeds this size in GB +# # (v0.8.32). 
Default 2 GB protects against running +# # deepseek-tui in directories with hundreds of GB +# # of datasets / model weights / docker dumps where +# # `git add -A` would hang the TUI for hours. Set +# # to 0 to disable the cap (v0.8.31 behaviour); +# # raise to a higher number for legitimate large +# # monorepos. # ───────────────────────────────────────────────────────────────────────────────── # LSP Diagnostics (post-edit) (#136) diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 526bc68e..2caeacfa 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -464,6 +464,10 @@ fn default_snapshot_max_age_days() -> u64 { crate::snapshot::DEFAULT_MAX_AGE.as_secs() / (24 * 60 * 60) } +fn default_snapshot_max_workspace_gb() -> u64 { + crate::snapshot::DEFAULT_MAX_WORKSPACE_BYTES_FOR_SNAPSHOT / (1024 * 1024 * 1024) +} + /// Workspace side-git snapshot configuration (#137). #[derive(Debug, Clone, Deserialize)] pub struct SnapshotsConfig { @@ -473,6 +477,14 @@ pub struct SnapshotsConfig { /// Prune side-git snapshots older than this many days at session boot. #[serde(default = "default_snapshot_max_age_days")] pub max_age_days: u64, + /// Maximum non-excluded workspace size (in GB) before the snapshot + /// feature self-disables on first use. Set to `0` to disable the cap + /// and snapshot regardless of size (the v0.8.31 behavior). The walk + /// honors `.gitignore` and the snapshot module's built-in excludes + /// (`node_modules/`, `target/`, ...) so the measured size reflects + /// what would actually land in a snapshot commit. 
+ #[serde(default = "default_snapshot_max_workspace_gb")] + pub max_workspace_gb: u64, } impl Default for SnapshotsConfig { @@ -480,6 +492,7 @@ impl Default for SnapshotsConfig { Self { enabled: default_snapshots_enabled(), max_age_days: default_snapshot_max_age_days(), + max_workspace_gb: default_snapshot_max_workspace_gb(), } } } diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 9d1509ad..aadef244 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -132,6 +132,10 @@ pub struct EngineConfig { pub network_policy: Option, /// Whether to take side-git workspace snapshots before/after each turn. pub snapshots_enabled: bool, + /// Maximum workspace size (in bytes) before snapshots self-disable on + /// first init. `0` disables the cap. Resolved from + /// `[snapshots] max_workspace_gb` × 1 GB at engine construction. + pub snapshots_max_workspace_bytes: u64, /// Post-edit LSP diagnostics injection (#136). When `None`, the engine /// constructs a disabled manager so the field is always present. 
pub lsp_config: Option, @@ -188,6 +192,8 @@ impl Default for EngineConfig { max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, network_policy: None, snapshots_enabled: true, + snapshots_max_workspace_bytes: + crate::snapshot::DEFAULT_MAX_WORKSPACE_BYTES_FOR_SNAPSHOT, lsp_config: None, runtime_services: RuntimeToolServices::default(), subagent_model_overrides: HashMap::new(), @@ -936,8 +942,11 @@ impl Engine { if self.config.snapshots_enabled { let pre_workspace = self.session.workspace.clone(); let pre_seq = self.turn_counter; - let _ = tokio::task::spawn_blocking(move || pre_turn_snapshot(&pre_workspace, pre_seq)) - .await; + let pre_cap = self.config.snapshots_max_workspace_bytes; + let _ = tokio::task::spawn_blocking(move || { + pre_turn_snapshot(&pre_workspace, pre_seq, pre_cap) + }) + .await; } // A new turn means any leftover retry banner (success cleared @@ -1156,8 +1165,9 @@ impl Engine { if self.config.snapshots_enabled { let post_workspace = self.session.workspace.clone(); let post_seq = self.turn_counter; + let post_cap = self.config.snapshots_max_workspace_bytes; crate::utils::spawn_blocking_supervised("post-turn-snapshot", move || { - post_turn_snapshot(&post_workspace, post_seq); + post_turn_snapshot(&post_workspace, post_seq, post_cap); }); } } diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index f7b767c8..63e63b45 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -1586,8 +1586,9 @@ impl Engine { { let ws = self.session.workspace.clone(); let tid = tool_id.clone(); + let cap = self.config.snapshots_max_workspace_bytes; let _ = tokio::task::spawn_blocking(move || { - crate::core::turn::pre_tool_snapshot(&ws, &tid) + crate::core::turn::pre_tool_snapshot(&ws, &tid, cap) }) .await; } diff --git a/crates/tui/src/core/turn.rs b/crates/tui/src/core/turn.rs index b887b345..c845adb3 100644 --- a/crates/tui/src/core/turn.rs +++ 
b/crates/tui/src/core/turn.rs @@ -130,10 +130,14 @@ fn add_optional_usage(total: Option, delta: Option) -> Option { /// Take a `pre-turn:` workspace snapshot. /// +/// `cap_bytes` is the workspace-size ceiling that gates first-init +/// (passed through to [`SnapshotRepo::open_or_init_with_cap`]); pass +/// `0` to disable the cap. +/// /// Returns the snapshot SHA on success, `None` on any error. Errors are /// logged at WARN; the turn loop must not block on this. -pub fn pre_turn_snapshot(workspace: &Path, turn_seq: u64) -> Option { - snapshot_with_label(workspace, &format!("pre-turn:{turn_seq}")) +pub fn pre_turn_snapshot(workspace: &Path, turn_seq: u64, cap_bytes: u64) -> Option { + snapshot_with_label(workspace, &format!("pre-turn:{turn_seq}"), cap_bytes) } /// Take a `tool:` workspace snapshot, taken before executing a @@ -144,18 +148,18 @@ pub fn pre_turn_snapshot(workspace: &Path, turn_seq: u64) -> Option { /// /// Returns the snapshot SHA on success, `None` on any error. Errors are /// logged at WARN and are non-fatal. -pub fn pre_tool_snapshot(workspace: &Path, call_id: &str) -> Option { - snapshot_with_label(workspace, &format!("tool:{call_id}")) +pub fn pre_tool_snapshot(workspace: &Path, call_id: &str, cap_bytes: u64) -> Option { + snapshot_with_label(workspace, &format!("tool:{call_id}"), cap_bytes) } /// Take a `post-turn:` workspace snapshot. Same failure model as /// [`pre_turn_snapshot`]. 
-pub fn post_turn_snapshot(workspace: &Path, turn_seq: u64) -> Option { - snapshot_with_label(workspace, &format!("post-turn:{turn_seq}")) +pub fn post_turn_snapshot(workspace: &Path, turn_seq: u64, cap_bytes: u64) -> Option { + snapshot_with_label(workspace, &format!("post-turn:{turn_seq}"), cap_bytes) } -fn snapshot_with_label(workspace: &Path, label: &str) -> Option { - match SnapshotRepo::open_or_init(workspace) { +fn snapshot_with_label(workspace: &Path, label: &str, cap_bytes: u64) -> Option { + match SnapshotRepo::open_or_init_with_cap(workspace, cap_bytes) { Ok(repo) => match repo.snapshot(label) { Ok(id) => Some(id.0), Err(e) => { diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index f7104d90..02ba2e19 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -4441,6 +4441,10 @@ async fn run_exec_agent( max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, network_policy, snapshots_enabled: config.snapshots_config().enabled, + snapshots_max_workspace_bytes: config + .snapshots_config() + .max_workspace_gb + .saturating_mul(1024 * 1024 * 1024), lsp_config, runtime_services: crate::tools::spec::RuntimeToolServices::default(), subagent_model_overrides: config.subagent_model_overrides(), diff --git a/crates/tui/src/runtime_threads.rs b/crates/tui/src/runtime_threads.rs index de659163..742ef440 100644 --- a/crates/tui/src/runtime_threads.rs +++ b/crates/tui/src/runtime_threads.rs @@ -1946,6 +1946,11 @@ impl RuntimeThreadManager { max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, network_policy, snapshots_enabled: self.config.snapshots_config().enabled, + snapshots_max_workspace_bytes: self + .config + .snapshots_config() + .max_workspace_gb + .saturating_mul(1024 * 1024 * 1024), lsp_config, runtime_services: crate::tools::spec::RuntimeToolServices { task_manager: self.task_manager.lock().ok().and_then(|slot| slot.clone()), diff --git a/crates/tui/src/snapshot/mod.rs b/crates/tui/src/snapshot/mod.rs index 
69550cda..51ea6419 100644 --- a/crates/tui/src/snapshot/mod.rs +++ b/crates/tui/src/snapshot/mod.rs @@ -41,4 +41,7 @@ pub mod repo; pub use paths::{snapshot_dir_for, snapshot_git_dir}; pub use prune::{DEFAULT_MAX_AGE, prune_older_than}; #[allow(unused_imports)] -pub use repo::{Snapshot, SnapshotId, SnapshotRepo}; +pub use repo::{ + DEFAULT_MAX_WORKSPACE_BYTES_FOR_SNAPSHOT, Snapshot, SnapshotId, SnapshotRepo, + estimate_workspace_size_bounded, +}; diff --git a/crates/tui/src/snapshot/repo.rs b/crates/tui/src/snapshot/repo.rs index 296b94ca..d1c2cfae 100644 --- a/crates/tui/src/snapshot/repo.rs +++ b/crates/tui/src/snapshot/repo.rs @@ -59,6 +59,61 @@ const MAX_SNAPSHOT_SIZE_MB: u64 = 500; /// so the repo doesn't hit the limit again one snapshot later. const PRUNE_TARGET_MB: u64 = 400; +/// Default workspace-size ceiling above which snapshots self-disable +/// on first use (2 GB of non-excluded content). Reports from users with +/// multi-hundred-GB project directories — datasets, model weights, +/// docker image dumps that fall outside the built-in excludes — +/// surfaced that `git add -A` on first init would hang the TUI for +/// minutes-to-hours while indexing the workspace. Snapshots are a +/// rollback safety net, not a backup tool; bailing out on workspaces +/// that big is the right tradeoff. Users with legitimate large +/// monorepos can raise `[snapshots] max_workspace_gb` (or set it to +/// `0` to disable the cap entirely). +pub const DEFAULT_MAX_WORKSPACE_BYTES_FOR_SNAPSHOT: u64 = 2 * 1024 * 1024 * 1024; + +/// Hard cap on the number of file entries the bounded size estimator +/// will inspect before declaring the workspace "too large". Protects +/// against a workspace with millions of tiny files (no individual +/// file is large, but `git add -A` would still take forever). +const SIZE_WALK_MAX_ENTRIES: usize = 200_000; + +/// Top-level directory and extension patterns that the snapshot path +/// already excludes via `BUILTIN_EXCLUDES`. 
The estimator skips these +/// up front so the size walk reflects what would actually land in the +/// snapshot commit. Kept narrow to common build-output dirs — anything +/// else falls back to the `.gitignore` filter. +const SIZE_WALK_SKIP_DIRS: &[&str] = &[ + "node_modules", + "target", + "dist", + "build", + ".build", + ".next", + ".nuxt", + ".svelte-kit", + ".turbo", + ".parcel-cache", + "vendor", + ".cargo", + ".rustup", + ".npm", + ".bun", + ".yarn", + ".pnpm-store", + ".cache", + ".venv", + "venv", + ".tox", + "__pycache__", + ".mypy_cache", + ".pytest_cache", + ".ruff_cache", + ".gradle", + ".m2", + ".local", + ".git", +]; + const BUILTIN_EXCLUDES: &str = "\ # DeepSeek TUI built-in snapshot exclusions node_modules/ @@ -136,6 +191,18 @@ impl SnapshotRepo { /// the user's global git identity (we don't want our snapshots to /// look like they came from the user). pub fn open_or_init(workspace: &Path) -> io::Result { + Self::open_or_init_with_cap(workspace, DEFAULT_MAX_WORKSPACE_BYTES_FOR_SNAPSHOT) + } + + /// Variant of [`Self::open_or_init`] that accepts an explicit + /// workspace-size cap. `cap_bytes = 0` disables the cap entirely + /// (always snapshot, regardless of size). + /// + /// When the workspace exceeds the cap and the side repo hasn't + /// been initialized yet, returns `Err(InvalidInput)` with a + /// "workspace too large" reason. Subsequent calls (after the user + /// shrinks the workspace or raises the cap via config) succeed. + pub fn open_or_init_with_cap(workspace: &Path, cap_bytes: u64) -> io::Result { let work_tree = workspace .canonicalize() .unwrap_or_else(|_| workspace.to_path_buf()); @@ -156,6 +223,24 @@ impl SnapshotRepo { let needs_init = !git_dir.exists(); if needs_init { + // First-init size guard. 
Skipping this on subsequent opens + // is intentional: paying a workspace walk on every snapshot + // would defeat the purpose of the cap, and a workspace + // that fit on first init is allowed to grow within the + // existing repo's `MAX_SNAPSHOT_SIZE_MB` budget. Users on + // workspaces that grew past the cap mid-session get the + // existing aggressive-pruning path in `snapshot()`. + if estimate_workspace_size_bounded(&work_tree, cap_bytes).is_none() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!( + "workspace too large for snapshots (over {} GB of non-excluded content or > {} entries): {}\n raise `[snapshots] max_workspace_gb` in config.toml (or set it to 0 to disable the cap) if you want snapshots on this workspace.", + cap_bytes / (1024 * 1024 * 1024), + SIZE_WALK_MAX_ENTRIES, + work_tree.display() + ), + )); + } let parent = git_dir.parent().ok_or_else(|| { io::Error::new(io::ErrorKind::InvalidInput, "snapshot dir has no parent") })?; @@ -658,6 +743,52 @@ fn io_other(msg: impl Into) -> io::Error { io::Error::other(msg.into()) } +/// Walk `workspace` and accumulate file sizes, returning `Some(total)` +/// when the workspace fits under `cap_bytes` and `None` when the walk +/// exceeds the cap. Honors `.gitignore` (via the `ignore` crate's +/// `WalkBuilder` defaults) and the snapshot-specific skip list above, +/// so the measured size reflects what would actually land in a +/// snapshot commit rather than the raw `du -sh` total. +/// +/// The walk is bounded by both `cap_bytes` and +/// [`SIZE_WALK_MAX_ENTRIES`] — either trip returns `None`. A +/// `cap_bytes` of `0` disables only the byte cap (the entry limit still +/// returns `None` past it), so config can opt out of the size check. 
+pub fn estimate_workspace_size_bounded(workspace: &Path, cap_bytes: u64) -> Option { + use ignore::WalkBuilder; + let mut total: u64 = 0; + let mut entries: usize = 0; + let skip: HashSet<&'static str> = SIZE_WALK_SKIP_DIRS.iter().copied().collect(); + let walker = WalkBuilder::new(workspace) + .hidden(false) + .follow_links(false) + .filter_entry(move |entry| { + // Skip the well-known build-output directories at any depth. + // The `ignore` crate calls `filter_entry` once per dir/file; + // returning `false` here prunes the whole subtree. + entry + .file_name() + .to_str() + .is_none_or(|name| !skip.contains(name)) + }) + .build(); + for entry in walker.flatten() { + entries += 1; + if entries > SIZE_WALK_MAX_ENTRIES { + return None; + } + if let Ok(meta) = entry.metadata() + && meta.is_file() + { + total = total.saturating_add(meta.len()); + if cap_bytes > 0 && total > cap_bytes { + return None; + } + } + } + Some(total) +} + fn unsafe_workspace_snapshot_reason(workspace: &Path, home: Option<&Path>) -> Option<&'static str> { let workspace = normalize_path_for_safety(workspace); if is_filesystem_root(&workspace) { @@ -1126,4 +1257,108 @@ mod tests { let id = repo.snapshot("pre-turn:1").expect("snapshot under cap"); assert_eq!(id.as_str().len(), 40); } + + #[test] + fn estimate_workspace_size_bounded_returns_total_when_under_cap() { + let tmp = tempdir().unwrap(); + let workspace = tmp.path().join("workspace"); + std::fs::create_dir_all(&workspace).unwrap(); + std::fs::write(workspace.join("a.txt"), vec![b'a'; 100]).unwrap(); + std::fs::write(workspace.join("b.txt"), vec![b'b'; 50]).unwrap(); + let total = estimate_workspace_size_bounded(&workspace, 10_000) + .expect("under-cap walk must return Some"); + assert!( + total >= 150, + "total ({total}) must include both files (≥150 bytes)" + ); + } + + #[test] + fn estimate_workspace_size_bounded_returns_none_when_over_cap() { + let tmp = tempdir().unwrap(); + let workspace = tmp.path().join("workspace"); + 
std::fs::create_dir_all(&workspace).unwrap(); + // Two 1 KB files, cap at 1 KB — second file should trip the cap. + std::fs::write(workspace.join("a.bin"), vec![b'a'; 1024]).unwrap(); + std::fs::write(workspace.join("b.bin"), vec![b'b'; 1024]).unwrap(); + assert!( + estimate_workspace_size_bounded(&workspace, 1024).is_none(), + "over-cap walk must return None for early bailout" + ); + } + + #[test] + fn estimate_workspace_size_bounded_skips_builtin_excluded_dirs() { + let tmp = tempdir().unwrap(); + let workspace = tmp.path().join("workspace"); + std::fs::create_dir_all(workspace.join("node_modules")).unwrap(); + std::fs::create_dir_all(workspace.join("target")).unwrap(); + std::fs::create_dir_all(workspace.join("src")).unwrap(); + // 2 MB of "build output" in excluded dirs — must not count toward + // the cap. + std::fs::write(workspace.join("node_modules/big.bin"), vec![0u8; 1_000_000]).unwrap(); + std::fs::write(workspace.join("target/big.bin"), vec![0u8; 1_000_000]).unwrap(); + std::fs::write(workspace.join("src/lib.rs"), b"// real source").unwrap(); + let total = estimate_workspace_size_bounded(&workspace, 500_000) + .expect("walk must succeed since real source is tiny"); + assert!( + total < 1_000, + "total ({total}) must reflect only src/, not node_modules/ or target/" + ); + } + + #[test] + fn estimate_workspace_size_bounded_cap_zero_disables_cap() { + let tmp = tempdir().unwrap(); + let workspace = tmp.path().join("workspace"); + std::fs::create_dir_all(&workspace).unwrap(); + // 10 KB file — would trip a 1 KB cap, but cap=0 means no cap. 
+ std::fs::write(workspace.join("big.bin"), vec![0u8; 10 * 1024]).unwrap(); + let total = + estimate_workspace_size_bounded(&workspace, 0).expect("cap=0 must always return Some"); + assert!( + total >= 10 * 1024, + "total ({total}) must include the 10 KB file when cap is disabled" + ); + } + + #[test] + fn open_or_init_with_cap_rejects_oversized_workspace() { + let tmp = tempdir().unwrap(); + let workspace = tmp.path().join("workspace"); + std::fs::create_dir_all(&workspace).unwrap(); + let _home = scoped_home(tmp.path()); + // Drop a 4 KB file under a 1 KB cap. + std::fs::write(workspace.join("big.bin"), vec![0u8; 4096]).unwrap(); + let outcome = SnapshotRepo::open_or_init_with_cap(&workspace, 1024); + let err = match outcome { + Ok(_) => panic!("oversized workspace must fail open_or_init_with_cap"), + Err(e) => e, + }; + let msg = err.to_string(); + assert!( + msg.contains("workspace too large for snapshots"), + "error must call out the size cap; got: {msg}" + ); + assert!( + msg.contains("max_workspace_gb"), + "error must reference the config knob users can raise; got: {msg}" + ); + } + + #[test] + fn open_or_init_with_cap_zero_disables_size_check() { + let tmp = tempdir().unwrap(); + let workspace = tmp.path().join("workspace"); + std::fs::create_dir_all(&workspace).unwrap(); + let _home = scoped_home(tmp.path()); + // 4 KB file but cap=0 → should still succeed. 
+ std::fs::write(workspace.join("big.bin"), vec![0u8; 4096]).unwrap(); + let repo = SnapshotRepo::open_or_init_with_cap(&workspace, 0) + .expect("cap=0 must skip the size check"); + let id = repo + .snapshot("pre-turn:1") + .expect("snapshot under disabled cap"); + assert_eq!(id.as_str().len(), 40); + } } diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index de960ab6..10fa3e6e 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -616,6 +616,10 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig { crate::network_policy::NetworkPolicyDecider::with_default_audit(toml_cfg.into_runtime()) }), snapshots_enabled: config.snapshots_config().enabled, + snapshots_max_workspace_bytes: config + .snapshots_config() + .max_workspace_gb + .saturating_mul(1024 * 1024 * 1024), lsp_config: config .lsp .clone()