diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 88316221..c8a53d7b 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -47,6 +47,7 @@ mod sandbox; mod session_manager; mod settings; mod skills; +mod snapshot; mod task_manager; #[cfg(test)] mod test_support; diff --git a/crates/tui/src/snapshot/mod.rs b/crates/tui/src/snapshot/mod.rs new file mode 100644 index 00000000..82b0ab61 --- /dev/null +++ b/crates/tui/src/snapshot/mod.rs @@ -0,0 +1,42 @@ +//! Workspace snapshots — pre/post-turn safety net. +//! +//! Each turn the engine takes a `pre-turn:` snapshot of the user's +//! workspace into a side git repo at +//! `~/.deepseek/snapshots///.git`, then a +//! matching `post-turn:` snapshot when the turn finishes. Users +//! can roll back via `/restore N` (slash command) or, when the model +//! recognises an "undo my last edit" intent, the `revert_turn` tool. +//! +//! ## Why a side repo? +//! +//! - The user's own `.git` is never touched. `--git-dir` and +//! `--work-tree` are *always* set together when we shell out to git; +//! that single invariant is what keeps snapshots and the user's repo +//! completely independent. +//! - Workspaces without git still get snapshots. +//! - `git`'s own deduplication (object packfiles) keeps the disk +//! footprint tractable — typical 100 MB workspace × 12 turns ≈ 1.2 GB +//! uncompressed but git's content-addressed storage usually brings +//! that down 10-30×. We mitigate further with: +//! - 7-day default retention (`session_manager` prunes at session +//! start via [`prune::prune_older_than`]). +//! - `gc.auto = 0` on the side repo (we don't want background gcs +//! firing mid-turn) plus an explicit `git gc --prune=now` after +//! prune. +//! +//! ## Failure model +//! +//! Pre/post-turn snapshot calls are **non-fatal**. If `git` is missing, +//! the disk is full, or the workspace is on a read-only filesystem, the +//! turn proceeds and the engine logs a warning. The snapshot is a +//! safety net, not a correctness gate. + +pub mod paths; +pub mod prune; +pub mod repo; + +#[allow(unused_imports)] +pub use paths::{snapshot_dir_for, snapshot_git_dir}; +pub use prune::{DEFAULT_MAX_AGE, prune_older_than}; +#[allow(unused_imports)] +pub use repo::{Snapshot, SnapshotId, SnapshotRepo}; diff --git a/crates/tui/src/snapshot/paths.rs b/crates/tui/src/snapshot/paths.rs new file mode 100644 index 00000000..9fe5e353 --- /dev/null +++ b/crates/tui/src/snapshot/paths.rs @@ -0,0 +1,139 @@ +//! Path resolution for the per-workspace snapshot side-repos. +//! +//! Snapshots live in `~/.deepseek/snapshots///`. +//! The two-level hash split lets us snapshot multiple worktrees of the same +//! project independently — `git worktree list` users won't get cross-talk +//! between feature branches. + +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; +use std::io; +use std::path::{Path, PathBuf}; + +/// Compute the snapshot directory for a given workspace path. +/// +/// Returns `~/.deepseek/snapshots///`. The +/// caller is responsible for creating it on disk; we purposefully don't +/// touch the filesystem here so this is cheap to call repeatedly. +/// +/// The `project_hash` is derived from the canonicalized workspace path +/// after stripping any `.worktrees/` suffix — multiple worktrees +/// of the same repo share the same `project_hash` so users can browse +/// snapshots cross-worktree if they want, but the `worktree_hash` keeps +/// commits isolated by default. +pub fn snapshot_dir_for(workspace: &Path) -> PathBuf { + snapshot_dir_with_home(workspace, dirs::home_dir()) +} + +/// Same as [`snapshot_dir_for`] but with an injectable home directory. +/// Used by tests so we never touch the user's real `~/.deepseek/`. +pub fn snapshot_dir_with_home(workspace: &Path, home: Option) -> PathBuf { + let home = home.unwrap_or_else(|| PathBuf::from(".")); + let canonical = workspace + .canonicalize() + .unwrap_or_else(|_| workspace.to_path_buf()); + let project_root = strip_worktree_suffix(&canonical); + let project_hash = stable_hex(&project_root); + let worktree_hash = stable_hex(&canonical); + home.join(".deepseek") + .join("snapshots") + .join(project_hash) + .join(worktree_hash) +} + +/// Resolve the `.git` directory inside the snapshot dir. +pub fn snapshot_git_dir(workspace: &Path) -> PathBuf { + snapshot_dir_for(workspace).join(".git") +} + +/// Ensure the snapshot dir exists on disk and return its path. +pub fn ensure_snapshot_dir(workspace: &Path) -> io::Result { + let dir = snapshot_dir_for(workspace); + std::fs::create_dir_all(&dir)?; + Ok(dir) +} + +/// Strip a trailing `.worktrees/` segment so all worktrees of the +/// same checkout share a `project_hash`. If the path doesn't look like a +/// worktree it's returned unchanged. +fn strip_worktree_suffix(path: &Path) -> PathBuf { + let mut components: Vec<_> = path.components().collect(); + if components.len() >= 2 + && let Some(parent) = components.get(components.len() - 2) + && parent.as_os_str() == ".worktrees" + { + components.truncate(components.len() - 2); + let mut p = PathBuf::new(); + for c in components { + p.push(c.as_os_str()); + } + return p; + } + path.to_path_buf() +} + +/// Hex-encoded `DefaultHasher` digest. Sufficient for directory naming +/// (collision risk is negligible for the small set of paths we care +/// about, and we'd rather not pull in `sha2` for a 16-byte tag). +fn stable_hex(path: &Path) -> String { + let mut hasher = DefaultHasher::new(); + path.hash(&mut hasher); + format!("{:016x}", hasher.finish()) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn snapshot_dir_layout_two_levels_under_deepseek() { + let tmp = tempdir().expect("tempdir"); + let dir = snapshot_dir_with_home(tmp.path(), Some(tmp.path().to_path_buf())); + let mut iter = dir.strip_prefix(tmp.path()).unwrap().components(); + assert_eq!(iter.next().unwrap().as_os_str(), ".deepseek"); + assert_eq!(iter.next().unwrap().as_os_str(), "snapshots"); + assert!(iter.next().is_some()); // project_hash + assert!(iter.next().is_some()); // worktree_hash + assert!(iter.next().is_none()); + } + + #[test] + fn worktree_suffix_stripped_for_project_hash() { + let tmp = tempdir().expect("tempdir"); + let main_path = tmp.path().join("repo"); + let wt_path = tmp.path().join("repo").join(".worktrees").join("featX"); + std::fs::create_dir_all(&main_path).unwrap(); + std::fs::create_dir_all(&wt_path).unwrap(); + + let main_dir = snapshot_dir_with_home(&main_path, Some(tmp.path().to_path_buf())); + let wt_dir = snapshot_dir_with_home(&wt_path, Some(tmp.path().to_path_buf())); + + // Same project_hash (parent component before the worktree-specific tail). + let main_components: Vec<_> = main_dir.components().collect(); + let wt_components: Vec<_> = wt_dir.components().collect(); + assert_eq!( + main_components[main_components.len() - 2], + wt_components[wt_components.len() - 2], + "worktrees should share project_hash", + ); + // But different worktree_hash (the tail). + assert_ne!(main_components.last(), wt_components.last()); + } + + #[test] + fn ensure_snapshot_dir_creates_path() { + let tmp = tempdir().expect("tempdir"); + // Use scoped HOME so we don't pollute the real one. + let dir = snapshot_dir_with_home(tmp.path(), Some(tmp.path().to_path_buf())); + std::fs::create_dir_all(&dir).unwrap(); + assert!(dir.exists()); + } + + #[test] + fn snapshot_git_dir_appends_dot_git() { + let tmp = tempdir().expect("tempdir"); + let git_dir = snapshot_git_dir(tmp.path()); + assert_eq!(git_dir.file_name().unwrap(), ".git"); + } +} diff --git a/crates/tui/src/snapshot/prune.rs b/crates/tui/src/snapshot/prune.rs new file mode 100644 index 00000000..3ad67893 --- /dev/null +++ b/crates/tui/src/snapshot/prune.rs @@ -0,0 +1,91 @@ +//! Boot-time snapshot pruning. +//! +//! Called from `session_manager` once per session start. Failure is +//! never fatal — old snapshots taking disk space is annoying but not +//! correctness-breaking, so we log and move on. + +use std::io; +use std::path::Path; +use std::time::Duration; + +use super::paths::snapshot_git_dir; +use super::repo::SnapshotRepo; + +/// Default snapshot retention window: 7 days. +pub const DEFAULT_MAX_AGE: Duration = Duration::from_secs(7 * 24 * 60 * 60); + +/// Prune snapshots older than `max_age` for the given workspace. +/// +/// If no snapshot repo exists yet (first run) this is a cheap no-op. +/// Returns the number of snapshots removed. +pub fn prune_older_than(workspace: &Path, max_age: Duration) -> io::Result { + let git_dir = snapshot_git_dir(workspace); + if !git_dir.exists() { + return Ok(0); + } + let repo = SnapshotRepo::open_or_init(workspace)?; + repo.prune_older_than(max_age) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_support::lock_test_env; + use std::sync::MutexGuard; + use tempfile::tempdir; + + /// Same guard shape as in `repo::tests` — pins HOME for the lifetime + /// of one test under the process-wide env mutex. + struct ScopedHome { + prev: Option, + _guard: MutexGuard<'static, ()>, + } + impl Drop for ScopedHome { + fn drop(&mut self) { + // SAFETY: process-wide lock still held. + unsafe { + match self.prev.take() { + Some(v) => std::env::set_var("HOME", v), + None => std::env::remove_var("HOME"), + } + } + } + } + fn scoped_home(home: &std::path::Path) -> ScopedHome { + let guard = lock_test_env(); + let prev = std::env::var_os("HOME"); + // SAFETY: serialised by the global env lock. + unsafe { + std::env::set_var("HOME", home); + } + ScopedHome { + prev, + _guard: guard, + } + } + + #[test] + fn prune_no_repo_returns_zero() { + let tmp = tempdir().unwrap(); + let _home = scoped_home(tmp.path()); + let removed = prune_older_than(tmp.path(), DEFAULT_MAX_AGE).unwrap(); + assert_eq!(removed, 0); + } + + #[test] + fn prune_with_existing_repo_zero_age_clears_all() { + let tmp = tempdir().unwrap(); + let _home = scoped_home(tmp.path()); + let workspace = tmp.path().join("ws"); + std::fs::create_dir_all(&workspace).unwrap(); + let repo = SnapshotRepo::open_or_init(&workspace).unwrap(); + std::fs::write(workspace.join("f.txt"), "x").unwrap(); + repo.snapshot("turn:0").unwrap(); + + // Same-second flake guard: see `repo::tests`. + std::thread::sleep(Duration::from_millis(1100)); + + let removed = prune_older_than(&workspace, Duration::from_secs(0)).unwrap(); + assert!(removed >= 1); + } +} diff --git a/crates/tui/src/snapshot/repo.rs b/crates/tui/src/snapshot/repo.rs new file mode 100644 index 00000000..8f34de6f --- /dev/null +++ b/crates/tui/src/snapshot/repo.rs @@ -0,0 +1,492 @@ +//! Side-git repository wrapper for workspace snapshots. +//! +//! `SnapshotRepo` shells out to the system `git` binary (we deliberately +//! avoid `git2` to dodge its LGPL surface). The two paths that matter: +//! +//! - `git_dir` → `~/.deepseek/snapshots///.git` +//! - `work_tree` → the user's actual workspace +//! +//! Every git invocation passes both `--git-dir` AND `--work-tree`. That is +//! the single biggest safety mechanism: it guarantees we never accidentally +//! mutate the user's own `.git` directory. If git can't find the side +//! repo, the command fails fast instead of falling back to "current +//! directory". + +use std::io; +use std::path::{Path, PathBuf}; +use std::process::{Command, Output}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use super::paths::{ensure_snapshot_dir, snapshot_git_dir}; + +/// Identifier for a snapshot — currently the underlying git commit SHA. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SnapshotId(pub String); + +impl SnapshotId { + /// Borrow the SHA as a string slice. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +/// A single snapshot record (one row in `git log`). +#[derive(Debug, Clone)] +pub struct Snapshot { + /// Commit SHA inside the side repo. + pub id: SnapshotId, + /// Subject line — the label passed to [`SnapshotRepo::snapshot`]. + pub label: String, + /// Author timestamp (Unix seconds). + pub timestamp: i64, +} + +/// Wrapper around the per-workspace side-git repo. +pub struct SnapshotRepo { + git_dir: PathBuf, + work_tree: PathBuf, +} + +impl SnapshotRepo { + /// Open or initialize the snapshot repo for `workspace`. + /// + /// On first use this: + /// 1. Creates the `~/.deepseek/snapshots/<…>/.git` dir. + /// 2. Runs `git init --bare=false --quiet`. + /// 3. Sets a fixed `user.name` / `user.email` so commits don't pick up + /// the user's global git identity (we don't want our snapshots to + /// look like they came from the user). + pub fn open_or_init(workspace: &Path) -> io::Result { + let work_tree = workspace + .canonicalize() + .unwrap_or_else(|_| workspace.to_path_buf()); + + let _ = ensure_snapshot_dir(&work_tree)?; + let git_dir = snapshot_git_dir(&work_tree); + + let needs_init = !git_dir.exists(); + if needs_init { + let parent = git_dir.parent().ok_or_else(|| { + io::Error::new(io::ErrorKind::InvalidInput, "snapshot dir has no parent") + })?; + std::fs::create_dir_all(parent)?; + // `git init` here uses the parent directory as the work tree + // and stores metadata in `.git`. We then continue to use + // explicit `--git-dir` / `--work-tree` flags for every other + // command so behaviour is invariant of cwd. + let init = Command::new("git") + .arg("init") + .arg("--quiet") + .arg(parent) + .output() + .map_err(|e| io_other(format!("failed to spawn git init: {e}")))?; + if !init.status.success() { + return Err(io_other(format!( + "git init failed: {}", + String::from_utf8_lossy(&init.stderr).trim() + ))); + } + + // Pin a stable identity so snapshot commits are recognisable + // and don't bleed into the user's git config. + let _ = run_git( + &git_dir, + &work_tree, + &["config", "user.name", "deepseek-snapshots"], + ); + let _ = run_git( + &git_dir, + &work_tree, + &["config", "user.email", "snapshots@deepseek-tui.local"], + ); + // Don't auto-gc on every commit; we manage pruning ourselves. + let _ = run_git(&git_dir, &work_tree, &["config", "gc.auto", "0"]); + // Ignore CRLF rewriting — we want byte-for-byte fidelity. + let _ = run_git(&git_dir, &work_tree, &["config", "core.autocrlf", "false"]); + } + + Ok(Self { git_dir, work_tree }) + } + + /// Take a snapshot of the current working tree. + /// + /// Internally: `git add -A` then `git commit --allow-empty -m