feat(snapshot): #137 add workspace snapshot side-repo module
Introduce `crate::snapshot` — a per-workspace side-git repo that lives under `~/.deepseek/snapshots/<project_hash>/<worktree_hash>/.git` and captures the workspace into commits via `git add -A` + `git commit --allow-empty`. The user's own `.git` is never touched: every git invocation passes both `--git-dir` (side repo) and `--work-tree` (workspace) together, which is the load-bearing safety invariant.

Module layout:
- `paths.rs` — resolves the side-repo dir; strips `.worktrees/<name>` so worktrees of the same checkout share a project_hash but get distinct worktree_hashes.
- `repo.rs` — `SnapshotRepo::open_or_init / snapshot / restore / list / prune_older_than`. Shells out to system `git` (avoids `git2` LGPL surface). Honors workspace `.gitignore` automatically.
- `prune.rs` — boot-time helper used by session_manager (next commit). Default retention is 7 days.

Tests (real `git` invocations on tempdirs, env-mutating tests serialised through the existing `crate::test_support::lock_test_env` mutex) cover: snapshot creates a commit in the side repo only, restore reverts files, list respects limit, prune drops aged commits, gitignore is honored, and re-init is idempotent.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -47,6 +47,7 @@ mod sandbox;
|
||||
mod session_manager;
|
||||
mod settings;
|
||||
mod skills;
|
||||
mod snapshot;
|
||||
mod task_manager;
|
||||
#[cfg(test)]
|
||||
mod test_support;
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
//! Workspace snapshots — pre/post-turn safety net.
|
||||
//!
|
||||
//! Each turn the engine takes a `pre-turn:<seq>` snapshot of the user's
|
||||
//! workspace into a side git repo at
|
||||
//! `~/.deepseek/snapshots/<project_hash>/<worktree_hash>/.git`, then a
|
||||
//! matching `post-turn:<seq>` snapshot when the turn finishes. Users
|
||||
//! can roll back via `/restore N` (slash command) or, when the model
|
||||
//! recognises an "undo my last edit" intent, the `revert_turn` tool.
|
||||
//!
|
||||
//! ## Why a side repo?
|
||||
//!
|
||||
//! - The user's own `.git` is never touched. `--git-dir` and
|
||||
//! `--work-tree` are *always* set together when we shell out to git;
|
||||
//! that single invariant is what keeps snapshots and the user's repo
|
||||
//! completely independent.
|
||||
//! - Workspaces without git still get snapshots.
|
||||
//! - `git`'s own deduplication (object packfiles) keeps the disk
|
||||
//! footprint tractable — typical 100 MB workspace × 12 turns ≈ 1.2 GB
|
||||
//! uncompressed but git's content-addressed storage usually brings
|
||||
//! that down 10-30×. We mitigate further with:
|
||||
//! - 7-day default retention (`session_manager` prunes at session
|
||||
//! start via [`prune::prune_older_than`]).
|
||||
//! - `gc.auto = 0` on the side repo (we don't want background gcs
|
||||
//! firing mid-turn) plus an explicit `git gc --prune=now` after
|
||||
//! prune.
|
||||
//!
|
||||
//! ## Failure model
|
||||
//!
|
||||
//! Pre/post-turn snapshot calls are **non-fatal**. If `git` is missing,
|
||||
//! the disk is full, or the workspace is on a read-only filesystem, the
|
||||
//! turn proceeds and the engine logs a warning. The snapshot is a
|
||||
//! safety net, not a correctness gate.
|
||||
|
||||
pub mod paths;
|
||||
pub mod prune;
|
||||
pub mod repo;
|
||||
|
||||
#[allow(unused_imports)]
|
||||
pub use paths::{snapshot_dir_for, snapshot_git_dir};
|
||||
pub use prune::{DEFAULT_MAX_AGE, prune_older_than};
|
||||
#[allow(unused_imports)]
|
||||
pub use repo::{Snapshot, SnapshotId, SnapshotRepo};
|
||||
@@ -0,0 +1,139 @@
|
||||
//! Path resolution for the per-workspace snapshot side-repos.
|
||||
//!
|
||||
//! Snapshots live in `~/.deepseek/snapshots/<project_hash>/<worktree_hash>/`.
|
||||
//! The two-level hash split lets us snapshot multiple worktrees of the same
|
||||
//! project independently — `git worktree list` users won't get cross-talk
|
||||
//! between feature branches.
|
||||
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Compute the snapshot directory for a given workspace path.
|
||||
///
|
||||
/// Returns `~/.deepseek/snapshots/<project_hash>/<worktree_hash>/`. The
|
||||
/// caller is responsible for creating it on disk; we purposefully don't
|
||||
/// touch the filesystem here so this is cheap to call repeatedly.
|
||||
///
|
||||
/// The `project_hash` is derived from the canonicalized workspace path
|
||||
/// after stripping any `.worktrees/<name>` suffix — multiple worktrees
|
||||
/// of the same repo share the same `project_hash` so users can browse
|
||||
/// snapshots cross-worktree if they want, but the `worktree_hash` keeps
|
||||
/// commits isolated by default.
|
||||
pub fn snapshot_dir_for(workspace: &Path) -> PathBuf {
|
||||
snapshot_dir_with_home(workspace, dirs::home_dir())
|
||||
}
|
||||
|
||||
/// Same as [`snapshot_dir_for`] but with an injectable home directory.
|
||||
/// Used by tests so we never touch the user's real `~/.deepseek/`.
|
||||
pub fn snapshot_dir_with_home(workspace: &Path, home: Option<PathBuf>) -> PathBuf {
|
||||
let home = home.unwrap_or_else(|| PathBuf::from("."));
|
||||
let canonical = workspace
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| workspace.to_path_buf());
|
||||
let project_root = strip_worktree_suffix(&canonical);
|
||||
let project_hash = stable_hex(&project_root);
|
||||
let worktree_hash = stable_hex(&canonical);
|
||||
home.join(".deepseek")
|
||||
.join("snapshots")
|
||||
.join(project_hash)
|
||||
.join(worktree_hash)
|
||||
}
|
||||
|
||||
/// Resolve the `.git` directory inside the snapshot dir.
|
||||
pub fn snapshot_git_dir(workspace: &Path) -> PathBuf {
|
||||
snapshot_dir_for(workspace).join(".git")
|
||||
}
|
||||
|
||||
/// Ensure the snapshot dir exists on disk and return its path.
|
||||
pub fn ensure_snapshot_dir(workspace: &Path) -> io::Result<PathBuf> {
|
||||
let dir = snapshot_dir_for(workspace);
|
||||
std::fs::create_dir_all(&dir)?;
|
||||
Ok(dir)
|
||||
}
|
||||
|
||||
/// Drop a trailing `.worktrees/<name>` pair of components so every
/// worktree of one checkout maps to the same project root.
///
/// A path whose second-to-last component is not `.worktrees` is returned
/// unchanged. When the whole path is just `.worktrees/<name>` the result
/// is an empty `PathBuf`.
fn strip_worktree_suffix(path: &Path) -> PathBuf {
    let parts: Vec<_> = path.components().collect();
    let count = parts.len();

    let looks_like_worktree = count >= 2 && parts[count - 2].as_os_str() == ".worktrees";
    if !looks_like_worktree {
        return path.to_path_buf();
    }

    // Rebuild the path from everything before `.worktrees/<name>`.
    parts[..count - 2]
        .iter()
        .map(|component| component.as_os_str())
        .collect()
}
|
||||
|
||||
/// Hex-encoded 64-bit FNV-1a digest of `path` (16 lowercase hex chars).
///
/// The digest names directories on disk, so it has to stay identical
/// across toolchain upgrades. `std`'s `DefaultHasher` documents its
/// algorithm as unspecified and subject to change between releases, so the
/// hash is computed here with fixed FNV-1a constants instead (still no
/// extra dependency such as `sha2`).
///
/// The path is hashed component by component, so spellings that `Path`
/// treats as equal (`a/b`, `a//b`, `a/b/`) produce the same digest. Each
/// component is followed by a `0xff` terminator byte so `ab/c` and `a/bc`
/// do not collide. Collision risk is negligible for the small set of
/// workspace paths involved.
fn stable_hex(path: &Path) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;
    const COMPONENT_END: u8 = 0xff;

    let mut digest = FNV_OFFSET_BASIS;
    for component in path.components() {
        let bytes = component.as_os_str().as_encoded_bytes();
        for &byte in bytes.iter().chain(std::iter::once(&COMPONENT_END)) {
            digest ^= u64::from(byte);
            digest = digest.wrapping_mul(FNV_PRIME);
        }
    }
    format!("{digest:016x}")
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Resolve the snapshot dir for `workspace` with the home directory
    /// pinned to `home`, so no test touches the real `~/.deepseek/`.
    fn dir_with_home(workspace: &Path, home: &Path) -> PathBuf {
        snapshot_dir_with_home(workspace, Some(home.to_path_buf()))
    }

    #[test]
    fn snapshot_dir_layout_two_levels_under_deepseek() {
        let tmp = tempdir().expect("tempdir");
        let dir = dir_with_home(tmp.path(), tmp.path());

        let relative = dir.strip_prefix(tmp.path()).unwrap();
        let parts: Vec<_> = relative.components().map(|c| c.as_os_str()).collect();

        // `.deepseek/snapshots/<project_hash>/<worktree_hash>` and nothing more.
        assert_eq!(parts.len(), 4);
        assert_eq!(parts[0], ".deepseek");
        assert_eq!(parts[1], "snapshots");
    }

    #[test]
    fn worktree_suffix_stripped_for_project_hash() {
        let tmp = tempdir().expect("tempdir");
        let main_path = tmp.path().join("repo");
        let wt_path = main_path.join(".worktrees").join("featX");
        // Creating the worktree path also creates `repo` itself.
        std::fs::create_dir_all(&wt_path).unwrap();

        let main_dir = dir_with_home(&main_path, tmp.path());
        let wt_dir = dir_with_home(&wt_path, tmp.path());

        // The second-to-last component is the project hash: shared.
        assert_eq!(
            main_dir.parent().and_then(Path::file_name),
            wt_dir.parent().and_then(Path::file_name),
            "worktrees should share project_hash",
        );
        // The last component is the worktree hash: distinct.
        assert_ne!(main_dir.file_name(), wt_dir.file_name());
    }

    #[test]
    fn ensure_snapshot_dir_creates_path() {
        let tmp = tempdir().expect("tempdir");
        // Resolve against a scoped home so the real one is not polluted.
        let dir = dir_with_home(tmp.path(), tmp.path());
        std::fs::create_dir_all(&dir).unwrap();
        assert!(dir.exists());
    }

    #[test]
    fn snapshot_git_dir_appends_dot_git() {
        let tmp = tempdir().expect("tempdir");
        let git_dir = snapshot_git_dir(tmp.path());
        assert_eq!(git_dir.file_name().unwrap(), ".git");
    }
}
|
||||
@@ -0,0 +1,91 @@
|
||||
//! Boot-time snapshot pruning.
|
||||
//!
|
||||
//! Called from `session_manager` once per session start. Failure is
|
||||
//! never fatal — old snapshots taking disk space is annoying but not
|
||||
//! correctness-breaking, so we log and move on.
|
||||
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::paths::snapshot_git_dir;
|
||||
use super::repo::SnapshotRepo;
|
||||
|
||||
/// How long snapshots are kept when the caller has no other preference:
/// one week (seconds × minutes × hours × days).
pub const DEFAULT_MAX_AGE: Duration = Duration::from_secs(60 * 60 * 24 * 7);
|
||||
|
||||
/// Prune snapshots older than `max_age` for the given workspace.
|
||||
///
|
||||
/// If no snapshot repo exists yet (first run) this is a cheap no-op.
|
||||
/// Returns the number of snapshots removed.
|
||||
pub fn prune_older_than(workspace: &Path, max_age: Duration) -> io::Result<usize> {
|
||||
let git_dir = snapshot_git_dir(workspace);
|
||||
if !git_dir.exists() {
|
||||
return Ok(0);
|
||||
}
|
||||
let repo = SnapshotRepo::open_or_init(workspace)?;
|
||||
repo.prune_older_than(max_age)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::lock_test_env;
    use std::sync::MutexGuard;
    use tempfile::tempdir;

    /// RAII guard that pins `HOME` for one test while holding the
    /// process-wide env mutex; mirrors the guard in `repo::tests`.
    struct ScopedHome {
        saved: Option<std::ffi::OsString>,
        _guard: MutexGuard<'static, ()>,
    }

    impl Drop for ScopedHome {
        fn drop(&mut self) {
            let saved = self.saved.take();
            // SAFETY: the env lock in `_guard` is still held at this point.
            unsafe {
                if let Some(value) = saved {
                    std::env::set_var("HOME", value);
                } else {
                    std::env::remove_var("HOME");
                }
            }
        }
    }

    /// Point `HOME` at `home` until the returned guard is dropped.
    fn scoped_home(home: &std::path::Path) -> ScopedHome {
        let _guard = lock_test_env();
        let saved = std::env::var_os("HOME");
        // SAFETY: serialised by the global env lock acquired above.
        unsafe {
            std::env::set_var("HOME", home);
        }
        ScopedHome { saved, _guard }
    }

    #[test]
    fn prune_no_repo_returns_zero() {
        let tmp = tempdir().unwrap();
        let _home = scoped_home(tmp.path());

        let pruned = prune_older_than(tmp.path(), DEFAULT_MAX_AGE).unwrap();
        assert_eq!(pruned, 0);
    }

    #[test]
    fn prune_with_existing_repo_zero_age_clears_all() {
        let tmp = tempdir().unwrap();
        let _home = scoped_home(tmp.path());

        let workspace = tmp.path().join("ws");
        std::fs::create_dir_all(&workspace).unwrap();
        let repo = SnapshotRepo::open_or_init(&workspace).unwrap();
        std::fs::write(workspace.join("f.txt"), "x").unwrap();
        repo.snapshot("turn:0").unwrap();

        // Same-second flake guard: see `repo::tests`.
        std::thread::sleep(Duration::from_millis(1100));

        let pruned = prune_older_than(&workspace, Duration::from_secs(0)).unwrap();
        assert!(pruned >= 1);
    }
}
|
||||
@@ -0,0 +1,492 @@
|
||||
//! Side-git repository wrapper for workspace snapshots.
|
||||
//!
|
||||
//! `SnapshotRepo` shells out to the system `git` binary (we deliberately
|
||||
//! avoid `git2` to dodge its LGPL surface). The two paths that matter:
|
||||
//!
|
||||
//! - `git_dir` → `~/.deepseek/snapshots/<project_hash>/<worktree_hash>/.git`
|
||||
//! - `work_tree` → the user's actual workspace
|
||||
//!
|
||||
//! Every git invocation passes both `--git-dir` AND `--work-tree`. That is
|
||||
//! the single biggest safety mechanism: it guarantees we never accidentally
|
||||
//! mutate the user's own `.git` directory. If git can't find the side
|
||||
//! repo, the command fails fast instead of falling back to "current
|
||||
//! directory".
|
||||
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Output};
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use super::paths::{ensure_snapshot_dir, snapshot_git_dir};
|
||||
|
||||
/// Handle naming one snapshot; today this wraps the commit SHA of the
/// snapshot inside the side repo.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SnapshotId(pub String);

impl SnapshotId {
    /// View the wrapped SHA as `&str` without copying it.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
|
||||
|
||||
/// A single snapshot record (one row in `git log`).
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Snapshot {
|
||||
/// Commit SHA inside the side repo.
|
||||
pub id: SnapshotId,
|
||||
/// Subject line — the label passed to [`SnapshotRepo::snapshot`].
|
||||
pub label: String,
|
||||
/// Author timestamp (Unix seconds).
|
||||
pub timestamp: i64,
|
||||
}
|
||||
|
||||
/// Handle on the side-git repository that stores snapshots for one
/// workspace.
pub struct SnapshotRepo {
    /// The side repo's `.git` directory (passed to git as `--git-dir`).
    git_dir: PathBuf,
    /// The workspace being snapshotted (passed to git as `--work-tree`).
    work_tree: PathBuf,
}
|
||||
|
||||
impl SnapshotRepo {
|
||||
/// Open or initialize the snapshot repo for `workspace`.
|
||||
///
|
||||
/// On first use this:
|
||||
/// 1. Creates the `~/.deepseek/snapshots/<…>/.git` dir.
|
||||
/// 2. Runs `git init --bare=false --quiet`.
|
||||
/// 3. Sets a fixed `user.name` / `user.email` so commits don't pick up
|
||||
/// the user's global git identity (we don't want our snapshots to
|
||||
/// look like they came from the user).
|
||||
pub fn open_or_init(workspace: &Path) -> io::Result<Self> {
|
||||
let work_tree = workspace
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| workspace.to_path_buf());
|
||||
|
||||
let _ = ensure_snapshot_dir(&work_tree)?;
|
||||
let git_dir = snapshot_git_dir(&work_tree);
|
||||
|
||||
let needs_init = !git_dir.exists();
|
||||
if needs_init {
|
||||
let parent = git_dir.parent().ok_or_else(|| {
|
||||
io::Error::new(io::ErrorKind::InvalidInput, "snapshot dir has no parent")
|
||||
})?;
|
||||
std::fs::create_dir_all(parent)?;
|
||||
// `git init` here uses the parent directory as the work tree
|
||||
// and stores metadata in `.git`. We then continue to use
|
||||
// explicit `--git-dir` / `--work-tree` flags for every other
|
||||
// command so behaviour is invariant of cwd.
|
||||
let init = Command::new("git")
|
||||
.arg("init")
|
||||
.arg("--quiet")
|
||||
.arg(parent)
|
||||
.output()
|
||||
.map_err(|e| io_other(format!("failed to spawn git init: {e}")))?;
|
||||
if !init.status.success() {
|
||||
return Err(io_other(format!(
|
||||
"git init failed: {}",
|
||||
String::from_utf8_lossy(&init.stderr).trim()
|
||||
)));
|
||||
}
|
||||
|
||||
// Pin a stable identity so snapshot commits are recognisable
|
||||
// and don't bleed into the user's git config.
|
||||
let _ = run_git(
|
||||
&git_dir,
|
||||
&work_tree,
|
||||
&["config", "user.name", "deepseek-snapshots"],
|
||||
);
|
||||
let _ = run_git(
|
||||
&git_dir,
|
||||
&work_tree,
|
||||
&["config", "user.email", "snapshots@deepseek-tui.local"],
|
||||
);
|
||||
// Don't auto-gc on every commit; we manage pruning ourselves.
|
||||
let _ = run_git(&git_dir, &work_tree, &["config", "gc.auto", "0"]);
|
||||
// Ignore CRLF rewriting — we want byte-for-byte fidelity.
|
||||
let _ = run_git(&git_dir, &work_tree, &["config", "core.autocrlf", "false"]);
|
||||
}
|
||||
|
||||
Ok(Self { git_dir, work_tree })
|
||||
}
|
||||
|
||||
/// Take a snapshot of the current working tree.
|
||||
///
|
||||
/// Internally: `git add -A` then `git commit --allow-empty -m <label>`.
|
||||
/// `git add -A` honours the user's workspace `.gitignore` because we
|
||||
/// keep the side repo's `core.excludesFile` empty and let git read
|
||||
/// the workspace's own `.gitignore` files when staging.
|
||||
///
|
||||
/// Returns the snapshot's commit SHA.
|
||||
pub fn snapshot(&self, label: &str) -> io::Result<SnapshotId> {
|
||||
// Stage every tracked + untracked path the workspace exposes.
|
||||
// `--all` here means `add` + `update` + `remove` — the same set
|
||||
// `git status` would show.
|
||||
let add = run_git(&self.git_dir, &self.work_tree, &["add", "-A"])?;
|
||||
if !add.status.success() {
|
||||
return Err(io_other(format!(
|
||||
"git add -A failed: {}",
|
||||
String::from_utf8_lossy(&add.stderr).trim()
|
||||
)));
|
||||
}
|
||||
|
||||
// `--allow-empty` so back-to-back snapshots with no changes
|
||||
// still produce a marker commit (otherwise `/restore N` indices
|
||||
// would skip turns where nothing changed).
|
||||
let commit = run_git(
|
||||
&self.git_dir,
|
||||
&self.work_tree,
|
||||
&[
|
||||
"commit",
|
||||
"--allow-empty",
|
||||
"--no-verify",
|
||||
"--no-gpg-sign",
|
||||
"-m",
|
||||
label,
|
||||
],
|
||||
)?;
|
||||
if !commit.status.success() {
|
||||
return Err(io_other(format!(
|
||||
"git commit failed: {}",
|
||||
String::from_utf8_lossy(&commit.stderr).trim()
|
||||
)));
|
||||
}
|
||||
|
||||
let head = run_git(&self.git_dir, &self.work_tree, &["rev-parse", "HEAD"])?;
|
||||
if !head.status.success() {
|
||||
return Err(io_other(format!(
|
||||
"git rev-parse HEAD failed: {}",
|
||||
String::from_utf8_lossy(&head.stderr).trim()
|
||||
)));
|
||||
}
|
||||
let sha = String::from_utf8_lossy(&head.stdout).trim().to_string();
|
||||
Ok(SnapshotId(sha))
|
||||
}
|
||||
|
||||
/// Restore the workspace to the state at `id`.
|
||||
///
|
||||
/// Uses `git checkout <sha> -- :/` which checks out every path in the
|
||||
/// snapshot tree relative to the workspace root. We do NOT touch the
|
||||
/// user's own `.git` — snapshots only contain working-tree files.
|
||||
pub fn restore(&self, id: &SnapshotId) -> io::Result<()> {
|
||||
let checkout = run_git(
|
||||
&self.git_dir,
|
||||
&self.work_tree,
|
||||
&["checkout", id.as_str(), "--", ":/"],
|
||||
)?;
|
||||
if !checkout.status.success() {
|
||||
return Err(io_other(format!(
|
||||
"git checkout failed: {}",
|
||||
String::from_utf8_lossy(&checkout.stderr).trim()
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// List up to `limit` most-recent snapshots, newest first.
|
||||
pub fn list(&self, limit: usize) -> io::Result<Vec<Snapshot>> {
|
||||
// `git log -<n>` is the short form of `--max-count=<n>`; if `limit`
|
||||
// is `usize::MAX` (caller asked for "everything") we pass an empty
|
||||
// count so git defaults to no upper bound.
|
||||
let mut args: Vec<String> = vec!["log".to_string()];
|
||||
if limit < usize::MAX {
|
||||
args.push(format!("--max-count={limit}"));
|
||||
}
|
||||
args.push("--pretty=format:%H%x09%at%x09%s".to_string());
|
||||
args.push("--no-color".to_string());
|
||||
let arg_refs: Vec<&str> = args.iter().map(String::as_str).collect();
|
||||
let log = run_git(&self.git_dir, &self.work_tree, &arg_refs)?;
|
||||
if !log.status.success() {
|
||||
// No commits yet → empty list.
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let stdout = String::from_utf8_lossy(&log.stdout);
|
||||
let mut out = Vec::new();
|
||||
for line in stdout.lines() {
|
||||
let mut parts = line.splitn(3, '\t');
|
||||
let sha = parts.next().unwrap_or("").to_string();
|
||||
let ts = parts
|
||||
.next()
|
||||
.and_then(|s| s.parse::<i64>().ok())
|
||||
.unwrap_or(0);
|
||||
let subject = parts.next().unwrap_or("").to_string();
|
||||
if sha.is_empty() {
|
||||
continue;
|
||||
}
|
||||
out.push(Snapshot {
|
||||
id: SnapshotId(sha),
|
||||
label: subject,
|
||||
timestamp: ts,
|
||||
});
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// Drop snapshots older than `max_age`, returning the count removed.
|
||||
///
|
||||
/// Strategy: identify keepable commits (younger than the cutoff),
|
||||
/// reset HEAD to the oldest survivor, then `git reflog expire` +
|
||||
/// `git gc --prune=now` to actually reclaim space. Cheap and avoids
|
||||
/// rewriting history when nothing has aged out.
|
||||
pub fn prune_older_than(&self, max_age: Duration) -> io::Result<usize> {
|
||||
let now = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map_err(|e| io_other(format!("clock error: {e}")))?
|
||||
.as_secs() as i64;
|
||||
let cutoff = now - max_age.as_secs() as i64;
|
||||
|
||||
let snapshots = self.list(usize::MAX)?;
|
||||
if snapshots.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Snapshots are newest-first. Find the index of the first one
|
||||
// at-or-older than the cutoff — every entry from that index
|
||||
// onward is a candidate for removal. We use `<=` so a 0-second
|
||||
// retention drops same-second commits (otherwise tests calling
|
||||
// `prune_older_than(Duration::ZERO)` immediately after creating
|
||||
// a snapshot would never prune anything).
|
||||
let cut_index = snapshots.iter().position(|s| s.timestamp <= cutoff);
|
||||
let Some(cut) = cut_index else {
|
||||
return Ok(0);
|
||||
};
|
||||
let removed = snapshots.len() - cut;
|
||||
if removed == 0 {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
if cut == 0 {
|
||||
// Every snapshot is older than the cutoff — wipe the repo
|
||||
// entirely so the next snapshot starts a fresh history.
|
||||
// Removing `.git/refs/heads/*` is enough to orphan the old
|
||||
// commits, then gc reclaims them.
|
||||
let refs_dir = self.git_dir.join("refs").join("heads");
|
||||
if refs_dir.exists() {
|
||||
for entry in std::fs::read_dir(&refs_dir)? {
|
||||
let path = entry?.path();
|
||||
if path.is_file() {
|
||||
let _ = std::fs::remove_file(&path);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Also drop HEAD's packed refs so `git log` returns nothing.
|
||||
let packed = self.git_dir.join("packed-refs");
|
||||
if packed.exists() {
|
||||
let _ = std::fs::remove_file(&packed);
|
||||
}
|
||||
} else {
|
||||
// Reset HEAD to the youngest commit older-than-cutoff's
|
||||
// *predecessor* — i.e. the oldest surviving snapshot.
|
||||
let survivor = &snapshots[cut - 1];
|
||||
let reset = run_git(
|
||||
&self.git_dir,
|
||||
&self.work_tree,
|
||||
&["update-ref", "HEAD", survivor.id.as_str()],
|
||||
)?;
|
||||
if !reset.status.success() {
|
||||
return Err(io_other(format!(
|
||||
"git update-ref failed: {}",
|
||||
String::from_utf8_lossy(&reset.stderr).trim()
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
// Reclaim space.
|
||||
let _ = run_git(
|
||||
&self.git_dir,
|
||||
&self.work_tree,
|
||||
&["reflog", "expire", "--expire=now", "--all"],
|
||||
);
|
||||
let _ = run_git(
|
||||
&self.git_dir,
|
||||
&self.work_tree,
|
||||
&["gc", "--prune=now", "--quiet"],
|
||||
);
|
||||
|
||||
Ok(removed)
|
||||
}
|
||||
|
||||
/// Return the side-repo's `.git` directory (for diagnostics).
|
||||
#[allow(dead_code)]
|
||||
pub fn git_dir(&self) -> &Path {
|
||||
&self.git_dir
|
||||
}
|
||||
|
||||
/// Return the work tree path (for diagnostics).
|
||||
#[allow(dead_code)]
|
||||
pub fn work_tree(&self) -> &Path {
|
||||
&self.work_tree
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn `git --git-dir <git_dir> --work-tree <work_tree> <args…>` and
/// collect its output.
///
/// Both location flags are always passed together so the command can only
/// ever operate on the side repo. Environment variables that would
/// redirect the index or object store to some *other* repository are
/// removed for the child process: `--git-dir` does not override them, and
/// they are set, for example, while git runs hooks.
///
/// The exit status is not inspected here; callers decide whether a
/// non-zero status is an error.
///
/// # Errors
///
/// Fails only if the `git` process cannot be spawned.
fn run_git(git_dir: &Path, work_tree: &Path, args: &[&str]) -> io::Result<Output> {
    const REDIRECTING_ENV: [&str; 5] = [
        "GIT_INDEX_FILE",
        "GIT_OBJECT_DIRECTORY",
        "GIT_ALTERNATE_OBJECT_DIRECTORIES",
        "GIT_COMMON_DIR",
        "GIT_NAMESPACE",
    ];

    let mut cmd = Command::new("git");
    for var in REDIRECTING_ENV {
        cmd.env_remove(var);
    }
    cmd.arg("--git-dir")
        .arg(git_dir)
        .arg("--work-tree")
        .arg(work_tree)
        .args(args);
    cmd.output()
}
|
||||
|
||||
/// Wrap a plain message in an `io::Error` of kind `Other`.
fn io_other(msg: impl Into<String>) -> io::Error {
    let text: String = msg.into();
    io::Error::new(io::ErrorKind::Other, text)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::lock_test_env;
    use std::sync::MutexGuard;
    use tempfile::tempdir;

    /// RAII guard that keeps `HOME` pointed at a tempdir for the duration
    /// of a test. It also owns the process-wide env mutex, so tests in
    /// different modules cannot overwrite each other's `HOME`.
    pub(super) struct ScopedHome {
        saved: Option<std::ffi::OsString>,
        _guard: MutexGuard<'static, ()>,
    }

    impl Drop for ScopedHome {
        fn drop(&mut self) {
            let saved = self.saved.take();
            // SAFETY: the env lock in `_guard` is still held at this point.
            unsafe {
                if let Some(value) = saved {
                    std::env::set_var("HOME", value);
                } else {
                    std::env::remove_var("HOME");
                }
            }
        }
    }

    /// Point `HOME` at `home` until the returned guard is dropped.
    pub(super) fn scoped_home(home: &Path) -> ScopedHome {
        let _guard = lock_test_env();
        let saved = std::env::var_os("HOME");
        // SAFETY: serialised by the global env lock acquired above.
        unsafe {
            std::env::set_var("HOME", home);
        }
        ScopedHome { saved, _guard }
    }

    /// Create `<tmp>/workspace` and open its side repo with `HOME` pinned
    /// to `tmp`, so the `dirs::home_dir()` lookup stays inside the
    /// sandbox. The guard is returned so the caller keeps `HOME` pinned
    /// for the rest of the test.
    fn make_repo(tmp: &Path) -> (SnapshotRepo, ScopedHome) {
        let workspace = tmp.join("workspace");
        std::fs::create_dir_all(&workspace).unwrap();
        let home = scoped_home(tmp);
        let repo = SnapshotRepo::open_or_init(&workspace).expect("open_or_init");
        (repo, home)
    }

    /// Write `contents` to `name` inside the repo's work tree.
    fn write_file(repo: &SnapshotRepo, name: &str, contents: &str) {
        std::fs::write(repo.work_tree().join(name), contents).unwrap();
    }

    #[test]
    fn snapshot_creates_commit_in_side_repo_only() {
        let tmp = tempdir().unwrap();
        let (repo, _home) = make_repo(tmp.path());
        write_file(&repo, "a.txt", "alpha");

        let id = repo.snapshot("pre-turn:1").expect("snapshot");
        assert_eq!(id.as_str().len(), 40);

        let history = repo.list(10).expect("list");
        assert_eq!(history.len(), 1);
        assert_eq!(history[0].label, "pre-turn:1");

        // Only the side dir got a `.git`; the workspace must not have one.
        assert!(!repo.work_tree().join(".git").exists());
    }

    #[test]
    fn restore_reverts_workspace_files() {
        let tmp = tempdir().unwrap();
        let (repo, _home) = make_repo(tmp.path());

        write_file(&repo, "file.txt", "original");
        let before = repo.snapshot("pre-turn:1").expect("snapshot");

        write_file(&repo, "file.txt", "clobbered");
        repo.snapshot("post-turn:1").expect("snapshot 2");

        repo.restore(&before).expect("restore");
        let contents = std::fs::read_to_string(repo.work_tree().join("file.txt")).unwrap();
        assert_eq!(contents, "original");
    }

    #[test]
    fn list_respects_limit() {
        let tmp = tempdir().unwrap();
        let (repo, _home) = make_repo(tmp.path());
        for turn in 0..5 {
            write_file(&repo, "f.txt", &format!("v{turn}"));
            repo.snapshot(&format!("turn:{turn}")).unwrap();
        }

        let newest = repo.list(3).unwrap();
        assert_eq!(newest.len(), 3);
        // Newest first.
        assert_eq!(newest[0].label, "turn:4");
    }

    #[test]
    fn prune_drops_snapshots_older_than_threshold() {
        let tmp = tempdir().unwrap();
        let (repo, _home) = make_repo(tmp.path());
        write_file(&repo, "f.txt", "v0");
        repo.snapshot("turn:0").unwrap();

        // Wait one second so the snapshot's commit timestamp is strictly
        // in the past relative to the prune call's "now" — otherwise
        // same-second comparisons make the assertion flaky.
        std::thread::sleep(Duration::from_millis(1100));

        let removed = repo.prune_older_than(Duration::from_secs(0)).unwrap();
        assert!(removed >= 1, "expected at least 1 pruned, got {removed}");

        // With everything pruned, the next snapshot starts a new history.
        write_file(&repo, "f.txt", "v1");
        repo.snapshot("turn:1").unwrap();
        let history = repo.list(10).unwrap();
        assert_eq!(history.len(), 1);
        assert_eq!(history[0].label, "turn:1");
    }

    #[test]
    fn snapshot_respects_workspace_gitignore() {
        let tmp = tempdir().unwrap();
        let (repo, _home) = make_repo(tmp.path());
        write_file(&repo, ".gitignore", "ignored.txt\n");
        write_file(&repo, "ignored.txt", "secret");
        write_file(&repo, "kept.txt", "public");

        let id = repo.snapshot("pre-turn:1").expect("snapshot");

        // The snapshot's tree must list kept.txt but not ignored.txt.
        let ls_tree_args = ["ls-tree", "-r", "--name-only", id.as_str()];
        let ls = run_git(repo.git_dir(), repo.work_tree(), &ls_tree_args).expect("ls-tree");
        let names = String::from_utf8_lossy(&ls.stdout);
        assert!(names.contains("kept.txt"), "kept.txt missing: {names}");
        assert!(
            !names.contains("ignored.txt"),
            "ignored.txt should not be in snapshot: {names}",
        );
    }

    #[test]
    fn open_or_init_is_idempotent() {
        let tmp = tempdir().unwrap();
        let first = make_repo(tmp.path());
        // Release the first repo and its HOME guard before re-opening;
        // acquiring the env lock twice would deadlock. The second open must
        // not panic and reuses the existing `.git`.
        drop(first);
        let _second = make_repo(tmp.path());
    }
}
|
||||
Reference in New Issue
Block a user