Merge branch 'feat/v070-arity' (#131 bash-command arity for safer auto_allow)

- crates/tui/src/command_safety.rs — port opencode arity dictionary (160+ entries) + classify_command()
- crates/tui/src/tools/approval_cache.rs — cache key uses canonical prefix
- config.example.toml — auto_allow prefix-semantics docblock
- 41 colocated unit tests
This commit is contained in:
Hunter Bown
2026-04-28 00:06:21 -05:00
3 changed files with 572 additions and 9 deletions
+10
View File
@@ -62,6 +62,16 @@ memory_path = "~/.deepseek/memory.md"
allow_shell = true
approval_policy = "on-request" # on-request | untrusted | never
sandbox_mode = "workspace-write" # read-only | workspace-write | danger-full-access | external-sandbox
# auto_allow entries match by command prefix, not raw string.
# See command_safety.rs for the prefix dictionary.
#
# Examples:
# auto_allow = ["git status"] # auto-approves: git status, git status -s, git status --porcelain
# # does NOT auto-approve: git push, git checkout
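# auto_allow = ["cargo check", "npm run dev"]
#   # for some commands (npm run, docker compose, kubectl get, go mod, ...) the
#   # canonical prefix includes the word after the subcommand, e.g. "npm run dev"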
#
# auto_allow = []
max_subagents = 5 # optional (1-20)
# Optional managed policy paths (defaults to /etc/deepseek/*.toml on unix):
+552
View File
@@ -5,6 +5,300 @@
//!
//! This module provides pre-execution analysis of shell commands to detect
//! potentially dangerous patterns and prevent accidental damage.
//!
//! ## Command prefix classification
//!
//! [`classify_command`] maps a token slice to its canonical command prefix.
//! The prefix is the portion of the command that identifies *what action* is
//! being taken, stripped of flags and extra positional arguments.
//!
//! The arity dictionary [`COMMAND_ARITY`] encodes, for each known prefix, how
//! many *positional* (non-flag) words, *including* the base command word, form
//! the prefix. Flags (tokens that start with `-`) never count toward arity.
//!
//! ### Examples
//!
//! | Input tokens                          | Arity | Canonical prefix      |
//! |---------------------------------------|-------|-----------------------|
//! | `["git", "status", "-s"]`             | 2     | `"git status"`        |
//! | `["git", "checkout", "main"]`         | 2     | `"git checkout"`      |
//! | `["npm", "run", "dev"]`               | 3     | `"npm run dev"`       |
//! | `["docker", "compose", "up"]`         | 3     | `"docker compose up"` |
//! | `["cargo", "check", "--workspace"]`   | 2     | `"cargo check"`       |
//!
//! Ported from opencode `packages/opencode/src/permission/arity.ts`.
// ── Arity dictionary ──────────────────────────────────────────────────────────
/// Arity dictionary: maps a lookup key (space-separated, lowercase) to the
/// number of words, *including the base command word*, that form the
/// canonical prefix.
///
/// An arity of `2` means "base command + subcommand" (`git status`); an arity
/// of `3` means the word after the subcommand is part of the prefix as well
/// (`npm run dev`, `docker compose up`, `kubectl get pods`).
///
/// Ordinary flags (tokens starting with `-`) are **never** counted toward
/// arity — that is the central invariant: `auto_allow = ["git status"]` must
/// match `git status -s`, `git status --porcelain`, etc., but not `git push`.
///
/// Two keys (`python -m`, `python3 -m`) contain a flag token themselves. They
/// can only match when the classifier looks at the raw token that follows the
/// base command; a lookup built purely from flag-stripped tokens never
/// reaches them.
///
/// Keys are unique, and every `pip` entry has a matching `pip3` entry so both
/// spellings classify the same way.
///
/// Ported from opencode `packages/opencode/src/permission/arity.ts` (163 LOC).
pub static COMMAND_ARITY: &[(&str, u8)] = &[
    // ── git ──────────────────────────────────────────────────────────────────
    ("git add", 2),
    ("git am", 2),
    ("git apply", 2),
    ("git bisect", 2),
    ("git blame", 2),
    ("git branch", 2),
    ("git cat-file", 2),
    ("git checkout", 2),
    ("git cherry-pick", 2),
    ("git clean", 2),
    ("git clone", 2),
    ("git commit", 2),
    ("git config", 2),
    ("git describe", 2),
    ("git diff", 2),
    ("git fetch", 2),
    ("git format-patch", 2),
    ("git grep", 2),
    ("git init", 2),
    ("git log", 2),
    ("git ls-files", 2),
    ("git merge", 2),
    ("git mv", 2),
    ("git notes", 2),
    ("git pull", 2),
    ("git push", 2),
    ("git rebase", 2),
    ("git reflog", 2),
    ("git remote", 2),
    ("git reset", 2),
    ("git restore", 2),
    ("git revert", 2),
    ("git rm", 2),
    ("git show", 2),
    ("git stash", 2),
    ("git status", 2),
    ("git submodule", 2),
    ("git switch", 2),
    ("git tag", 2),
    ("git worktree", 2),
    // ── npm ──────────────────────────────────────────────────────────────────
    ("npm audit", 2),
    ("npm build", 2),
    ("npm cache", 2),
    ("npm ci", 2),
    ("npm dedupe", 2),
    ("npm fund", 2),
    ("npm help", 2),
    ("npm info", 2),
    ("npm init", 2),
    ("npm install", 2),
    ("npm link", 2),
    ("npm list", 2),
    ("npm ls", 2),
    ("npm outdated", 2),
    ("npm pack", 2),
    ("npm prune", 2),
    ("npm publish", 2),
    ("npm rebuild", 2),
    ("npm run", 3),
    ("npm start", 2),
    ("npm stop", 2),
    ("npm test", 2),
    ("npm uninstall", 2),
    ("npm update", 2),
    ("npm version", 2),
    ("npm view", 2),
    // ── yarn ─────────────────────────────────────────────────────────────────
    ("yarn add", 2),
    ("yarn audit", 2),
    ("yarn build", 2),
    ("yarn install", 2),
    ("yarn run", 3),
    ("yarn start", 2),
    ("yarn test", 2),
    ("yarn upgrade", 2),
    ("yarn workspace", 3),
    // ── pnpm ─────────────────────────────────────────────────────────────────
    ("pnpm add", 2),
    ("pnpm build", 2),
    ("pnpm install", 2),
    ("pnpm run", 3),
    ("pnpm start", 2),
    ("pnpm test", 2),
    ("pnpm update", 2),
    // ── cargo ────────────────────────────────────────────────────────────────
    ("cargo add", 2),
    ("cargo bench", 2),
    ("cargo build", 2),
    ("cargo check", 2),
    ("cargo clean", 2),
    ("cargo clippy", 2),
    ("cargo doc", 2),
    ("cargo fix", 2),
    ("cargo fmt", 2),
    ("cargo generate", 2),
    ("cargo install", 2),
    ("cargo metadata", 2),
    ("cargo package", 2),
    ("cargo publish", 2),
    ("cargo remove", 2),
    ("cargo run", 2),
    ("cargo search", 2),
    ("cargo test", 2),
    ("cargo tree", 2),
    ("cargo uninstall", 2),
    ("cargo update", 2),
    ("cargo yank", 2),
    // ── docker ───────────────────────────────────────────────────────────────
    ("docker build", 2),
    ("docker compose", 3),
    ("docker container", 3),
    ("docker cp", 2),
    ("docker exec", 2),
    ("docker image", 3),
    ("docker images", 2),
    ("docker inspect", 2),
    ("docker kill", 2),
    ("docker logs", 2),
    ("docker network", 3),
    ("docker ps", 2),
    ("docker pull", 2),
    ("docker push", 2),
    ("docker rm", 2),
    ("docker rmi", 2),
    ("docker run", 2),
    ("docker start", 2),
    ("docker stop", 2),
    ("docker system", 3),
    ("docker tag", 2),
    ("docker volume", 3),
    // ── kubectl ──────────────────────────────────────────────────────────────
    ("kubectl apply", 2),
    ("kubectl create", 3),
    ("kubectl delete", 3),
    ("kubectl describe", 3),
    ("kubectl exec", 2),
    ("kubectl explain", 2),
    ("kubectl get", 3),
    ("kubectl label", 2),
    ("kubectl logs", 2),
    ("kubectl patch", 2),
    ("kubectl port-forward", 2),
    ("kubectl rollout", 3),
    ("kubectl scale", 2),
    ("kubectl set", 2),
    ("kubectl top", 3),
    // ── go ───────────────────────────────────────────────────────────────────
    ("go build", 2),
    ("go clean", 2),
    ("go env", 2),
    ("go fmt", 2),
    ("go generate", 2),
    ("go get", 2),
    ("go install", 2),
    ("go list", 2),
    ("go mod", 3),
    ("go run", 2),
    ("go test", 2),
    ("go vet", 2),
    ("go work", 3),
    // ── python / pip ─────────────────────────────────────────────────────────
    ("pip install", 2),
    ("pip uninstall", 2),
    ("pip list", 2),
    ("pip show", 2),
    ("pip freeze", 2),
    ("pip3 install", 2),
    ("pip3 uninstall", 2),
    ("pip3 list", 2),
    ("pip3 show", 2),
    // Mirrors `pip freeze`; without it `pip3 freeze` degrades to the bare
    // `pip3` prefix and is classified differently from its `pip` twin.
    ("pip3 freeze", 2),
    ("python -m", 3),
    ("python3 -m", 3),
    // ── make / cmake ─────────────────────────────────────────────────────────
    ("make", 1),
    // ── gh (GitHub CLI) ──────────────────────────────────────────────────────
    ("gh pr", 3),
    ("gh issue", 3),
    ("gh repo", 3),
    ("gh release", 3),
    ("gh workflow", 3),
    ("gh run", 3),
    ("gh secret", 3),
    // ── rustup ───────────────────────────────────────────────────────────────
    ("rustup default", 2),
    ("rustup install", 2),
    ("rustup show", 2),
    ("rustup target", 3),
    ("rustup toolchain", 3),
    ("rustup update", 2),
    // ── deno / bun / node ────────────────────────────────────────────────────
    ("deno run", 2),
    ("deno test", 2),
    ("deno fmt", 2),
    ("deno lint", 2),
    ("bun add", 2),
    ("bun build", 2),
    ("bun install", 2),
    ("bun run", 3),
    ("bun test", 2),
    ("npx", 2),
];
/// Return the canonical command prefix for a slice of command tokens.
///
/// The prefix is determined by the [`COMMAND_ARITY`] dictionary:
///
/// 1. Tokens that start with `-` are treated as flags and **skipped** — they
///    never contribute to arity. The single exception is a flag that directly
///    follows the base command *and* forms a dictionary key with it (e.g.
///    `python -m`); that flag is kept so the key can actually match.
/// 2. Positional tokens are ASCII-lowercased before lookup and in the result;
///    a kept flag is compared and returned verbatim.
/// 3. The arity value `n` means that `n` words (including the base command
///    name) form the canonical prefix. If fewer words are available, all of
///    them are returned.
/// 4. The longest matching dictionary entry wins (greedy).
/// 5. If no dictionary entry matches, the single base command word is returned
///    as the prefix.
///
/// Returns an empty string when `tokens` is empty or contains only flags.
///
/// This function performs no shell parsing: operators such as `&&`, `|` or `;`
/// arrive as ordinary tokens, so `["git", "status", "&&", "rm", "x"]` still
/// classifies as `"git status"`.
/// NOTE(review): callers are presumably expected to split or reject compound
/// commands before classifying — confirm.
///
/// # Examples
///
/// ```
/// # use deepseek_tui::command_safety::classify_command;
/// assert_eq!(classify_command(&["git", "status", "-s"]), "git status");
/// assert_eq!(classify_command(&["git", "push", "origin"]), "git push");
/// assert_eq!(classify_command(&["cargo", "check", "--workspace"]), "cargo check");
/// assert_eq!(classify_command(&["npm", "run", "dev"]), "npm run dev");
/// assert_eq!(classify_command(&["python", "-m", "pytest", "-q"]), "python -m pytest");
/// assert_eq!(classify_command(&["ls", "-la"]), "ls");
/// ```
pub fn classify_command(tokens: &[&str]) -> String {
    // Longest candidate key (in words) that is probed against the dictionary.
    const MAX_KEY_WORDS: usize = 3;

    // Dictionary lookup: arity for an exact key, if present.
    let arity_of = |key: &str| -> Option<usize> {
        COMMAND_ARITY
            .iter()
            .find(|(k, _)| *k == key)
            .map(|&(_, arity)| usize::from(arity))
    };

    // Words that take part in matching: lowercased positionals, plus a flag
    // in second position when "<base> <flag>" is itself a dictionary key.
    let mut words: Vec<String> = Vec::with_capacity(tokens.len());
    for (idx, token) in tokens.iter().enumerate() {
        if !token.starts_with('-') {
            words.push(token.to_ascii_lowercase());
            continue;
        }
        // Without this, keys such as `python -m` are unreachable and
        // `python -m <module>` collapses to the bare `python` prefix.
        let completes_key = idx == 1
            && words.len() == 1
            && arity_of(&format!("{} {}", words[0], token)).is_some();
        if completes_key {
            words.push((*token).to_string());
        }
    }

    if words.is_empty() {
        return String::new();
    }

    // Probe from the longest candidate down to the base command alone.
    let deepest = words.len().min(MAX_KEY_WORDS);
    for depth in (1..=deepest).rev() {
        if let Some(arity) = arity_of(&words[..depth].join(" ")) {
            // Never take more words than the command actually has.
            let take = arity.min(words.len());
            return words[..take].join(" ");
        }
    }

    // No dictionary match → single-word prefix (the base command name).
    words.swap_remove(0)
}
/// Safety classification of a command
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -646,4 +940,262 @@ mod tests {
CommandCategory::System
);
}
// ── classify_command tests ────────────────────────────────────────────────
/// Test helper: break `s` into whitespace-separated tokens and return what
/// `classify_command` yields for them.
fn classify(s: &str) -> String {
    classify_command(&s.split_whitespace().collect::<Vec<&str>>())
}
// ── git (arity 2 each) ────────────────────────────────────────────────────
/// Bare `git status` is its own prefix.
#[test]
fn classify_git_status_bare() {
    let prefix = classify("git status");
    assert_eq!(prefix, "git status");
}

/// A short flag does not become part of the prefix.
#[test]
fn classify_git_status_with_short_flag() {
    let prefix = classify("git status -s");
    assert_eq!(prefix, "git status");
}

/// A long flag does not become part of the prefix.
#[test]
fn classify_git_status_with_long_flag() {
    let prefix = classify("git status --porcelain");
    assert_eq!(prefix, "git status");
}

/// `git push …` must never be mistaken for `git status`.
#[test]
fn classify_git_push_does_not_equal_git_status() {
    let prefix = classify("git push origin main");
    assert_ne!(prefix, "git status");
}

/// Extra positionals after the subcommand are dropped.
#[test]
fn classify_git_push() {
    let prefix = classify("git push origin main");
    assert_eq!(prefix, "git push");
}

/// `--force` is a flag, so it is stripped; the prefix stays `git push`.
#[test]
fn classify_git_push_force() {
    let prefix = classify("git push --force");
    assert_eq!(prefix, "git push");
}

/// Several flags in a row are all ignored.
#[test]
fn classify_git_log_with_flags() {
    let prefix = classify("git log --oneline --graph");
    assert_eq!(prefix, "git log");
}

/// A revision argument is not part of the prefix.
#[test]
fn classify_git_diff() {
    let prefix = classify("git diff HEAD~1");
    assert_eq!(prefix, "git diff");
}

/// The branch name is not part of the prefix.
#[test]
fn classify_git_checkout() {
    let prefix = classify("git checkout main");
    assert_eq!(prefix, "git checkout");
}

/// Neither `-m` nor its (whitespace-split) value ends up in the prefix.
#[test]
fn classify_git_commit() {
    let prefix = classify("git commit -m 'fix'");
    assert_eq!(prefix, "git commit");
}

/// A two-word command with nothing after it.
#[test]
fn classify_git_stash() {
    let prefix = classify("git stash");
    assert_eq!(prefix, "git stash");
}

/// Flag plus revision argument: both are dropped.
#[test]
fn classify_git_rebase() {
    let prefix = classify("git rebase -i HEAD~3");
    assert_eq!(prefix, "git rebase");
}
// ── cargo (arity 2 each) ─────────────────────────────────────────────────
/// Bare `cargo check` is its own prefix.
#[test]
fn classify_cargo_check_bare() {
    let prefix = classify("cargo check");
    assert_eq!(prefix, "cargo check");
}

/// `--workspace` is a flag and is ignored.
#[test]
fn classify_cargo_check_with_flag() {
    let prefix = classify("cargo check --workspace");
    assert_eq!(prefix, "cargo check");
}

/// `--release` is a flag and is ignored.
#[test]
fn classify_cargo_build() {
    let prefix = classify("cargo build --release");
    assert_eq!(prefix, "cargo build");
}

/// `--locked` is a flag and is ignored.
#[test]
fn classify_cargo_test() {
    let prefix = classify("cargo test --locked");
    assert_eq!(prefix, "cargo test");
}

/// `--all-targets` is a flag and is ignored.
#[test]
fn classify_cargo_clippy() {
    let prefix = classify("cargo clippy --all-targets");
    assert_eq!(prefix, "cargo clippy");
}

/// `--all` is a flag and is ignored.
#[test]
fn classify_cargo_fmt() {
    let prefix = classify("cargo fmt --all");
    assert_eq!(prefix, "cargo fmt");
}
// ── npm ──────────────────────────────────────────────────────────────────
/// `npm run` has arity 3: base = `npm`, sub = `run`, script = `dev`.
#[test]
fn classify_npm_run_dev_arity_3() {
    let prefix = classify("npm run dev");
    assert_eq!(prefix, "npm run dev");
}

/// A different script name yields a different arity-3 prefix.
#[test]
fn classify_npm_run_build_arity_3() {
    let prefix = classify("npm run build");
    assert_eq!(prefix, "npm run build");
}

/// Arity-2 npm subcommand.
#[test]
fn classify_npm_install() {
    let prefix = classify("npm install");
    assert_eq!(prefix, "npm install");
}

/// Arity-2 npm subcommand.
#[test]
fn classify_npm_test() {
    let prefix = classify("npm test");
    assert_eq!(prefix, "npm test");
}
// ── docker ───────────────────────────────────────────────────────────────
/// `docker compose` has arity 3, so `up` belongs to the prefix.
#[test]
fn classify_docker_compose_up_arity_3() {
    let prefix = classify("docker compose up");
    assert_eq!(prefix, "docker compose up");
}

/// `docker compose` has arity 3, so `down` belongs to the prefix.
#[test]
fn classify_docker_compose_down_arity_3() {
    let prefix = classify("docker compose down");
    assert_eq!(prefix, "docker compose down");
}

/// Flag, flag value and build context are all outside the arity-2 prefix.
#[test]
fn classify_docker_build() {
    let prefix = classify("docker build -t myapp .");
    assert_eq!(prefix, "docker build");
}

/// `-a` is a flag and is ignored.
#[test]
fn classify_docker_ps() {
    let prefix = classify("docker ps -a");
    assert_eq!(prefix, "docker ps");
}

/// The image name is not part of the arity-2 prefix.
#[test]
fn classify_docker_run() {
    let prefix = classify("docker run --rm ubuntu");
    assert_eq!(prefix, "docker run");
}
// ── kubectl ──────────────────────────────────────────────────────────────
/// `kubectl get` has arity 3, so the resource kind belongs to the prefix.
#[test]
fn classify_kubectl_get_pods() {
    let prefix = classify("kubectl get pods");
    assert_eq!(prefix, "kubectl get pods");
}

/// `kubectl apply` has arity 2; the manifest path is dropped.
#[test]
fn classify_kubectl_apply() {
    let prefix = classify("kubectl apply -f manifest.yaml");
    assert_eq!(prefix, "kubectl apply");
}

/// `kubectl logs` has arity 2; the pod name is dropped.
#[test]
fn classify_kubectl_logs() {
    let prefix = classify("kubectl logs my-pod");
    assert_eq!(prefix, "kubectl logs");
}
// ── go ───────────────────────────────────────────────────────────────────
/// The package pattern is not part of the arity-2 prefix.
#[test]
fn classify_go_build() {
    let prefix = classify("go build ./...");
    assert_eq!(prefix, "go build");
}

/// The package pattern is not part of the arity-2 prefix.
#[test]
fn classify_go_test() {
    let prefix = classify("go test ./...");
    assert_eq!(prefix, "go test");
}

/// `go mod` has arity 3, so `tidy` belongs to the prefix.
#[test]
fn classify_go_mod_tidy() {
    let prefix = classify("go mod tidy");
    assert_eq!(prefix, "go mod tidy");
}
// ── pip ──────────────────────────────────────────────────────────────────
/// The package name is not part of the arity-2 prefix.
#[test]
fn classify_pip_install() {
    let prefix = classify("pip install requests");
    assert_eq!(prefix, "pip install");
}

/// `--outdated` is a flag and is ignored.
#[test]
fn classify_pip_list() {
    let prefix = classify("pip list --outdated");
    assert_eq!(prefix, "pip list");
}
// ── unknown commands fall back to single-word prefix ──────────────────────
/// A command with no dictionary entry classifies as itself.
#[test]
fn classify_unknown_single_word() {
    let prefix = classify("ls");
    assert_eq!(prefix, "ls");
}

/// `ls` has no dictionary entry; with flags stripped only the base word is left.
#[test]
fn classify_unknown_with_flags() {
    let prefix = classify("ls -la");
    assert_eq!(prefix, "ls");
}

/// An empty token slice yields an empty prefix.
#[test]
fn classify_empty_gives_empty() {
    let no_tokens: [&str; 0] = [];
    assert_eq!(classify_command(&no_tokens), "");
}
// ── auto_allow semantics ──────────────────────────────────────────────────
/// Core requirement from the issue: an allow-list holding `"git status"` must
/// accept `git status -s` and `git status --porcelain`, but NOT `git push`.
/// This test covers the accepting half.
#[test]
fn auto_allow_git_status_matches_variants() {
    let allow_list = ["git status"];
    // Every variant below must classify to a prefix found in the allow-list.
    let approved_commands = [
        "git status",
        "git status -s",
        "git status --porcelain",
        "git status --short --branch",
    ];
    for cmd in approved_commands.iter() {
        let prefix = classify(cmd);
        let is_allowed = allow_list.iter().any(|&allowed| allowed == prefix.as_str());
        assert!(
            is_allowed,
            "Expected 'git status' to match command '{cmd}', got prefix '{prefix}'"
        );
    }
}

/// The rejecting half: other git subcommands must not classify to a prefix
/// that the `"git status"` allow-list contains.
#[test]
fn auto_allow_git_status_does_not_match_push_or_checkout() {
    let allow_list = ["git status"];
    let denied_commands = ["git push", "git push origin main", "git checkout main"];
    for cmd in denied_commands.iter() {
        let prefix = classify(cmd);
        let is_allowed = allow_list.iter().any(|&allowed| allowed == prefix.as_str());
        assert!(
            !is_allowed,
            "Expected 'git push'/'git checkout' NOT to match 'git status' allow_list, but got prefix '{prefix}' for '{cmd}'"
        );
    }
}
}
+10 -9
View File
@@ -23,6 +23,8 @@
use std::collections::HashMap;
use std::time::Instant;
use crate::command_safety::classify_command;
/// The fingerprint of a tool call — stable enough to match repeated
/// calls but specific enough to avoid privilege confusion.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -139,19 +141,18 @@ pub fn build_approval_key(tool_name: &str, input: &serde_json::Value) -> Approva
ApprovalKey(fingerprint)
}
/// Extract the first three nonflag tokens from the command string.
/// Return the canonical command prefix for the shell command in `input`.
///
/// Uses [`classify_command`] from the arity dictionary so that
/// `auto_allow = ["git status"]` correctly matches `git status -s` and
/// `git status --porcelain` without also matching `git push`.
fn command_prefix(input: &serde_json::Value) -> String {
let cmd = input.get("command").and_then(|v| v.as_str()).unwrap_or("");
let tokens: Vec<&str> = cmd
.split_whitespace()
.filter(|t| !t.starts_with('-'))
.take(3)
.collect();
let tokens: Vec<&str> = cmd.split_whitespace().collect();
if tokens.is_empty() {
"<empty>".to_string()
} else {
tokens.join(" ")
return "<empty>".to_string();
}
classify_command(&tokens)
}
/// Hash the sorted set of file paths referenced by a patch input.