fix(security): tighten shell safety classification

2026-05-08 14:38:20 -05:00
parent 9ee3b51582
commit 2de3766477
1 changed files with 278 additions and 1 deletions
@@ -590,6 +590,10 @@ pub fn analyze_command(command: &str) -> SafetyAnalysis {
        );
    }

+    if let Some(analysis) = analyze_destructive_patterns(command) {
+        return analysis;
+    }
+
    if command.contains("&&") || command.contains("||") || command.contains(';') {
        // Chains of known-safe commands (cargo/git/zig/npm/etc.) are
        // routine for build+test workflows. Instead of hard-blocking,
@@ -622,7 +626,9 @@ pub fn analyze_command(command: &str) -> SafetyAnalysis {
        );
    }

-    // Check for dangerous patterns first
+    // Check for dangerous patterns first. The token-aware pass above handles
+    // spacing and quoting variants; these literal patterns remain as a compact
+    // fallback for legacy shapes.
    for (pattern, reason) in DANGEROUS_PATTERNS {
        if command_lower.contains(&pattern.to_lowercase()) {
            return SafetyAnalysis::dangerous(
@@ -717,6 +723,231 @@ pub fn analyze_command(command: &str) -> SafetyAnalysis {
    )
 }

+fn analyze_destructive_patterns(command: &str) -> Option<SafetyAnalysis> {
+    if primary_shell_command_is(command, "eval") {
+        return Some(SafetyAnalysis::dangerous(
+            command,
+            vec!["Command invokes shell eval".to_string()],
+            vec!["Avoid evaluating dynamically generated shell input".to_string()],
+        ));
+    }
+
+    if pipes_remote_content_to_shell(command) {
+        return Some(SafetyAnalysis::dangerous(
+            command,
+            vec!["Piping remote content directly to shell is dangerous".to_string()],
+            vec!["Download the script first and review it before execution".to_string()],
+        ));
+    }
+
+    for segment in split_command_segments(command) {
+        let tokens = shell_words(&segment);
+        let Some(start) = primary_token_index(&tokens) else {
+            continue;
+        };
+        match tokens[start].as_str() {
+            "rm" => {
+                if let Some(reason) = dangerous_rm_reason(&tokens[start + 1..]) {
+                    return Some(SafetyAnalysis::dangerous(
+                        command,
+                        vec![reason],
+                        vec!["Review the deletion target before retrying".to_string()],
+                    ));
+                }
+            }
+            "find" => {
+                if let Some(analysis) = analyze_find_mutation(command, &tokens[start + 1..]) {
+                    return Some(analysis);
+                }
+            }
+            _ => {}
+        }
+    }
+
+    None
+}
+
+fn split_command_segments(command: &str) -> Vec<String> {
+    command
+        .replace("&&", "\n")
+        .replace("||", "\n")
+        .replace(';', "\n")
+        .split('\n')
+        .map(str::trim)
+        .filter(|segment| !segment.is_empty())
+        .map(ToOwned::to_owned)
+        .collect()
+}
+
+fn shell_words(segment: &str) -> Vec<String> {
+    shlex::split(segment).unwrap_or_else(|| {
+        segment
+            .split_whitespace()
+            .map(|token| token.trim_matches(['"', '\'']).to_string())
+            .collect()
+    })
+}
+
+fn primary_token_index(tokens: &[String]) -> Option<usize> {
+    let mut idx = 0;
+    while idx < tokens.len() {
+        let token = tokens[idx].as_str();
+        if token == "env" {
+            idx += 1;
+            while idx < tokens.len()
+                && (tokens[idx].starts_with('-') || is_env_assignment(&tokens[idx]))
+            {
+                idx += 1;
+            }
+            continue;
+        }
+        if is_env_assignment(token) {
+            idx += 1;
+            continue;
+        }
+        return Some(idx);
+    }
+    None
+}
+
+fn is_env_assignment(token: &str) -> bool {
+    let Some((name, _value)) = token.split_once('=') else {
+        return false;
+    };
+    !name.is_empty()
+        && name
+            .chars()
+            .all(|ch| ch == '_' || ch.is_ascii_alphanumeric())
+        && name
+            .chars()
+            .next()
+            .is_some_and(|ch| ch == '_' || ch.is_ascii_alphabetic())
+}
+
+fn primary_shell_command_is(command: &str, expected: &str) -> bool {
+    split_command_segments(command).into_iter().any(|segment| {
+        let tokens = shell_words(&segment);
+        primary_token_index(&tokens)
+            .and_then(|idx| tokens.get(idx))
+            .is_some_and(|token| token == expected)
+    })
+}
+
+fn pipes_remote_content_to_shell(command: &str) -> bool {
+    split_command_segments(command).into_iter().any(|segment| {
+        let parts: Vec<&str> = segment.split('|').collect();
+        if parts.len() < 2 {
+            return false;
+        }
+        parts.windows(2).any(|window| {
+            let left = window[0].to_ascii_lowercase();
+            if !(left.contains("curl") || left.contains("wget")) {
+                return false;
+            }
+            let right_tokens = shell_words(window[1]);
+            primary_token_index(&right_tokens)
+                .and_then(|idx| right_tokens.get(idx))
+                .is_some_and(|token| matches!(token.as_str(), "sh" | "bash" | "zsh"))
+        })
+    })
+}
+
+fn dangerous_rm_reason(args: &[String]) -> Option<String> {
+    let mut recursive = false;
+    let mut force = false;
+    let mut targets = Vec::new();
+
+    for arg in args {
+        match arg.as_str() {
+            "--" => continue,
+            "--recursive" | "--dir" => recursive = true,
+            "--force" => force = true,
+            flag if flag.starts_with('-') && !flag.starts_with("--") => {
+                recursive |= flag.chars().any(|ch| matches!(ch, 'r' | 'R'));
+                force |= flag.chars().any(|ch| ch == 'f');
+            }
+            target => targets.push(target),
+        }
+    }
+
+    if !(recursive || force) {
+        return None;
+    }
+
+    for target in targets {
+        if is_root_delete_target(target) {
+            return Some("Recursive or forced deletion targets the root filesystem".to_string());
+        }
+        if is_home_delete_target(target) {
+            return Some("Recursive or forced deletion targets the home directory".to_string());
+        }
+        if target_contains_parent_escape(target) {
+            return Some("Recursive or forced deletion may escape the workspace".to_string());
+        }
+    }
+
+    None
+}
+
+fn analyze_find_mutation(command: &str, args: &[String]) -> Option<SafetyAnalysis> {
+    let has_delete = args.iter().any(|arg| arg == "-delete");
+    let execs_rm = args
+        .windows(2)
+        .any(|pair| pair[0] == "-exec" && pair[1] == "rm");
+    if !(has_delete || execs_rm) {
+        return None;
+    }
+
+    let targets: Vec<&str> = args
+        .iter()
+        .take_while(|arg| !arg.starts_with('-'))
+        .map(String::as_str)
+        .collect();
+    if targets.iter().any(|target| {
+        is_root_delete_target(target)
+            || is_home_delete_target(target)
+            || target_contains_parent_escape(target)
+    }) {
+        return Some(SafetyAnalysis::dangerous(
+            command,
+            vec!["find mutation targets a broad or external path".to_string()],
+            vec!["Restrict the find root to a workspace-relative path".to_string()],
+        ));
+    }
+
+    Some(SafetyAnalysis::requires_approval(
+        command,
+        vec!["find command may delete files".to_string()],
+    ))
+}
+
+fn is_root_delete_target(target: &str) -> bool {
+    let normalized = target.trim_matches(['"', '\'']).replace('\\', "/");
+    normalized == "/"
+        || normalized == "/*"
+        || normalized == "//"
+        || normalized.starts_with("/*/")
+        || normalized.starts_with("/.")
+}
+
+fn is_home_delete_target(target: &str) -> bool {
+    let normalized = target.trim_matches(['"', '\'']).replace('\\', "/");
+    let lower = normalized.to_ascii_lowercase();
+    lower == "~"
+        || lower.starts_with("~/")
+        || lower == "$home"
+        || lower.starts_with("$home/")
+        || lower == "${home}"
+        || lower.starts_with("${home}/")
+}
+
+fn target_contains_parent_escape(target: &str) -> bool {
+    target
+        .replace('\\', "/")
+        .split('/')
+        .any(|component| component == "..")
+}
+
 /// Check if a command is known to be safe
 fn is_safe_command(command: &str) -> bool {
    let command_lower = command.to_lowercase();
@@ -955,6 +1186,52 @@ mod tests {
        );
    }

+    #[test]
+    fn test_destructive_patterns_handle_spacing_and_quotes() {
+        assert_eq!(analyze_command("rm  -rf  /").level, SafetyLevel::Dangerous);
+        assert_eq!(
+            analyze_command("rm -rf \"/\"").level,
+            SafetyLevel::Dangerous
+        );
+        assert_eq!(analyze_command("rm -fr -- /").level, SafetyLevel::Dangerous);
+        assert_eq!(
+            analyze_command("FOO=bar rm -rf $HOME").level,
+            SafetyLevel::Dangerous
+        );
+    }
+
+    #[test]
+    fn test_destructive_patterns_scan_chained_segments() {
+        assert_eq!(
+            analyze_command("echo ok; rm -rf /").level,
+            SafetyLevel::Dangerous
+        );
+    }
+
+    #[test]
+    fn test_find_delete_requires_approval_or_blocks_broad_roots() {
+        assert_eq!(
+            analyze_command("find / -delete").level,
+            SafetyLevel::Dangerous
+        );
+        assert_eq!(
+            analyze_command("find . -delete").level,
+            SafetyLevel::RequiresApproval
+        );
+    }
+
+    #[test]
+    fn test_eval_invocation_is_blocked_without_substring_false_positive() {
+        assert_eq!(
+            analyze_command("eval $(echo test | base64 -d)").level,
+            SafetyLevel::Dangerous
+        );
+        assert_ne!(
+            analyze_command("cargo run --bin deepseek -- eval").level,
+            SafetyLevel::Dangerous
+        );
+    }
+
    #[test]
    fn test_null_byte_is_blocked() {
        assert_eq!(