fix(command-safety): reject null bytes in shell commands (#706) (#918)

Null bytes embedded in command strings can be used to slip past parsers that treat them as terminators while shells still see the trailing payload. The existing analyzer already blocks `\n` / `\r` multi-line input but lets `\0` through; add a matching dangerous classification beside it. This PR intentionally takes only the null-byte slice from #706. The broader `command.contains("eval")` / `command.contains("exec ")` guard from the same PR is *not* applied because it false-positives on routine commands such as `cargo run -- eval` (the offline eval harness) or any binary whose name contains `eval` (`evaluator.py`, `primeval`). A regression test pins that behavior. Tests: - `test_null_byte_is_blocked` — `ls\0 -la` and `echo hello\0world` classified as Dangerous. - `test_eval_substring_is_not_misclassified` — `cargo run --bin deepseek -- eval` and `python evaluator.py` are *not* Dangerous. Integrates #706. Co-authored-by: 浩淼的mac <haomiaodemac@haomiaodemacdeMacBook-Air.local>
2026-05-06 19:22:33 -05:00
parent 785f5c625f
commit 1122bb0333
1 changed files with 41 additions and 0 deletions
@@ -582,6 +582,14 @@ pub fn analyze_command(command: &str) -> SafetyAnalysis {
        );
    }

+    if command.contains('\0') {
+        return SafetyAnalysis::dangerous(
+            command,
+            vec!["Command contains a null byte".to_string()],
+            vec!["Strip embedded null bytes before retrying".to_string()],
+        );
+    }
+
    if command.contains("&&") || command.contains("||") || command.contains(';') {
        // Chains of known-safe commands (cargo/git/zig/npm/etc.) are
        // routine for build+test workflows. Instead of hard-blocking,
@@ -947,6 +955,39 @@ mod tests {
        );
    }

+    #[test]
+    fn test_null_byte_is_blocked() {
+        assert_eq!(
+            analyze_command("ls\0 -la").level,
+            SafetyLevel::Dangerous,
+            "embedded NUL byte must be rejected as dangerous"
+        );
+        assert_eq!(
+            analyze_command("echo hello\0world").level,
+            SafetyLevel::Dangerous
+        );
+    }
+
+    #[test]
+    fn test_eval_substring_is_not_misclassified() {
+        // Words like `evaluate` / `evaluation` / `cargo run -- eval`
+        // contain the substring "eval" but are not eval invocations.
+        // Guard against the naive `command.contains("eval")` regression
+        // — these should stay safe / workspace-safe, never Dangerous.
+        let evaluate_safe = analyze_command("cargo run --bin deepseek -- eval").level;
+        assert_ne!(
+            evaluate_safe,
+            SafetyLevel::Dangerous,
+            "running the eval harness should not be classified as dangerous"
+        );
+        let evaluator = analyze_command("python evaluator.py --suite default").level;
+        assert_ne!(
+            evaluator,
+            SafetyLevel::Dangerous,
+            "running an evaluator script should not be classified as dangerous"
+        );
+    }
+
    #[test]
    fn test_privileged_commands() {
        assert_eq!(