fix(command-safety): reject null bytes in shell commands (#706) (#918)

Null bytes embedded in command strings can be used to slip past
parsers that treat them as terminators while shells still see the
trailing payload. The existing analyzer already blocks `\n` / `\r`
multi-line input but lets `\0` through; add a matching dangerous
classification beside it.

This PR intentionally takes only the null-byte slice from #706. The
broader `command.contains("eval")` / `command.contains("exec ")`
guard from the same PR is *not* applied because it false-positives on
routine commands such as `cargo run -- eval` (the offline eval
harness) or any binary whose name contains `eval` (`evaluator.py`,
`primeval`). A regression test pins that behavior.

Tests:
- `test_null_byte_is_blocked` — `ls\0 -la` and `echo hello\0world`
  classified as Dangerous.
- `test_eval_substring_is_not_misclassified` — `cargo run --bin
  deepseek -- eval` and `python evaluator.py` are *not* Dangerous.

Integrates #706.

Co-authored-by: 浩淼的mac <haomiaodemac@haomiaodemacdeMacBook-Air.local>
This commit is contained in:
Hunter Bown
2026-05-06 19:22:33 -05:00
committed by GitHub
parent 785f5c625f
commit 1122bb0333
+41
View File
@@ -582,6 +582,14 @@ pub fn analyze_command(command: &str) -> SafetyAnalysis {
);
}
if command.contains('\0') {
return SafetyAnalysis::dangerous(
command,
vec!["Command contains a null byte".to_string()],
vec!["Strip embedded null bytes before retrying".to_string()],
);
}
if command.contains("&&") || command.contains("||") || command.contains(';') {
// Chains of known-safe commands (cargo/git/zig/npm/etc.) are
// routine for build+test workflows. Instead of hard-blocking,
@@ -947,6 +955,39 @@ mod tests {
);
}
#[test]
fn test_null_byte_is_blocked() {
assert_eq!(
analyze_command("ls\0 -la").level,
SafetyLevel::Dangerous,
"embedded NUL byte must be rejected as dangerous"
);
assert_eq!(
analyze_command("echo hello\0world").level,
SafetyLevel::Dangerous
);
}
#[test]
fn test_eval_substring_is_not_misclassified() {
// Words like `evaluate` / `evaluation` / `cargo run -- eval`
// contain the substring "eval" but are not eval invocations.
// Guard against the naive `command.contains("eval")` regression
// — these should stay safe / workspace-safe, never Dangerous.
let evaluate_safe = analyze_command("cargo run --bin deepseek -- eval").level;
assert_ne!(
evaluate_safe,
SafetyLevel::Dangerous,
"running the eval harness should not be classified as dangerous"
);
let evaluator = analyze_command("python evaluator.py --suite default").level;
assert_ne!(
evaluator,
SafetyLevel::Dangerous,
"running an evaluator script should not be classified as dangerous"
);
}
#[test]
fn test_privileged_commands() {
assert_eq!(