fix(whaleflow): reject unknown workflow references (#2837)

This commit is contained in:
Hunter Bown
2026-06-05 22:58:23 -07:00
committed by GitHub
parent 96b825b84e
commit 6bb564e49c
5 changed files with 191 additions and 8 deletions
+5 -1
View File
@@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow
results now also carry separate ARMH/shared-memo and provider prompt-cache
telemetry counters, with mock aggregation tests, so #2671 can progress
without wiring live RLM calls or billing-affecting provider behavior yet.
without wiring live RLM calls or billing-affecting provider behavior yet. The
Starlark and typed-IR gates now also reject unknown leaf dependencies,
reducer inputs, and teacher-review candidates before mock execution or replay,
keeping generated workflows fail-closed while runtime/worktree semantics stay
deferred.
Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
- Added a state-store v2 schema migration for WhaleFlow trace tables covering
workflow, branch, leaf, control-node, and teacher-candidate runs. The
+5 -1
View File
@@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow
results now also carry separate ARMH/shared-memo and provider prompt-cache
telemetry counters, with mock aggregation tests, so #2671 can progress
without wiring live RLM calls or billing-affecting provider behavior yet.
without wiring live RLM calls or billing-affecting provider behavior yet. The
Starlark and typed-IR gates now also reject unknown leaf dependencies,
reducer inputs, and teacher-review candidates before mock execution or replay,
keeping generated workflows fail-closed while runtime/worktree semantics stay
deferred.
Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
- Added a state-store v2 schema migration for WhaleFlow trace tables covering
workflow, branch, leaf, control-node, and teacher-candidate runs. The
+154 -1
View File
@@ -878,7 +878,7 @@ impl MockWorkflowExecutor {
if let Some(max_children) = spec.max_children {
nodes.truncate(max_children);
}
validate_workflow_nodes(&nodes)?;
validate_workflow_node_shapes(&nodes)?;
self.execute_nodes(&nodes, execution)?;
execution.control_node_results.push(ControlNodeResult {
node_id: spec.id.clone(),
@@ -973,6 +973,12 @@ pub enum WorkflowExecutionError {
EmptyLeafPrompt { leaf: String },
#[error("duplicate workflow node `{node}`")]
DuplicateNodeId { node: String },
#[error("workflow node `{node}` has unknown {field} reference `{reference}`")]
UnknownNodeReference {
node: String,
field: &'static str,
reference: String,
},
}
fn default_frontier_limit() -> usize {
@@ -994,6 +1000,14 @@ fn node_id(node: &WorkflowNode) -> String {
pub(crate) fn validate_workflow_nodes(
nodes: &[WorkflowNode],
) -> Result<(), WorkflowExecutionError> {
let mut seen = BTreeSet::new();
validate_workflow_nodes_inner(nodes, &mut seen)?;
validate_workflow_references(nodes, &seen)
}
pub(crate) fn validate_workflow_node_shapes(
nodes: &[WorkflowNode],
) -> Result<(), WorkflowExecutionError> {
let mut seen = BTreeSet::new();
validate_workflow_nodes_inner(nodes, &mut seen)
@@ -1033,6 +1047,68 @@ fn validate_workflow_nodes_inner(
Ok(())
}
fn validate_workflow_references(
nodes: &[WorkflowNode],
known_ids: &BTreeSet<String>,
) -> Result<(), WorkflowExecutionError> {
for node in nodes {
match node {
WorkflowNode::BranchSet(spec) => {
validate_workflow_references(&spec.children, known_ids)?;
}
WorkflowNode::Leaf(spec) => {
validate_known_references(
spec.id.as_str(),
"depends_on_results",
&spec.depends_on_results,
known_ids,
)?;
}
WorkflowNode::Sequence(spec) => {
validate_workflow_references(&spec.children, known_ids)?;
}
WorkflowNode::Reduce(spec) => {
validate_known_references(spec.id.as_str(), "inputs", &spec.inputs, known_ids)?;
}
WorkflowNode::TeacherReview(spec) => {
validate_known_references(
spec.id.as_str(),
"candidates",
&spec.candidates,
known_ids,
)?;
}
WorkflowNode::LoopUntil(spec) => {
validate_workflow_references(&spec.children, known_ids)?;
}
WorkflowNode::Cond(spec) => {
validate_workflow_references(&spec.then_nodes, known_ids)?;
validate_workflow_references(&spec.else_nodes, known_ids)?;
}
WorkflowNode::Expand(_) => {}
}
}
Ok(())
}
fn validate_known_references(
node: &str,
field: &'static str,
references: &[String],
known_ids: &BTreeSet<String>,
) -> Result<(), WorkflowExecutionError> {
for reference in references {
if !known_ids.contains(reference) {
return Err(WorkflowExecutionError::UnknownNodeReference {
node: node.to_string(),
field,
reference: reference.clone(),
});
}
}
Ok(())
}
fn control_kind_name(node: &WorkflowNode) -> &'static str {
match node {
WorkflowNode::BranchSet(_) => "branch_set",
@@ -2074,6 +2150,83 @@ mod tests {
);
}
#[test]
fn workflow_spec_rejects_unknown_leaf_dependency() {
let mut summarize = leaf_node("summarize");
let WorkflowNode::Leaf(spec) = &mut summarize else {
panic!("expected leaf");
};
spec.depends_on_results = vec!["missing-scan".to_string()];
let workflow = workflow_spec(vec![summarize]);
let mut executor = MockWorkflowExecutor::new();
let err = executor
.run(&workflow)
.expect_err("unknown leaf dependency should fail before execution");
assert_eq!(
err,
WorkflowExecutionError::UnknownNodeReference {
node: "summarize".to_string(),
field: "depends_on_results",
reference: "missing-scan".to_string(),
}
);
}
#[test]
fn workflow_spec_rejects_unknown_reduce_input() {
let workflow = workflow_spec(vec![
leaf_node("scan"),
WorkflowNode::Reduce(ReduceSpec {
id: "summarize".to_string(),
inputs: vec!["scan".to_string(), "missing-review".to_string()],
prompt: "Summarize safe fixes".to_string(),
model_policy: ModelPolicy::default(),
}),
]);
let mut executor = MockWorkflowExecutor::new();
let err = executor
.run(&workflow)
.expect_err("unknown reduce input should fail before execution");
assert_eq!(
err,
WorkflowExecutionError::UnknownNodeReference {
node: "summarize".to_string(),
field: "inputs",
reference: "missing-review".to_string(),
}
);
}
#[test]
fn workflow_spec_rejects_unknown_teacher_candidate() {
let workflow = workflow_spec(vec![
leaf_node("candidate-a"),
WorkflowNode::TeacherReview(TeacherReviewSpec {
id: "teacher-review".to_string(),
candidates: vec!["candidate-a".to_string(), "candidate-b".to_string()],
promotion_policy: PromotionPolicy::default(),
}),
]);
let mut executor = MockWorkflowExecutor::new();
let err = executor
.run(&workflow)
.expect_err("unknown teacher candidate should fail before execution");
assert_eq!(
err,
WorkflowExecutionError::UnknownNodeReference {
node: "teacher-review".to_string(),
field: "candidates",
reference: "candidate-b".to_string(),
}
);
}
#[test]
fn tournament_selects_passing_minimal_branch() {
let tournament = BranchTournament { min_score: 60 };
+2 -2
View File
@@ -8,7 +8,7 @@ use crate::{
BranchResult, BranchSpec, CondSpec, ControlNodeKind, ControlNodeResult, ExpandSpec, LeafResult,
LeafSpec, LoopUntilSpec, SequenceSpec, WorkflowExecution, WorkflowExecutionError,
WorkflowMemoUsage, WorkflowNode, WorkflowRunStatus, WorkflowSpec, WorkflowUsage,
validate_workflow_nodes,
validate_workflow_node_shapes, validate_workflow_nodes,
};
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
@@ -316,7 +316,7 @@ impl WorkflowReplayExecutor {
.as_ref()
.map(|record| record.generated_nodes.clone())
.unwrap_or_default();
validate_workflow_nodes(&generated_nodes)?;
validate_workflow_node_shapes(&generated_nodes)?;
self.execute_nodes(spec, &generated_nodes, execution)?;
let selected = record
.as_ref()
+25 -3
View File
@@ -13,7 +13,7 @@ use thiserror::Error;
use crate::{
AgentType, BranchSpec, BudgetSpec, CondSpec, ExpandSpec, IsolationMode, LeafSpec, ModelPolicy,
PermissionSpec, PromotionPolicy, ReduceSpec, SequenceSpec, TaskMode, TeacherReviewSpec,
WorkflowNode, WorkflowSpec,
WorkflowNode, WorkflowSpec, validate_workflow_nodes,
};
pub type StarlarkWorkflowResult<T> = std::result::Result<T, StarlarkWorkflowError>;
@@ -50,10 +50,13 @@ pub fn compile_starlark_workflow(
eval.eval_module(ast, &globals)
.map_err(StarlarkWorkflowError::Starlark)?;
}
builder
let workflow = builder
.into_inner()
.workflow
.ok_or(StarlarkWorkflowError::MissingWorkflow)
.ok_or(StarlarkWorkflowError::MissingWorkflow)?;
validate_workflow_nodes(&workflow.nodes)
.map_err(|error| StarlarkWorkflowError::InvalidNode(error.to_string()))?;
Ok(workflow)
}
pub fn compile_starlark_workflow_with_repair(
@@ -531,6 +534,25 @@ workflow(goal = "bad", nodes = [])
));
}
#[test]
fn starlark_compile_gate_rejects_unknown_references() {
let source = r#"
workflow(
id = "bad-reference",
goal = "reject missing candidates",
nodes = [
teacher_review(id = "review", candidates = ["missing-candidate"]),
],
)
"#;
let err = compile_starlark_workflow("bad-reference.star", source)
.expect_err("unknown candidate should fail at the compile gate");
assert!(matches!(err, StarlarkWorkflowError::InvalidNode(_)));
assert!(err.to_string().contains("missing-candidate"));
}
#[test]
fn issue_fix_tournament_example_compiles() {
let source = include_str!("../../../workflows/issue_fix_tournament.star");