fix(whaleflow): reject unknown workflow references (#2837)

2026-06-05 22:58:23 -07:00
parent 96b825b84e
commit 6bb564e49c
5 changed files with 191 additions and 8 deletions
@@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow
  results now also carry separate ARMH/shared-memo and provider prompt-cache
  telemetry counters, with mock aggregation tests, so #2671 can progress
-  without wiring live RLM calls or billing-affecting provider behavior yet.
+  without wiring live RLM calls or billing-affecting provider behavior yet. The
+  Starlark and typed-IR gates now also reject unknown leaf dependencies,
+  reducer inputs, and teacher-review candidates before mock execution or replay,
+  keeping generated workflows fail-closed while runtime/worktree semantics stay
+  deferred.
  Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
 - Added a state-store v2 schema migration for WhaleFlow trace tables covering
  workflow, branch, leaf, control-node, and teacher-candidate runs. The
@@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow
  results now also carry separate ARMH/shared-memo and provider prompt-cache
  telemetry counters, with mock aggregation tests, so #2671 can progress
-  without wiring live RLM calls or billing-affecting provider behavior yet.
+  without wiring live RLM calls or billing-affecting provider behavior yet. The
+  Starlark and typed-IR gates now also reject unknown leaf dependencies,
+  reducer inputs, and teacher-review candidates before mock execution or replay,
+  keeping generated workflows fail-closed while runtime/worktree semantics stay
+  deferred.
  Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
 - Added a state-store v2 schema migration for WhaleFlow trace tables covering
  workflow, branch, leaf, control-node, and teacher-candidate runs. The
@@ -878,7 +878,7 @@ impl MockWorkflowExecutor {
        if let Some(max_children) = spec.max_children {
            nodes.truncate(max_children);
        }
-        validate_workflow_nodes(&nodes)?;
+        validate_workflow_node_shapes(&nodes)?;
        self.execute_nodes(&nodes, execution)?;
        execution.control_node_results.push(ControlNodeResult {
            node_id: spec.id.clone(),
@@ -973,6 +973,12 @@ pub enum WorkflowExecutionError {
    EmptyLeafPrompt { leaf: String },
    #[error("duplicate workflow node `{node}`")]
    DuplicateNodeId { node: String },
+    #[error("workflow node `{node}` has unknown {field} reference `{reference}`")]
+    UnknownNodeReference {
+        node: String,
+        field: &'static str,
+        reference: String,
+    },
 }

 fn default_frontier_limit() -> usize {
@@ -994,6 +1000,14 @@ fn node_id(node: &WorkflowNode) -> String {

 pub(crate) fn validate_workflow_nodes(
    nodes: &[WorkflowNode],
+) -> Result<(), WorkflowExecutionError> {
+    let mut seen = BTreeSet::new();
+    validate_workflow_nodes_inner(nodes, &mut seen)?;
+    validate_workflow_references(nodes, &seen)
+}
+
+pub(crate) fn validate_workflow_node_shapes(
+    nodes: &[WorkflowNode],
 ) -> Result<(), WorkflowExecutionError> {
    let mut seen = BTreeSet::new();
    validate_workflow_nodes_inner(nodes, &mut seen)
@@ -1033,6 +1047,68 @@ fn validate_workflow_nodes_inner(
    Ok(())
 }

+fn validate_workflow_references(
+    nodes: &[WorkflowNode],
+    known_ids: &BTreeSet<String>,
+) -> Result<(), WorkflowExecutionError> {
+    for node in nodes {
+        match node {
+            WorkflowNode::BranchSet(spec) => {
+                validate_workflow_references(&spec.children, known_ids)?;
+            }
+            WorkflowNode::Leaf(spec) => {
+                validate_known_references(
+                    spec.id.as_str(),
+                    "depends_on_results",
+                    &spec.depends_on_results,
+                    known_ids,
+                )?;
+            }
+            WorkflowNode::Sequence(spec) => {
+                validate_workflow_references(&spec.children, known_ids)?;
+            }
+            WorkflowNode::Reduce(spec) => {
+                validate_known_references(spec.id.as_str(), "inputs", &spec.inputs, known_ids)?;
+            }
+            WorkflowNode::TeacherReview(spec) => {
+                validate_known_references(
+                    spec.id.as_str(),
+                    "candidates",
+                    &spec.candidates,
+                    known_ids,
+                )?;
+            }
+            WorkflowNode::LoopUntil(spec) => {
+                validate_workflow_references(&spec.children, known_ids)?;
+            }
+            WorkflowNode::Cond(spec) => {
+                validate_workflow_references(&spec.then_nodes, known_ids)?;
+                validate_workflow_references(&spec.else_nodes, known_ids)?;
+            }
+            WorkflowNode::Expand(_) => {}
+        }
+    }
+    Ok(())
+}
+
+fn validate_known_references(
+    node: &str,
+    field: &'static str,
+    references: &[String],
+    known_ids: &BTreeSet<String>,
+) -> Result<(), WorkflowExecutionError> {
+    for reference in references {
+        if !known_ids.contains(reference) {
+            return Err(WorkflowExecutionError::UnknownNodeReference {
+                node: node.to_string(),
+                field,
+                reference: reference.clone(),
+            });
+        }
+    }
+    Ok(())
+}
+
 fn control_kind_name(node: &WorkflowNode) -> &'static str {
    match node {
        WorkflowNode::BranchSet(_) => "branch_set",
@@ -2074,6 +2150,83 @@ mod tests {
        );
    }

+    #[test]
+    fn workflow_spec_rejects_unknown_leaf_dependency() {
+        let mut summarize = leaf_node("summarize");
+        let WorkflowNode::Leaf(spec) = &mut summarize else {
+            panic!("expected leaf");
+        };
+        spec.depends_on_results = vec!["missing-scan".to_string()];
+        let workflow = workflow_spec(vec![summarize]);
+
+        let mut executor = MockWorkflowExecutor::new();
+        let err = executor
+            .run(&workflow)
+            .expect_err("unknown leaf dependency should fail before execution");
+
+        assert_eq!(
+            err,
+            WorkflowExecutionError::UnknownNodeReference {
+                node: "summarize".to_string(),
+                field: "depends_on_results",
+                reference: "missing-scan".to_string(),
+            }
+        );
+    }
+
+    #[test]
+    fn workflow_spec_rejects_unknown_reduce_input() {
+        let workflow = workflow_spec(vec![
+            leaf_node("scan"),
+            WorkflowNode::Reduce(ReduceSpec {
+                id: "summarize".to_string(),
+                inputs: vec!["scan".to_string(), "missing-review".to_string()],
+                prompt: "Summarize safe fixes".to_string(),
+                model_policy: ModelPolicy::default(),
+            }),
+        ]);
+
+        let mut executor = MockWorkflowExecutor::new();
+        let err = executor
+            .run(&workflow)
+            .expect_err("unknown reduce input should fail before execution");
+
+        assert_eq!(
+            err,
+            WorkflowExecutionError::UnknownNodeReference {
+                node: "summarize".to_string(),
+                field: "inputs",
+                reference: "missing-review".to_string(),
+            }
+        );
+    }
+
+    #[test]
+    fn workflow_spec_rejects_unknown_teacher_candidate() {
+        let workflow = workflow_spec(vec![
+            leaf_node("candidate-a"),
+            WorkflowNode::TeacherReview(TeacherReviewSpec {
+                id: "teacher-review".to_string(),
+                candidates: vec!["candidate-a".to_string(), "candidate-b".to_string()],
+                promotion_policy: PromotionPolicy::default(),
+            }),
+        ]);
+
+        let mut executor = MockWorkflowExecutor::new();
+        let err = executor
+            .run(&workflow)
+            .expect_err("unknown teacher candidate should fail before execution");
+
+        assert_eq!(
+            err,
+            WorkflowExecutionError::UnknownNodeReference {
+                node: "teacher-review".to_string(),
+                field: "candidates",
+                reference: "candidate-b".to_string(),
+            }
+        );
+    }
+
    #[test]
    fn tournament_selects_passing_minimal_branch() {
        let tournament = BranchTournament { min_score: 60 };
@@ -8,7 +8,7 @@ use crate::{
    BranchResult, BranchSpec, CondSpec, ControlNodeKind, ControlNodeResult, ExpandSpec, LeafResult,
    LeafSpec, LoopUntilSpec, SequenceSpec, WorkflowExecution, WorkflowExecutionError,
    WorkflowMemoUsage, WorkflowNode, WorkflowRunStatus, WorkflowSpec, WorkflowUsage,
-    validate_workflow_nodes,
+    validate_workflow_node_shapes, validate_workflow_nodes,
 };

 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
@@ -316,7 +316,7 @@ impl WorkflowReplayExecutor {
            .as_ref()
            .map(|record| record.generated_nodes.clone())
            .unwrap_or_default();
-        validate_workflow_nodes(&generated_nodes)?;
+        validate_workflow_node_shapes(&generated_nodes)?;
        self.execute_nodes(spec, &generated_nodes, execution)?;
        let selected = record
            .as_ref()
@@ -13,7 +13,7 @@ use thiserror::Error;
 use crate::{
    AgentType, BranchSpec, BudgetSpec, CondSpec, ExpandSpec, IsolationMode, LeafSpec, ModelPolicy,
    PermissionSpec, PromotionPolicy, ReduceSpec, SequenceSpec, TaskMode, TeacherReviewSpec,
-    WorkflowNode, WorkflowSpec,
+    WorkflowNode, WorkflowSpec, validate_workflow_nodes,
 };

 pub type StarlarkWorkflowResult<T> = std::result::Result<T, StarlarkWorkflowError>;
@@ -50,10 +50,13 @@ pub fn compile_starlark_workflow(
        eval.eval_module(ast, &globals)
            .map_err(StarlarkWorkflowError::Starlark)?;
    }
-    builder
+    let workflow = builder
        .into_inner()
        .workflow
-        .ok_or(StarlarkWorkflowError::MissingWorkflow)
+        .ok_or(StarlarkWorkflowError::MissingWorkflow)?;
+    validate_workflow_nodes(&workflow.nodes)
+        .map_err(|error| StarlarkWorkflowError::InvalidNode(error.to_string()))?;
+    Ok(workflow)
 }

 pub fn compile_starlark_workflow_with_repair(
@@ -531,6 +534,25 @@ workflow(goal = "bad", nodes = [])
        ));
    }

+    #[test]
+    fn starlark_compile_gate_rejects_unknown_references() {
+        let source = r#"
+workflow(
+    id = "bad-reference",
+    goal = "reject missing candidates",
+    nodes = [
+        teacher_review(id = "review", candidates = ["missing-candidate"]),
+    ],
+)
+"#;
+
+        let err = compile_starlark_workflow("bad-reference.star", source)
+            .expect_err("unknown candidate should fail at the compile gate");
+
+        assert!(matches!(err, StarlarkWorkflowError::InvalidNode(_)));
+        assert!(err.to_string().contains("missing-candidate"));
+    }
+
    #[test]
    fn issue_fix_tournament_example_compiles() {
        let source = include_str!("../../../workflows/issue_fix_tournament.star");