fix(whaleflow): reject unknown workflow references (#2837)
This commit is contained in:
+5
-1
@@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow
|
||||
results now also carry separate ARMH/shared-memo and provider prompt-cache
|
||||
telemetry counters, with mock aggregation tests, so #2671 can progress
|
||||
without wiring live RLM calls or billing-affecting provider behavior yet.
|
||||
without wiring live RLM calls or billing-affecting provider behavior yet. The
|
||||
Starlark and typed-IR gates now also reject unknown leaf dependencies,
|
||||
reducer inputs, and teacher-review candidates before mock execution or replay,
|
||||
keeping generated workflows fail-closed while runtime/worktree semantics stay
|
||||
deferred.
|
||||
Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
|
||||
- Added a state-store v2 schema migration for WhaleFlow trace tables covering
|
||||
workflow, branch, leaf, control-node, and teacher-candidate runs. The
|
||||
|
||||
@@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow
|
||||
results now also carry separate ARMH/shared-memo and provider prompt-cache
|
||||
telemetry counters, with mock aggregation tests, so #2671 can progress
|
||||
without wiring live RLM calls or billing-affecting provider behavior yet.
|
||||
without wiring live RLM calls or billing-affecting provider behavior yet. The
|
||||
Starlark and typed-IR gates now also reject unknown leaf dependencies,
|
||||
reducer inputs, and teacher-review candidates before mock execution or replay,
|
||||
keeping generated workflows fail-closed while runtime/worktree semantics stay
|
||||
deferred.
|
||||
Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
|
||||
- Added a state-store v2 schema migration for WhaleFlow trace tables covering
|
||||
workflow, branch, leaf, control-node, and teacher-candidate runs. The
|
||||
|
||||
+154
-1
@@ -878,7 +878,7 @@ impl MockWorkflowExecutor {
|
||||
if let Some(max_children) = spec.max_children {
|
||||
nodes.truncate(max_children);
|
||||
}
|
||||
validate_workflow_nodes(&nodes)?;
|
||||
validate_workflow_node_shapes(&nodes)?;
|
||||
self.execute_nodes(&nodes, execution)?;
|
||||
execution.control_node_results.push(ControlNodeResult {
|
||||
node_id: spec.id.clone(),
|
||||
@@ -973,6 +973,12 @@ pub enum WorkflowExecutionError {
|
||||
EmptyLeafPrompt { leaf: String },
|
||||
#[error("duplicate workflow node `{node}`")]
|
||||
DuplicateNodeId { node: String },
|
||||
#[error("workflow node `{node}` has unknown {field} reference `{reference}`")]
|
||||
UnknownNodeReference {
|
||||
node: String,
|
||||
field: &'static str,
|
||||
reference: String,
|
||||
},
|
||||
}
|
||||
|
||||
fn default_frontier_limit() -> usize {
|
||||
@@ -994,6 +1000,14 @@ fn node_id(node: &WorkflowNode) -> String {
|
||||
|
||||
pub(crate) fn validate_workflow_nodes(
|
||||
nodes: &[WorkflowNode],
|
||||
) -> Result<(), WorkflowExecutionError> {
|
||||
let mut seen = BTreeSet::new();
|
||||
validate_workflow_nodes_inner(nodes, &mut seen)?;
|
||||
validate_workflow_references(nodes, &seen)
|
||||
}
|
||||
|
||||
pub(crate) fn validate_workflow_node_shapes(
|
||||
nodes: &[WorkflowNode],
|
||||
) -> Result<(), WorkflowExecutionError> {
|
||||
let mut seen = BTreeSet::new();
|
||||
validate_workflow_nodes_inner(nodes, &mut seen)
|
||||
@@ -1033,6 +1047,68 @@ fn validate_workflow_nodes_inner(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_workflow_references(
|
||||
nodes: &[WorkflowNode],
|
||||
known_ids: &BTreeSet<String>,
|
||||
) -> Result<(), WorkflowExecutionError> {
|
||||
for node in nodes {
|
||||
match node {
|
||||
WorkflowNode::BranchSet(spec) => {
|
||||
validate_workflow_references(&spec.children, known_ids)?;
|
||||
}
|
||||
WorkflowNode::Leaf(spec) => {
|
||||
validate_known_references(
|
||||
spec.id.as_str(),
|
||||
"depends_on_results",
|
||||
&spec.depends_on_results,
|
||||
known_ids,
|
||||
)?;
|
||||
}
|
||||
WorkflowNode::Sequence(spec) => {
|
||||
validate_workflow_references(&spec.children, known_ids)?;
|
||||
}
|
||||
WorkflowNode::Reduce(spec) => {
|
||||
validate_known_references(spec.id.as_str(), "inputs", &spec.inputs, known_ids)?;
|
||||
}
|
||||
WorkflowNode::TeacherReview(spec) => {
|
||||
validate_known_references(
|
||||
spec.id.as_str(),
|
||||
"candidates",
|
||||
&spec.candidates,
|
||||
known_ids,
|
||||
)?;
|
||||
}
|
||||
WorkflowNode::LoopUntil(spec) => {
|
||||
validate_workflow_references(&spec.children, known_ids)?;
|
||||
}
|
||||
WorkflowNode::Cond(spec) => {
|
||||
validate_workflow_references(&spec.then_nodes, known_ids)?;
|
||||
validate_workflow_references(&spec.else_nodes, known_ids)?;
|
||||
}
|
||||
WorkflowNode::Expand(_) => {}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_known_references(
|
||||
node: &str,
|
||||
field: &'static str,
|
||||
references: &[String],
|
||||
known_ids: &BTreeSet<String>,
|
||||
) -> Result<(), WorkflowExecutionError> {
|
||||
for reference in references {
|
||||
if !known_ids.contains(reference) {
|
||||
return Err(WorkflowExecutionError::UnknownNodeReference {
|
||||
node: node.to_string(),
|
||||
field,
|
||||
reference: reference.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn control_kind_name(node: &WorkflowNode) -> &'static str {
|
||||
match node {
|
||||
WorkflowNode::BranchSet(_) => "branch_set",
|
||||
@@ -2074,6 +2150,83 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn workflow_spec_rejects_unknown_leaf_dependency() {
|
||||
let mut summarize = leaf_node("summarize");
|
||||
let WorkflowNode::Leaf(spec) = &mut summarize else {
|
||||
panic!("expected leaf");
|
||||
};
|
||||
spec.depends_on_results = vec!["missing-scan".to_string()];
|
||||
let workflow = workflow_spec(vec![summarize]);
|
||||
|
||||
let mut executor = MockWorkflowExecutor::new();
|
||||
let err = executor
|
||||
.run(&workflow)
|
||||
.expect_err("unknown leaf dependency should fail before execution");
|
||||
|
||||
assert_eq!(
|
||||
err,
|
||||
WorkflowExecutionError::UnknownNodeReference {
|
||||
node: "summarize".to_string(),
|
||||
field: "depends_on_results",
|
||||
reference: "missing-scan".to_string(),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn workflow_spec_rejects_unknown_reduce_input() {
|
||||
let workflow = workflow_spec(vec![
|
||||
leaf_node("scan"),
|
||||
WorkflowNode::Reduce(ReduceSpec {
|
||||
id: "summarize".to_string(),
|
||||
inputs: vec!["scan".to_string(), "missing-review".to_string()],
|
||||
prompt: "Summarize safe fixes".to_string(),
|
||||
model_policy: ModelPolicy::default(),
|
||||
}),
|
||||
]);
|
||||
|
||||
let mut executor = MockWorkflowExecutor::new();
|
||||
let err = executor
|
||||
.run(&workflow)
|
||||
.expect_err("unknown reduce input should fail before execution");
|
||||
|
||||
assert_eq!(
|
||||
err,
|
||||
WorkflowExecutionError::UnknownNodeReference {
|
||||
node: "summarize".to_string(),
|
||||
field: "inputs",
|
||||
reference: "missing-review".to_string(),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn workflow_spec_rejects_unknown_teacher_candidate() {
|
||||
let workflow = workflow_spec(vec![
|
||||
leaf_node("candidate-a"),
|
||||
WorkflowNode::TeacherReview(TeacherReviewSpec {
|
||||
id: "teacher-review".to_string(),
|
||||
candidates: vec!["candidate-a".to_string(), "candidate-b".to_string()],
|
||||
promotion_policy: PromotionPolicy::default(),
|
||||
}),
|
||||
]);
|
||||
|
||||
let mut executor = MockWorkflowExecutor::new();
|
||||
let err = executor
|
||||
.run(&workflow)
|
||||
.expect_err("unknown teacher candidate should fail before execution");
|
||||
|
||||
assert_eq!(
|
||||
err,
|
||||
WorkflowExecutionError::UnknownNodeReference {
|
||||
node: "teacher-review".to_string(),
|
||||
field: "candidates",
|
||||
reference: "candidate-b".to_string(),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tournament_selects_passing_minimal_branch() {
|
||||
let tournament = BranchTournament { min_score: 60 };
|
||||
|
||||
@@ -8,7 +8,7 @@ use crate::{
|
||||
BranchResult, BranchSpec, CondSpec, ControlNodeKind, ControlNodeResult, ExpandSpec, LeafResult,
|
||||
LeafSpec, LoopUntilSpec, SequenceSpec, WorkflowExecution, WorkflowExecutionError,
|
||||
WorkflowMemoUsage, WorkflowNode, WorkflowRunStatus, WorkflowSpec, WorkflowUsage,
|
||||
validate_workflow_nodes,
|
||||
validate_workflow_node_shapes, validate_workflow_nodes,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||
@@ -316,7 +316,7 @@ impl WorkflowReplayExecutor {
|
||||
.as_ref()
|
||||
.map(|record| record.generated_nodes.clone())
|
||||
.unwrap_or_default();
|
||||
validate_workflow_nodes(&generated_nodes)?;
|
||||
validate_workflow_node_shapes(&generated_nodes)?;
|
||||
self.execute_nodes(spec, &generated_nodes, execution)?;
|
||||
let selected = record
|
||||
.as_ref()
|
||||
|
||||
@@ -13,7 +13,7 @@ use thiserror::Error;
|
||||
use crate::{
|
||||
AgentType, BranchSpec, BudgetSpec, CondSpec, ExpandSpec, IsolationMode, LeafSpec, ModelPolicy,
|
||||
PermissionSpec, PromotionPolicy, ReduceSpec, SequenceSpec, TaskMode, TeacherReviewSpec,
|
||||
WorkflowNode, WorkflowSpec,
|
||||
WorkflowNode, WorkflowSpec, validate_workflow_nodes,
|
||||
};
|
||||
|
||||
pub type StarlarkWorkflowResult<T> = std::result::Result<T, StarlarkWorkflowError>;
|
||||
@@ -50,10 +50,13 @@ pub fn compile_starlark_workflow(
|
||||
eval.eval_module(ast, &globals)
|
||||
.map_err(StarlarkWorkflowError::Starlark)?;
|
||||
}
|
||||
builder
|
||||
let workflow = builder
|
||||
.into_inner()
|
||||
.workflow
|
||||
.ok_or(StarlarkWorkflowError::MissingWorkflow)
|
||||
.ok_or(StarlarkWorkflowError::MissingWorkflow)?;
|
||||
validate_workflow_nodes(&workflow.nodes)
|
||||
.map_err(|error| StarlarkWorkflowError::InvalidNode(error.to_string()))?;
|
||||
Ok(workflow)
|
||||
}
|
||||
|
||||
pub fn compile_starlark_workflow_with_repair(
|
||||
@@ -531,6 +534,25 @@ workflow(goal = "bad", nodes = [])
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn starlark_compile_gate_rejects_unknown_references() {
|
||||
let source = r#"
|
||||
workflow(
|
||||
id = "bad-reference",
|
||||
goal = "reject missing candidates",
|
||||
nodes = [
|
||||
teacher_review(id = "review", candidates = ["missing-candidate"]),
|
||||
],
|
||||
)
|
||||
"#;
|
||||
|
||||
let err = compile_starlark_workflow("bad-reference.star", source)
|
||||
.expect_err("unknown candidate should fail at the compile gate");
|
||||
|
||||
assert!(matches!(err, StarlarkWorkflowError::InvalidNode(_)));
|
||||
assert!(err.to_string().contains("missing-candidate"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn issue_fix_tournament_example_compiles() {
|
||||
let source = include_str!("../../../workflows/issue_fix_tournament.star");
|
||||
|
||||
Reference in New Issue
Block a user