diff --git a/CHANGELOG.md b/CHANGELOG.md index 2931cf3a..dbc92247 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,8 +43,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 It also adds a crate-local mock executor skeleton for Sequence, BranchSet, Leaf, Reduce, LoopUntil, Cond, Expand, BranchTournament, and ParetoFrontier control flow so #2669 can progress without spawning agents, applying - worktrees, or exposing a `workflow_run` runtime tool yet. Thanks @AdityaVG13 - for the WhaleFlow draft and cost-tracking direction. + worktrees, or exposing a `workflow_run` runtime tool yet. A first Starlark + authoring layer now compiles fail-closed model-authored workflow files into + that typed IR, with `rlm_cache_change.star` and `issue_fix_tournament.star` + examples plus a one-pass repair for common `ctx.*` authoring aliases (#2670). + Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction. - Added a state-store v2 schema migration for WhaleFlow trace tables covering workflow, branch, leaf, control-node, and teacher-candidate runs. The migration creates persistence shape only; workflow execution and replay diff --git a/Cargo.lock b/Cargo.lock index 5ff23ee2..dd068bd4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1032,8 +1032,10 @@ version = "0.8.53" name = "codewhale-whaleflow" version = "0.8.53" dependencies = [ + "anyhow", "serde", "serde_json", + "starlark", "thiserror 2.0.18", ] diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md index 2931cf3a..dbc92247 100644 --- a/crates/tui/CHANGELOG.md +++ b/crates/tui/CHANGELOG.md @@ -43,8 +43,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 It also adds a crate-local mock executor skeleton for Sequence, BranchSet, Leaf, Reduce, LoopUntil, Cond, Expand, BranchTournament, and ParetoFrontier control flow so #2669 can progress without spawning agents, applying - worktrees, or exposing a `workflow_run` runtime tool yet. Thanks @AdityaVG13 - for the WhaleFlow draft and cost-tracking direction. + worktrees, or exposing a `workflow_run` runtime tool yet. A first Starlark + authoring layer now compiles fail-closed model-authored workflow files into + that typed IR, with `rlm_cache_change.star` and `issue_fix_tournament.star` + examples plus a one-pass repair for common `ctx.*` authoring aliases (#2670). + Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction. - Added a state-store v2 schema migration for WhaleFlow trace tables covering workflow, branch, leaf, control-node, and teacher-candidate runs. The migration creates persistence shape only; workflow execution and replay diff --git a/crates/whaleflow/Cargo.toml b/crates/whaleflow/Cargo.toml index c41d8e36..047f961c 100644 --- a/crates/whaleflow/Cargo.toml +++ b/crates/whaleflow/Cargo.toml @@ -8,7 +8,12 @@ description = "Typed WhaleFlow workflow IR and validation for CodeWhale" [dependencies] serde.workspace = true +serde_json.workspace = true thiserror.workspace = true +[target.'cfg(not(target_env = "ohos"))'.dependencies] +anyhow.workspace = true +starlark = "0.13.0" + [dev-dependencies] serde_json.workspace = true diff --git a/crates/whaleflow/src/lib.rs b/crates/whaleflow/src/lib.rs index d76501ca..b0833850 100644 --- a/crates/whaleflow/src/lib.rs +++ b/crates/whaleflow/src/lib.rs @@ -4,12 +4,20 @@ //! exposure, worktree application, replay, and model execution are layered on //! top only after their cancellation and evidence semantics are proven. +#[cfg(not(target_env = "ohos"))] +mod starlark_authoring; + use std::collections::{BTreeMap, BTreeSet}; use std::path::Path; use serde::{Deserialize, Serialize}; use thiserror::Error; +#[cfg(not(target_env = "ohos"))] +pub use starlark_authoring::{ + compile_starlark_workflow, compile_starlark_workflow_with_repair, repair_starlark_workflow_once, +}; + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct WorkflowConfig { pub goal: String, diff --git a/crates/whaleflow/src/starlark_authoring.rs b/crates/whaleflow/src/starlark_authoring.rs new file mode 100644 index 00000000..7f395c78 --- /dev/null +++ b/crates/whaleflow/src/starlark_authoring.rs @@ -0,0 +1,493 @@ +use std::cell::{RefCell, RefMut}; + +use starlark::any::ProvidesStaticType; +use starlark::environment::{GlobalsBuilder, Module}; +use starlark::eval::Evaluator; +use starlark::starlark_module; +use starlark::syntax::{AstModule, Dialect}; +use starlark::values::Value; +use starlark::values::list::UnpackList; +use starlark::values::none::NoneType; +use thiserror::Error; + +use crate::{ + AgentType, BranchSpec, BudgetSpec, CondSpec, ExpandSpec, IsolationMode, LeafSpec, ModelPolicy, + PermissionSpec, PromotionPolicy, ReduceSpec, SequenceSpec, TaskMode, TeacherReviewSpec, + WorkflowNode, WorkflowSpec, +}; + +pub type StarlarkWorkflowResult = std::result::Result; + +#[derive(Debug, Error)] +pub enum StarlarkWorkflowError { + #[error("workflow source contains unsupported construct `{construct}`")] + UnsupportedConstruct { construct: &'static str }, + #[error("workflow did not call workflow(...)")] + MissingWorkflow, + #[error("invalid workflow node: {0}")] + InvalidNode(String), + #[error("invalid {field} value `{value}`")] + InvalidEnum { field: &'static str, value: String }, + #[error("starlark error: {0}")] + Starlark(starlark::Error), +} + +pub fn compile_starlark_workflow( + identifier: &str, + source: &str, +) -> StarlarkWorkflowResult { + reject_unsupported_constructs(source)?; + let mut dialect = Dialect::Extended.clone(); + dialect.enable_f_strings = true; + let ast = AstModule::parse(identifier, source.to_string(), &dialect) + .map_err(StarlarkWorkflowError::Starlark)?; + let builder = RefCell::new(WorkflowBuilder::default()); + let globals = GlobalsBuilder::standard().with(workflow_builtins).build(); + let module = Module::new(); + { + let mut eval = Evaluator::new(&module); + eval.extra = Some(&builder); + eval.eval_module(ast, &globals) + .map_err(StarlarkWorkflowError::Starlark)?; + } + builder + .into_inner() + .workflow + .ok_or(StarlarkWorkflowError::MissingWorkflow) +} + +pub fn compile_starlark_workflow_with_repair( + identifier: &str, + source: &str, +) -> StarlarkWorkflowResult { + match compile_starlark_workflow(identifier, source) { + Ok(workflow) => Ok(workflow), + Err(first_err) => { + let repaired = repair_starlark_workflow_once(source); + if repaired == source { + Err(first_err) + } else { + compile_starlark_workflow(identifier, &repaired) + } + } + } +} + +pub fn repair_starlark_workflow_once(source: &str) -> String { + source + .replace("ctx.parallel(", "branch(") + .replace("ctx.sequence(", "sequence(") + .replace("ctx.loop_until(", "loop_until(") + .replace("ctx.when(", "when(") + .replace("ctx.expand(", "expand(") + .replace("ctx.tournament(", "tournament(") + .replace("ctx.teacher.review(", "teacher_review(") +} + +fn reject_unsupported_constructs(source: &str) -> StarlarkWorkflowResult<()> { + for (needle, construct) in [ + ("load(", "load"), + ("import ", "import"), + ("class ", "class"), + ("while ", "while"), + ("async ", "async"), + ("await ", "await"), + ("open(", "open"), + ] { + if source.contains(needle) { + return Err(StarlarkWorkflowError::UnsupportedConstruct { construct }); + } + } + Ok(()) +} + +#[derive(Debug, Default, ProvidesStaticType)] +struct WorkflowBuilder { + workflow: Option, +} + +fn workflow_builder<'v, 'a>(eval: &Evaluator<'v, 'a, '_>) -> RefMut<'a, WorkflowBuilder> { + #[expect(clippy::expect_used)] + eval.extra + .as_ref() + .expect("workflow_builder requires Evaluator.extra to be populated") + .downcast_ref::>() + .expect("Evaluator.extra must contain a WorkflowBuilder") + .borrow_mut() +} + +fn encode_node(node: WorkflowNode) -> anyhow::Result { + serde_json::to_string(&node).map_err(Into::into) +} + +fn decode_node(value: Value<'_>) -> anyhow::Result { + let raw = value.unpack_str().ok_or_else(|| { + StarlarkWorkflowError::InvalidNode(format!( + "expected node token string, got {}", + value.get_type() + )) + })?; + serde_json::from_str(raw).map_err(|err: serde_json::Error| { + StarlarkWorkflowError::InvalidNode(err.to_string()).into() + }) +} + +fn decode_nodes(values: UnpackList>) -> anyhow::Result> { + values.items.into_iter().map(decode_node).collect() +} + +fn decode_strings(values: Option>>) -> anyhow::Result> { + let Some(values) = values else { + return Ok(Vec::new()); + }; + values + .items + .into_iter() + .map(|value| { + value + .unpack_str() + .map(str::to_string) + .ok_or_else(|| anyhow::anyhow!("expected string, got {}", value.get_type())) + }) + .collect() +} + +fn agent_type(raw: Option<&str>) -> anyhow::Result { + match raw.unwrap_or("general") { + "general" => Ok(AgentType::General), + "explore" | "explorer" => Ok(AgentType::Explore), + "plan" => Ok(AgentType::Plan), + "review" => Ok(AgentType::Review), + "implementer" | "implement" => Ok(AgentType::Implementer), + "verifier" | "verify" => Ok(AgentType::Verifier), + "tool_agent" | "tool" => Ok(AgentType::ToolAgent), + value => Err(StarlarkWorkflowError::InvalidEnum { + field: "agent_type", + value: value.to_string(), + } + .into()), + } +} + +fn task_mode(raw: Option<&str>) -> anyhow::Result { + match raw.unwrap_or("read_only") { + "read_only" => Ok(TaskMode::ReadOnly), + "read_write" => Ok(TaskMode::ReadWrite), + value => Err(StarlarkWorkflowError::InvalidEnum { + field: "mode", + value: value.to_string(), + } + .into()), + } +} + +fn isolation_mode(raw: Option<&str>) -> anyhow::Result { + match raw.unwrap_or("shared") { + "shared" => Ok(IsolationMode::Shared), + "worktree" => Ok(IsolationMode::Worktree), + value => Err(StarlarkWorkflowError::InvalidEnum { + field: "isolation", + value: value.to_string(), + } + .into()), + } +} + +fn leaf_spec( + id: &str, + prompt: &str, + agent_type: Option<&str>, + mode: Option<&str>, + isolation: Option<&str>, + file_scope: Option>>, + depends_on_results: Option>>, +) -> anyhow::Result { + Ok(LeafSpec { + id: id.to_string(), + prompt: prompt.to_string(), + agent_type: self::agent_type(agent_type)?, + mode: task_mode(mode)?, + isolation: isolation_mode(isolation)?, + file_scope: decode_strings(file_scope)?, + depends_on_results: decode_strings(depends_on_results)?, + budget: BudgetSpec::default(), + permissions: PermissionSpec::default(), + model_policy: ModelPolicy::default(), + }) +} + +#[starlark_module] +fn workflow_builtins(builder: &mut GlobalsBuilder) { + fn workflow<'v>( + goal: &'v str, + nodes: UnpackList>, + id: Option<&'v str>, + description: Option<&'v str>, + eval: &mut Evaluator<'v, '_, '_>, + ) -> anyhow::Result { + let spec = WorkflowSpec { + id: id.map(str::to_string), + goal: goal.to_string(), + description: description.map(str::to_string), + budget: BudgetSpec::default(), + permissions: PermissionSpec::default(), + model_policy: ModelPolicy::default(), + promotion_policy: PromotionPolicy::default(), + nodes: decode_nodes(nodes)?, + }; + workflow_builder(eval).workflow = Some(spec); + Ok(NoneType) + } + + fn agent<'v>( + id: &'v str, + prompt: &'v str, + agent_type: Option<&'v str>, + mode: Option<&'v str>, + isolation: Option<&'v str>, + file_scope: Option>>, + depends_on_results: Option>>, + ) -> anyhow::Result { + encode_node(WorkflowNode::Leaf(leaf_spec( + id, + prompt, + agent_type, + mode, + isolation, + file_scope, + depends_on_results, + )?)) + } + + fn test<'v>( + id: &'v str, + command: &'v str, + file_scope: Option>>, + ) -> anyhow::Result { + encode_node(WorkflowNode::Leaf(leaf_spec( + id, + &format!("Run test command: {command}"), + Some("verifier"), + Some("read_only"), + Some("shared"), + file_scope, + None, + )?)) + } + + fn search<'v>( + id: &'v str, + query: &'v str, + file_scope: Option>>, + ) -> anyhow::Result { + encode_node(WorkflowNode::Leaf(leaf_spec( + id, + &format!("Search codebase: {query}"), + Some("tool"), + Some("read_only"), + Some("shared"), + file_scope, + None, + )?)) + } + + fn shell<'v>( + id: &'v str, + command: &'v str, + file_scope: Option>>, + ) -> anyhow::Result { + encode_node(WorkflowNode::Leaf(leaf_spec( + id, + &format!("Run shell command: {command}"), + Some("tool"), + Some("read_only"), + Some("shared"), + file_scope, + None, + )?)) + } + + fn branch<'v>( + id: &'v str, + children: UnpackList>, + parallel: Option, + ) -> anyhow::Result { + encode_node(WorkflowNode::BranchSet(BranchSpec { + id: id.to_string(), + description: None, + parallel: parallel.unwrap_or(true), + budget: BudgetSpec::default(), + permissions: PermissionSpec::default(), + model_policy: ModelPolicy::default(), + children: decode_nodes(children)?, + })) + } + + fn sequence<'v>(id: &'v str, children: UnpackList>) -> anyhow::Result { + encode_node(WorkflowNode::Sequence(SequenceSpec { + id: id.to_string(), + children: decode_nodes(children)?, + })) + } + + fn reduce<'v>( + id: &'v str, + prompt: &'v str, + inputs: Option>>, + ) -> anyhow::Result { + encode_node(WorkflowNode::Reduce(ReduceSpec { + id: id.to_string(), + inputs: decode_strings(inputs)?, + prompt: prompt.to_string(), + model_policy: ModelPolicy::default(), + })) + } + + fn teacher_review<'v>( + id: &'v str, + candidates: Option>>, + ) -> anyhow::Result { + encode_node(WorkflowNode::TeacherReview(TeacherReviewSpec { + id: id.to_string(), + candidates: decode_strings(candidates)?, + promotion_policy: PromotionPolicy::default(), + })) + } + + fn tournament<'v>( + id: &'v str, + candidates: Option>>, + ) -> anyhow::Result { + encode_node(WorkflowNode::TeacherReview(TeacherReviewSpec { + id: id.to_string(), + candidates: decode_strings(candidates)?, + promotion_policy: PromotionPolicy::default(), + })) + } + + fn loop_until<'v>( + id: &'v str, + condition: &'v str, + children: UnpackList>, + max_iterations: Option, + ) -> anyhow::Result { + encode_node(WorkflowNode::LoopUntil(crate::LoopUntilSpec { + id: id.to_string(), + condition: condition.to_string(), + max_iterations, + children: decode_nodes(children)?, + })) + } + + fn r#when<'v>( + id: &'v str, + condition: &'v str, + then_nodes: UnpackList>, + else_nodes: Option>>, + ) -> anyhow::Result { + encode_node(WorkflowNode::Cond(CondSpec { + id: id.to_string(), + condition: condition.to_string(), + then_nodes: decode_nodes(then_nodes)?, + else_nodes: else_nodes + .map(decode_nodes) + .transpose()? + .unwrap_or_default(), + })) + } + + fn expand<'v>( + id: &'v str, + source: &'v str, + max_children: Option, + ) -> anyhow::Result { + encode_node(WorkflowNode::Expand(ExpandSpec { + id: id.to_string(), + source: source.to_string(), + max_children, + template: None, + })) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{AgentType, ControlNodeKind}; + + #[test] + fn starlark_compiles_to_ir() { + let source = include_str!("../../../workflows/rlm_cache_change.star"); + let workflow = compile_starlark_workflow("rlm_cache_change.star", source) + .expect("example should compile"); + + assert_eq!(workflow.id.as_deref(), Some("rlm-cache-change")); + assert_eq!(workflow.nodes.len(), 2); + let WorkflowNode::BranchSet(branch) = &workflow.nodes[0] else { + panic!("first node should be a branch set"); + }; + assert_eq!(branch.id, "discover"); + assert!(branch.parallel); + let WorkflowNode::Leaf(leaf) = &branch.children[0] else { + panic!("first branch child should be a leaf"); + }; + assert_eq!(leaf.agent_type, AgentType::ToolAgent); + } + + #[test] + fn starlark_repair_loop() { + let source = r#" +workflow( + id = "repair-demo", + goal = "repair ctx aliases", + nodes = [ + ctx.parallel(id = "discover", children = [ + agent(id = "scan", prompt = "scan repo"), + ]), + ], +) +"#; + + let workflow = compile_starlark_workflow_with_repair("repair.star", source) + .expect("repair should rewrite ctx.parallel"); + + assert_eq!(workflow.id.as_deref(), Some("repair-demo")); + assert!(matches!(workflow.nodes[0], WorkflowNode::BranchSet(_))); + } + + #[test] + fn invalid_workflow_rejected() { + let source = r#" +load("@stdlib//fs.star", "open") +workflow(goal = "bad", nodes = []) +"#; + + let err = compile_starlark_workflow("invalid.star", source) + .expect_err("imports should be rejected before eval"); + + assert!(matches!( + err, + StarlarkWorkflowError::UnsupportedConstruct { construct: "load" } + )); + } + + #[test] + fn issue_fix_tournament_example_compiles() { + let source = include_str!("../../../workflows/issue_fix_tournament.star"); + let workflow = compile_starlark_workflow("issue_fix_tournament.star", source) + .expect("example should compile"); + + let WorkflowNode::Sequence(sequence) = &workflow.nodes[1] else { + panic!("second node should be a sequence"); + }; + assert!(sequence.children.iter().any(|node| matches!( + node, + WorkflowNode::TeacherReview(review) + if review.id == "select-fix" + ))); + assert_eq!( + ControlNodeKind::TeacherReview, + ControlNodeKind::TeacherReview + ); + } +} diff --git a/workflows/issue_fix_tournament.star b/workflows/issue_fix_tournament.star new file mode 100644 index 00000000..75f4ec3a --- /dev/null +++ b/workflows/issue_fix_tournament.star @@ -0,0 +1,42 @@ +workflow( + id = "issue-fix-tournament", + goal = "Compare narrow fixes for one issue before promotion", + nodes = [ + branch( + id = "candidate-fixes", + parallel = True, + children = [ + agent( + id = "minimal-fix", + prompt = "Produce the smallest fix and list verification evidence.", + agent_type = "implementer", + mode = "read_write", + isolation = "worktree", + file_scope = ["crates/**"], + ), + agent( + id = "defensive-fix", + prompt = "Produce a more defensive fix and list regression risks.", + agent_type = "implementer", + mode = "read_write", + isolation = "worktree", + file_scope = ["crates/**"], + ), + ], + ), + sequence( + id = "review", + children = [ + tournament( + id = "select-fix", + candidates = ["minimal-fix", "defensive-fix"], + ), + test( + id = "verify-selected", + command = "cargo test --workspace --locked", + file_scope = ["crates/**"], + ), + ], + ), + ], +) diff --git a/workflows/rlm_cache_change.star b/workflows/rlm_cache_change.star new file mode 100644 index 00000000..c68a86c9 --- /dev/null +++ b/workflows/rlm_cache_change.star @@ -0,0 +1,42 @@ +workflow( + id = "rlm-cache-change", + goal = "Evaluate an RLM/cache routing change with safe mock WhaleFlow IR", + nodes = [ + branch( + id = "discover", + parallel = True, + children = [ + search( + id = "find-cache-surfaces", + query = "Find RLM and cache routing surfaces", + file_scope = ["crates/tui/src/rlm/**", "crates/tui/src/core/**"], + ), + agent( + id = "inspect-provider-cache", + prompt = "Inspect provider cache behavior without editing files.", + agent_type = "explore", + file_scope = ["crates/tui/src/providers/**"], + ), + ], + ), + sequence( + id = "verify-and-summarize", + children = [ + test( + id = "run-rlm-tests", + command = "cargo test -p codewhale-tui rlm --locked", + file_scope = ["crates/tui/src/rlm/**"], + ), + reduce( + id = "summarize-cache-change", + inputs = [ + "find-cache-surfaces", + "inspect-provider-cache", + "run-rlm-tests", + ], + prompt = "Summarize the smallest safe cache-routing patch.", + ), + ], + ), + ], +)