From 14d14f56deeec478e1ff35c174503b54d3a481ea Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 5 Jun 2026 23:08:10 -0700
Subject: [PATCH] feat(whaleflow): add teacher candidate artifacts (#2839)

---
 CHANGELOG.md                |   6 +-
 crates/tui/CHANGELOG.md     |   6 +-
 crates/whaleflow/src/lib.rs | 297 ++++++++++++++++++++++++++++++++++++
 3 files changed, 307 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b73d00dd..241e530c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -78,7 +78,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   Starlark and typed-IR gates now also reject unknown leaf dependencies,
   reducer inputs, and teacher-review candidates before mock execution or replay,
   keeping generated workflows fail-closed while runtime/worktree semantics stay
-  deferred.
+  deferred. TeacherReview now has serializable GEPA-style candidate artifacts
+  for notes, workflow recipes, skills, regression tests, cache policy, branch
+  heuristics, and Starlark authoring prompt patches, plus an offline helper
+  that proposes candidates from recorded execution traces without promoting
+  them or training model weights (#2674).
   Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
 - Added a state-store v2 schema migration for WhaleFlow trace tables covering
   workflow, branch, leaf, control-node, and teacher-candidate runs. The
diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md
index b73d00dd..241e530c 100644
--- a/crates/tui/CHANGELOG.md
+++ b/crates/tui/CHANGELOG.md
@@ -78,7 +78,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   Starlark and typed-IR gates now also reject unknown leaf dependencies,
   reducer inputs, and teacher-review candidates before mock execution or replay,
   keeping generated workflows fail-closed while runtime/worktree semantics stay
-  deferred.
+  deferred. TeacherReview now has serializable GEPA-style candidate artifacts
+  for notes, workflow recipes, skills, regression tests, cache policy, branch
+  heuristics, and Starlark authoring prompt patches, plus an offline helper
+  that proposes candidates from recorded execution traces without promoting
+  them or training model weights (#2674).
   Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
 - Added a state-store v2 schema migration for WhaleFlow trace tables covering
   workflow, branch, leaf, control-node, and teacher-candidate runs. The
diff --git a/crates/whaleflow/src/lib.rs b/crates/whaleflow/src/lib.rs
index 68ef4ac2..7540afdc 100644
--- a/crates/whaleflow/src/lib.rs
+++ b/crates/whaleflow/src/lib.rs
@@ -911,6 +911,202 @@ pub struct BranchCandidate {
     pub diversity_key: Option<String>,
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")] // e.g. `WorkflowRecipe` serializes as "workflow_recipe"
+pub enum TeacherCandidateKind {
+    Note, // proposed for leaves that neither failed nor reported cache hits
+    WorkflowRecipe, // proposed for succeeded branches without cache hits
+    SkillPatch, // NOTE(review): no helper visible here proposes this kind
+    RegressionTest, // proposed for failed leaves
+    CachePolicyPatch, // proposed when a branch, or a non-failed leaf, reports cache hits
+    BranchHeuristic, // proposed for non-succeeded branches without cache hits
+    StarlarkAuthoringPromptPatch, // proposed for control nodes
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+#[serde(rename_all = "snake_case")] // e.g. `Proposed` serializes as "proposed"
+pub enum TeacherCandidateStatus {
+    #[default] // what `Default::default()` returns for this enum
+    Proposed, // the only status assigned by the trace helpers visible in this file
+    Accepted,
+    Rejected,
+    Promoted,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct TeacherCandidate { // serializable record of one teacher-review proposal
+    pub candidate_id: String, // trace helpers format this as "<review id>:<source id>"
+    pub kind: TeacherCandidateKind,
+    #[serde(default)] // a missing field deserializes as the default status (`Proposed`)
+    pub status: TeacherCandidateStatus,
+    pub source_node_id: String, // trace helpers use the branch task id, leaf id, or node id
+    #[serde(default)] // a missing field deserializes as `None`
+    pub source_branch_id: Option<String>, // trace helpers set this only for branches
+    pub summary: String, // one-sentence description naming the source and its status/kind
+    #[serde(default)] // a missing field deserializes as an empty list
+    pub evidence: Vec<String>, // "key=value" lines, e.g. "status=Succeeded"
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub struct TeacherReviewReport { // candidates proposed for one teacher-review node
+    pub review_node_id: String, // `from_execution` copies this from the review spec's `id`
+    #[serde(default)] // a missing field deserializes as an empty list
+    pub candidates: Vec<TeacherCandidate>,
+}
+
+impl TeacherReviewReport {
+    /// Builds the report for `review` from the results recorded in `execution`.
+    pub fn from_execution(review: &TeacherReviewSpec, execution: &WorkflowExecution) -> Self {
+        Self {
+            review_node_id: review.id.clone(),
+            candidates: teacher_candidates_from_execution(review, execution),
+        }
+    }
+}
+
+/// Proposes one candidate per review source id that matches a recorded result.
+///
+/// Each id is looked up among branch results, then leaf results, then control-node
+/// results; the first match wins, and ids that match nothing are skipped.
+pub fn teacher_candidates_from_execution(
+    review: &TeacherReviewSpec,
+    execution: &WorkflowExecution,
+) -> Vec<TeacherCandidate> {
+    let propose = |id: &String| -> Option<TeacherCandidate> {
+        let branch_match = execution
+            .branch_results
+            .iter()
+            .find(|b| b.branch_id == *id || b.task_id == *id);
+        if let Some(branch) = branch_match {
+            return Some(teacher_candidate_from_branch(review, branch));
+        }
+        let leaf_match = execution
+            .leaf_results
+            .iter()
+            .find(|l| l.leaf_id == *id || l.task_id == *id);
+        if let Some(leaf) = leaf_match {
+            return Some(teacher_candidate_from_leaf(review, leaf));
+        }
+        let control_match = execution
+            .control_node_results
+            .iter()
+            .find(|c| c.node_id == *id);
+        control_match.map(|control| teacher_candidate_from_control(review, control))
+    };
+    // Candidates come out in the order their ids are listed on the review spec.
+    review.candidates.iter().filter_map(propose).collect()
+}
+
+/// Builds a `Proposed` candidate from a branch result, classifying it by cache hits and
+/// status; free-form notes are capped with `truncate_evidence` like other evidence text.
+fn teacher_candidate_from_branch(
+    review: &TeacherReviewSpec,
+    branch: &BranchResult,
+) -> TeacherCandidate {
+    let memo = &branch.memo_usage;
+    let cache_hit = memo.armh_hits > 0 || memo.provider_prompt_cache_hits > 0;
+    // Cache hits take precedence over the branch status when picking the kind.
+    let kind = if cache_hit {
+        TeacherCandidateKind::CachePolicyPatch
+    } else if branch.status == WorkflowRunStatus::Succeeded {
+        TeacherCandidateKind::WorkflowRecipe
+    } else {
+        TeacherCandidateKind::BranchHeuristic
+    };
+    let mut evidence = vec![format!("status={:?}", branch.status)];
+    let (tokens, cost) = (branch.usage.total_tokens(), branch.usage.cost_microusd);
+    if tokens > 0 || cost > 0 {
+        evidence.push(format!("tokens={tokens}, cost_microusd={cost}"));
+    }
+    if cache_hit {
+        let (armh, provider) = (memo.armh_hits, memo.provider_prompt_cache_hits);
+        evidence.push(format!("armh_hits={armh}, provider_prompt_cache_hits={provider}"));
+    }
+    if let Some(notes) = branch.notes.as_deref() {
+        // Cap notes like leaf output and control summaries so evidence stays short.
+        evidence.push(format!("notes={}", truncate_evidence(notes)));
+    }
+    TeacherCandidate {
+        candidate_id: format!("{}:{}", review.id, branch.branch_id),
+        kind,
+        status: TeacherCandidateStatus::Proposed,
+        source_node_id: branch.task_id.clone(),
+        source_branch_id: Some(branch.branch_id.clone()),
+        summary: format!(
+            "TeacherReview candidate from branch `{}` with {:?} status.",
+            branch.branch_id, branch.status
+        ),
+        evidence,
+    }
+}
+
+/// Builds a `Proposed` candidate from a leaf result: failed leaves map to regression
+/// tests, otherwise cache hits map to cache-policy patches, and the rest to notes.
+fn teacher_candidate_from_leaf(review: &TeacherReviewSpec, leaf: &LeafResult) -> TeacherCandidate {
+    let memo = &leaf.memo_usage;
+    let kind = if leaf.status == WorkflowRunStatus::Failed {
+        TeacherCandidateKind::RegressionTest
+    } else if memo.armh_hits > 0 || memo.provider_prompt_cache_hits > 0 {
+        TeacherCandidateKind::CachePolicyPatch
+    } else {
+        TeacherCandidateKind::Note
+    };
+    let mut evidence = vec![format!("status={:?}", leaf.status)];
+    let capped_output = leaf.output.as_deref().map(truncate_evidence);
+    evidence.extend(capped_output.map(|text| format!("output={text}")));
+    let (leaf_id, status) = (&leaf.leaf_id, &leaf.status);
+    TeacherCandidate {
+        candidate_id: format!("{}:{}", review.id, leaf_id),
+        kind,
+        status: TeacherCandidateStatus::Proposed,
+        source_node_id: leaf_id.clone(),
+        source_branch_id: None,
+        summary: format!("TeacherReview candidate from leaf `{leaf_id}` with {status:?} status."),
+        evidence,
+    }
+}
+
+/// Builds a `Proposed` Starlark-authoring-prompt-patch candidate from a control node,
+/// recording its status, any selected children, and a capped copy of its summary.
+fn teacher_candidate_from_control(
+    review: &TeacherReviewSpec,
+    control: &ControlNodeResult,
+) -> TeacherCandidate {
+    // Each optional evidence line is `None` when the node has nothing to report for it.
+    let children_line = (!control.selected_children.is_empty())
+        .then(|| format!("selected_children={}", control.selected_children.join(",")));
+    let capped_summary = control.summary.as_deref().map(truncate_evidence);
+    let evidence: Vec<String> = std::iter::once(format!("status={:?}", control.status))
+        .chain(children_line)
+        .chain(capped_summary.map(|text| format!("summary={text}")))
+        .collect();
+    TeacherCandidate {
+        candidate_id: format!("{}:{}", review.id, control.node_id),
+        kind: TeacherCandidateKind::StarlarkAuthoringPromptPatch,
+        status: TeacherCandidateStatus::Proposed,
+        source_node_id: control.node_id.clone(),
+        source_branch_id: None,
+        summary: format!(
+            "TeacherReview candidate from control node `{}` ({:?}).",
+            control.node_id, control.kind
+        ),
+        evidence,
+    }
+}
+
+/// Caps `value` at 240 characters; longer text is cut and suffixed with "...".
+fn truncate_evidence(value: &str) -> String {
+    const MAX_EVIDENCE_CHARS: usize = 240;
+    if value.chars().count() <= MAX_EVIDENCE_CHARS {
+        return value.to_string();
+    }
+    // Keep 3 fewer characters so the "..." suffix still fits within the cap.
+    let kept = MAX_EVIDENCE_CHARS.saturating_sub(3);
+    let mut truncated: String = value.chars().take(kept).collect();
+    truncated.push_str("...");
+    truncated
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub struct BranchTournament {
     #[serde(default)]
@@ -2227,6 +2423,107 @@ mod tests {
         );
     }
 
+    /// Round-trips a candidate through JSON and checks the snake_case enum encoding.
+    #[test]
+    fn teacher_candidate_serialization() {
+        let evidence = ["status=Succeeded", "tokens=42, cost_microusd=7"];
+        let original = TeacherCandidate {
+            candidate_id: String::from("teacher-review:branch-a"),
+            kind: TeacherCandidateKind::WorkflowRecipe,
+            status: TeacherCandidateStatus::Proposed,
+            source_node_id: String::from("branch-a"),
+            source_branch_id: Some(String::from("branch-a")),
+            summary: String::from("Winning branch found a reusable workflow recipe."),
+            evidence: evidence.map(String::from).to_vec(),
+        };
+
+        let encoded = serde_json::to_string(&original).expect("serialize teacher candidate");
+        for needle in ["\"kind\":\"workflow_recipe\"", "\"status\":\"proposed\""] {
+            assert!(encoded.contains(needle));
+        }
+
+        let decoded: TeacherCandidate =
+            serde_json::from_str(&encoded).expect("parse teacher candidate");
+        assert_eq!(decoded, original);
+    }
+
+    /// A succeeded branch without cache hits yields a single proposed workflow-recipe
+    /// candidate whose evidence reports the branch's token usage.
+    #[test]
+    fn teacher_review_produces_candidate_from_trace() {
+        let branch_id = String::from("winning-branch");
+        let spec = TeacherReviewSpec {
+            id: String::from("teacher-review"),
+            candidates: vec![branch_id.clone()],
+            promotion_policy: PromotionPolicy::default(),
+        };
+
+        let winning_branch = BranchResult {
+            branch_id: branch_id.clone(),
+            task_id: branch_id,
+            status: WorkflowRunStatus::Succeeded,
+            usage: WorkflowUsage {
+                input_tokens: 30,
+                output_tokens: 12,
+                cost_microusd: 7,
+            },
+            memo_usage: WorkflowMemoUsage::default(),
+            artifacts: vec![String::from("trace://branches/winning-branch")],
+            notes: Some(String::from("branch produced a minimal verified patch")),
+        };
+        let trace = WorkflowExecution {
+            branch_results: vec![winning_branch],
+            ..WorkflowExecution::default()
+        };
+
+        let report = TeacherReviewReport::from_execution(&spec, &trace);
+
+        assert_eq!(report.review_node_id, "teacher-review");
+        assert_eq!(report.candidates.len(), 1);
+        let candidate = &report.candidates[0];
+        assert_eq!(candidate.kind, TeacherCandidateKind::WorkflowRecipe);
+        assert_eq!(candidate.status, TeacherCandidateStatus::Proposed);
+        // The expected total of 42 is the 30 input plus 12 output tokens set above.
+        let reports_tokens = candidate
+            .evidence
+            .iter()
+            .any(|line| line.contains("tokens=42"));
+        assert!(reports_tokens);
+    }
+
+    /// A failed leaf yields a proposed regression-test candidate that quotes its output.
+    #[test]
+    fn failed_leaf_becomes_regression_test_candidate() {
+        let failure = "cargo test failed with a replay mismatch";
+        let spec = TeacherReviewSpec {
+            id: String::from("teacher-review"),
+            candidates: vec![String::from("verify-failure")],
+            promotion_policy: PromotionPolicy::default(),
+        };
+        let failed_leaf = LeafResult {
+            leaf_id: String::from("verify-failure"),
+            task_id: String::from("verify-failure"),
+            status: WorkflowRunStatus::Failed,
+            usage: WorkflowUsage::default(),
+            memo_usage: WorkflowMemoUsage::default(),
+            output: Some(failure.to_owned()),
+            artifacts: Vec::new(),
+        };
+        let trace = WorkflowExecution {
+            leaf_results: vec![failed_leaf],
+            ..WorkflowExecution::default()
+        };
+
+        let proposed = teacher_candidates_from_execution(&spec, &trace);
+
+        assert_eq!(proposed.len(), 1);
+        let candidate = &proposed[0];
+        assert_eq!(candidate.kind, TeacherCandidateKind::RegressionTest);
+        assert_eq!(candidate.status, TeacherCandidateStatus::Proposed);
+        let quoted = candidate.evidence.iter().any(|line| line.contains(failure));
+        assert!(quoted);
+    }
+
     #[test]
     fn tournament_selects_passing_minimal_branch() {
         let tournament = BranchTournament { min_score: 60 };