feat(subagents): add agent run receipts

This commit is contained in:
Hunter B
2026-06-12 22:45:53 -07:00
parent 07670871d0
commit c0ba6ce5ad
6 changed files with 686 additions and 4 deletions
+186
View File
@@ -55,6 +55,7 @@ use crate::skill_state::SkillStateStore;
use crate::task_manager::{
NewTaskRequest, SharedTaskManager, TaskManager, TaskManagerConfig, TaskRecord, TaskSummary,
};
use crate::tools::subagent::{AgentWorkerRecord, load_persisted_agent_worker_records};
use codewhale_protocol::fleet::{
FleetArtifactKind, FleetRun, FleetRunId, FleetWorkerEventPayload, FleetWorkerStatus,
};
@@ -322,6 +323,11 @@ struct SkillsResponse {
skills: Vec<SkillEntry>,
}
#[derive(Debug, Serialize)]
struct AgentRunsResponse {
runs: Vec<AgentWorkerRecord>,
}
#[derive(Debug, Deserialize)]
struct SetSkillEnabledRequest {
enabled: bool,
@@ -590,6 +596,8 @@ pub fn build_router(state: RuntimeApiState) -> Router {
post(resume_session_thread),
)
.route("/v1/workspace/status", get(workspace_status))
.route("/v1/agent-runs", get(list_agent_runs))
.route("/v1/agent-runs/{run_id}", get(get_agent_run))
.route("/v1/fleet/runs", get(list_fleet_runs))
.route("/v1/fleet/runs/{run_id}", get(get_fleet_run))
.route(
@@ -1344,6 +1352,36 @@ async fn workspace_status(
Ok(Json(collect_workspace_status(&state.workspace)))
}
async fn list_agent_runs(
State(state): State<RuntimeApiState>,
) -> Result<Json<AgentRunsResponse>, ApiError> {
let runs = load_persisted_agent_worker_records(&state.workspace).map_err(|err| {
ApiError::internal(format!("Failed to load persisted agent run records: {err}"))
})?;
Ok(Json(AgentRunsResponse { runs }))
}
async fn get_agent_run(
State(state): State<RuntimeApiState>,
Path(run_id): Path<String>,
) -> Result<Json<AgentWorkerRecord>, ApiError> {
let runs = load_persisted_agent_worker_records(&state.workspace).map_err(|err| {
ApiError::internal(format!("Failed to load persisted agent run records: {err}"))
})?;
let run = runs
.into_iter()
.find(|record| {
let effective_run_id = if record.spec.run_id.is_empty() {
record.spec.worker_id.as_str()
} else {
record.spec.run_id.as_str()
};
effective_run_id == run_id || record.spec.worker_id == run_id
})
.ok_or_else(|| ApiError::not_found(format!("agent run '{run_id}' not found")))?;
Ok(Json(run))
}
async fn list_fleet_runs(State(state): State<RuntimeApiState>) -> Result<Json<Value>, ApiError> {
let manager = open_fleet_manager(&state)?;
let ledger_state = manager
@@ -4030,6 +4068,154 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn agent_runs_runtime_api_exposes_persisted_worker_receipts() -> Result<()> {
use crate::tools::subagent::{
AgentRunArtifactRef, AgentRunFollowUpTarget, AgentRunTakeoverTarget, AgentRunUsage,
AgentRunVerificationSummary, AgentWorkerEvent, AgentWorkerRecord, AgentWorkerSpec,
AgentWorkerStatus, AgentWorkerToolProfile, SubAgentType,
};
use std::collections::VecDeque;
let root =
std::env::temp_dir().join(format!("codewhale-agent-runs-api-{}", Uuid::new_v4()));
let workspace = root.join("workspace");
fs::create_dir_all(workspace.join(".codewhale/state"))?;
let record = AgentWorkerRecord {
spec: AgentWorkerSpec {
worker_id: "agent_receipt".to_string(),
run_id: "run_receipt".to_string(),
parent_run_id: Some("parent_run".to_string()),
session_name: Some("receipt_lane".to_string()),
objective: "Verify run receipt projection".to_string(),
role: Some("verifier".to_string()),
agent_type: SubAgentType::Verifier,
model: "deepseek-v4-flash".to_string(),
workspace: workspace.clone(),
git_branch: Some("codex/v0.8.60".to_string()),
context_mode: "fresh".to_string(),
fork_context: false,
tool_profile: AgentWorkerToolProfile::Explicit(vec!["read_file".to_string()]),
max_steps: 4,
spawn_depth: 1,
max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH,
},
actor_kind: "subagent".to_string(),
parent_run_id: Some("parent_run".to_string()),
follow_up: AgentRunFollowUpTarget {
tool: "agent_eval".to_string(),
agent_id: "agent_receipt".to_string(),
session_name: Some("receipt_lane".to_string()),
accepted_statuses: vec![
"running".to_string(),
"interrupted_continuable".to_string(),
],
latest_delivery: None,
},
takeover: AgentRunTakeoverTarget {
kind: "local_subagent_session".to_string(),
supported: true,
agent_id: "agent_receipt".to_string(),
session_name: Some("receipt_lane".to_string()),
instructions: "Use agent_eval with agent_id 'agent_receipt'.".to_string(),
unsupported_reason: None,
},
artifacts: vec![AgentRunArtifactRef {
kind: "transcript".to_string(),
name: "transcript_handle".to_string(),
target: "agent:agent_receipt".to_string(),
description: "Read with handle_read from a live projection.".to_string(),
}],
usage: AgentRunUsage {
status: "unknown".to_string(),
total_tokens: None,
note: "not reported".to_string(),
},
verification: AgentRunVerificationSummary {
status: "self_report_only".to_string(),
summary: "no verified receipt attached".to_string(),
},
status: AgentWorkerStatus::Completed,
created_at_ms: 1,
updated_at_ms: 2,
started_at_ms: Some(1),
completed_at_ms: Some(2),
latest_message: Some("completed".to_string()),
result_summary: Some("receipt complete".to_string()),
error: None,
steps_taken: 2,
events: VecDeque::from([AgentWorkerEvent {
seq: 1,
worker_id: "agent_receipt".to_string(),
status: AgentWorkerStatus::Completed,
timestamp_ms: 2,
message: Some("completed".to_string()),
step: Some(2),
tool_name: None,
}]),
};
let state_payload = json!({
"schema_version": 1,
"agents": [],
"workers": [record],
});
fs::write(
workspace.join(".codewhale/state/subagents.v1.json"),
serde_json::to_vec_pretty(&state_payload)?,
)?;
let sessions_dir = root.join("sessions");
let Some((addr, _runtime_threads, handle)) =
spawn_test_server_with_root_token_mobile_workspace(
root.clone(),
sessions_dir,
None,
false,
workspace,
)
.await?
else {
return Ok(());
};
let client = crate::tls::reqwest_client();
let runs: serde_json::Value = client
.get(format!("http://{addr}/v1/agent-runs"))
.send()
.await?
.error_for_status()?
.json()
.await?;
assert_eq!(runs["runs"][0]["spec"]["run_id"], "run_receipt");
assert_eq!(runs["runs"][0]["follow_up"]["tool"], "agent_eval");
assert_eq!(
runs["runs"][0]["verification"]["status"],
"self_report_only"
);
let run: serde_json::Value = client
.get(format!("http://{addr}/v1/agent-runs/run_receipt"))
.send()
.await?
.error_for_status()?
.json()
.await?;
assert_eq!(run["spec"]["worker_id"], "agent_receipt");
assert_eq!(run["takeover"]["supported"], true);
assert_eq!(run["artifacts"][0]["kind"], "transcript");
let missing = client
.get(format!("http://{addr}/v1/agent-runs/missing"))
.send()
.await?
.status();
assert_eq!(missing, StatusCode::NOT_FOUND);
handle.abort();
Ok(())
}
#[tokio::test]
async fn stream_requires_prompt() -> Result<()> {
let Some((addr, _runtime_threads, handle)) = spawn_test_server().await? else {
+391 -4
View File
@@ -637,6 +637,7 @@ pub enum AgentWorkerStatus {
Queued,
Starting,
Running,
WaitingForUser,
ModelWait,
RunningTool,
Completed,
@@ -659,6 +660,12 @@ pub enum AgentWorkerToolProfile {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AgentWorkerSpec {
pub worker_id: String,
#[serde(default, skip_serializing_if = "String::is_empty")]
pub run_id: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub parent_run_id: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub session_name: Option<String>,
pub objective: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub role: Option<String>,
@@ -675,6 +682,69 @@ pub struct AgentWorkerSpec {
pub max_spawn_depth: u32,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AgentRunFollowUpDelivery {
pub delivered: bool,
pub timestamp_ms: u64,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub message_preview: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
#[serde(default, skip_serializing_if = "is_false")]
pub interrupt: bool,
#[serde(default, skip_serializing_if = "is_false")]
pub continued_from_checkpoint: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AgentRunFollowUpTarget {
#[serde(default = "default_agent_eval_tool")]
pub tool: String,
pub agent_id: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub session_name: Option<String>,
#[serde(default)]
pub accepted_statuses: Vec<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub latest_delivery: Option<AgentRunFollowUpDelivery>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AgentRunTakeoverTarget {
#[serde(default = "default_subagent_takeover_kind")]
pub kind: String,
#[serde(default)]
pub supported: bool,
pub agent_id: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub session_name: Option<String>,
pub instructions: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub unsupported_reason: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AgentRunArtifactRef {
pub kind: String,
pub name: String,
pub target: String,
pub description: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AgentRunUsage {
pub status: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub total_tokens: Option<u64>,
pub note: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AgentRunVerificationSummary {
pub status: String,
pub summary: String,
}
/// Structured headless worker event.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AgentWorkerEvent {
@@ -694,6 +764,20 @@ pub struct AgentWorkerEvent {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AgentWorkerRecord {
pub spec: AgentWorkerSpec,
#[serde(default = "default_subagent_actor_kind")]
pub actor_kind: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub parent_run_id: Option<String>,
#[serde(default = "default_agent_run_follow_up")]
pub follow_up: AgentRunFollowUpTarget,
#[serde(default = "default_agent_run_takeover")]
pub takeover: AgentRunTakeoverTarget,
#[serde(default)]
pub artifacts: Vec<AgentRunArtifactRef>,
#[serde(default = "default_agent_run_usage")]
pub usage: AgentRunUsage,
#[serde(default = "default_agent_run_verification")]
pub verification: AgentRunVerificationSummary,
pub status: AgentWorkerStatus,
pub created_at_ms: u64,
pub updated_at_ms: u64,
@@ -715,8 +799,19 @@ pub struct AgentWorkerRecord {
impl AgentWorkerRecord {
fn new(spec: AgentWorkerSpec, now_ms: u64) -> Self {
let run_id = agent_worker_run_id(&spec);
let artifacts = default_subagent_artifacts(&run_id);
let follow_up = follow_up_target_for_spec(&spec);
let takeover = takeover_target_for_spec(&spec);
Self {
parent_run_id: spec.parent_run_id.clone(),
spec,
actor_kind: default_subagent_actor_kind(),
follow_up,
takeover,
artifacts,
usage: default_agent_run_usage(),
verification: default_agent_run_verification(),
status: AgentWorkerStatus::Starting,
created_at_ms: now_ms,
updated_at_ms: now_ms,
@@ -731,6 +826,162 @@ impl AgentWorkerRecord {
}
}
fn default_subagent_actor_kind() -> String {
"subagent".to_string()
}
fn default_agent_eval_tool() -> String {
"agent_eval".to_string()
}
fn default_subagent_takeover_kind() -> String {
"local_subagent_session".to_string()
}
fn default_agent_run_follow_up() -> AgentRunFollowUpTarget {
AgentRunFollowUpTarget {
tool: default_agent_eval_tool(),
agent_id: String::new(),
session_name: None,
accepted_statuses: vec!["running".to_string(), "interrupted_continuable".to_string()],
latest_delivery: None,
}
}
fn default_agent_run_takeover() -> AgentRunTakeoverTarget {
AgentRunTakeoverTarget {
kind: default_subagent_takeover_kind(),
supported: false,
agent_id: String::new(),
session_name: None,
instructions: "No takeover target is available for this older record.".to_string(),
unsupported_reason: Some("legacy_record_missing_agent_id".to_string()),
}
}
fn default_agent_run_usage() -> AgentRunUsage {
AgentRunUsage {
status: "unknown".to_string(),
total_tokens: None,
note: "Token usage is not yet reported by the sub-agent worker ledger.".to_string(),
}
}
fn default_agent_run_verification() -> AgentRunVerificationSummary {
AgentRunVerificationSummary {
status: "self_report_only".to_string(),
summary:
"No verified command or test receipt is attached; treat the result summary as a child self-report."
.to_string(),
}
}
fn agent_worker_run_id(spec: &AgentWorkerSpec) -> String {
if spec.run_id.is_empty() {
spec.worker_id.clone()
} else {
spec.run_id.clone()
}
}
fn follow_up_target_for_spec(spec: &AgentWorkerSpec) -> AgentRunFollowUpTarget {
AgentRunFollowUpTarget {
tool: default_agent_eval_tool(),
agent_id: spec.worker_id.clone(),
session_name: spec.session_name.clone(),
accepted_statuses: vec!["running".to_string(), "interrupted_continuable".to_string()],
latest_delivery: None,
}
}
fn takeover_target_for_spec(spec: &AgentWorkerSpec) -> AgentRunTakeoverTarget {
let agent_ref = spec
.session_name
.as_deref()
.filter(|name| !name.is_empty())
.unwrap_or(&spec.worker_id);
AgentRunTakeoverTarget {
kind: default_subagent_takeover_kind(),
supported: true,
agent_id: spec.worker_id.clone(),
session_name: spec.session_name.clone(),
instructions: format!(
"Use agent_eval with agent_id '{agent_ref}' to inspect or send follow-up input; use agent_close to cancel the lane."
),
unsupported_reason: None,
}
}
fn default_subagent_artifacts(run_id: &str) -> Vec<AgentRunArtifactRef> {
vec![
AgentRunArtifactRef {
kind: "worker_events".to_string(),
name: "worker_record.events".to_string(),
target: run_id.to_string(),
description: "Bounded structured lifecycle events retained on the worker record."
.to_string(),
},
AgentRunArtifactRef {
kind: "transcript".to_string(),
name: "transcript_handle".to_string(),
target: format!("agent:{run_id}"),
description:
"Use the projection transcript_handle with handle_read for the child transcript."
.to_string(),
},
AgentRunArtifactRef {
kind: "receipt".to_string(),
name: "result_summary".to_string(),
target: run_id.to_string(),
description: "Child final summary when present; verify before treating as fact."
.to_string(),
},
]
}
fn message_preview(text: &str) -> String {
const MAX_PREVIEW_CHARS: usize = 120;
let mut preview: String = text.chars().take(MAX_PREVIEW_CHARS).collect();
if text.chars().count() > MAX_PREVIEW_CHARS {
preview.push_str("...");
}
preview
}
fn normalize_worker_spec(mut spec: AgentWorkerSpec) -> AgentWorkerSpec {
if spec.run_id.is_empty() {
spec.run_id = spec.worker_id.clone();
}
spec
}
fn normalize_worker_record(mut record: AgentWorkerRecord) -> AgentWorkerRecord {
record.spec = normalize_worker_spec(record.spec);
let run_id = agent_worker_run_id(&record.spec);
if record.actor_kind.is_empty() {
record.actor_kind = default_subagent_actor_kind();
}
if record.parent_run_id.is_none() {
record.parent_run_id = record.spec.parent_run_id.clone();
}
if record.follow_up.agent_id.is_empty() {
record.follow_up = follow_up_target_for_spec(&record.spec);
}
if record.takeover.agent_id.is_empty() {
record.takeover = takeover_target_for_spec(&record.spec);
}
if record.artifacts.is_empty() {
record.artifacts = default_subagent_artifacts(&run_id);
}
if record.usage.status.is_empty() {
record.usage = default_agent_run_usage();
}
if record.verification.status.is_empty() {
record.verification = default_agent_run_verification();
}
record
}
fn is_false(b: &bool) -> bool {
!*b
}
@@ -1502,6 +1753,7 @@ impl SubAgentManager {
self.agents.insert(persisted.id, agent);
}
for worker in state.workers {
let worker = normalize_worker_record(worker);
self.worker_event_seq = self.worker_event_seq.max(
worker
.events
@@ -1545,7 +1797,7 @@ impl SubAgentManager {
fn register_worker(&mut self, spec: AgentWorkerSpec) {
let worker_id = spec.worker_id.clone();
let now_ms = epoch_millis_now();
let mut record = AgentWorkerRecord::new(spec, now_ms);
let mut record = AgentWorkerRecord::new(normalize_worker_spec(spec), now_ms);
self.push_worker_event(
&mut record,
AgentWorkerStatus::Starting,
@@ -1634,7 +1886,7 @@ impl SubAgentManager {
}
fn complete_worker_from_result(&mut self, worker_id: &str, result: &SubAgentResult) {
let status = worker_status_from_subagent_status(&result.status);
let status = worker_status_from_subagent_result(result);
let message = match &result.status {
SubAgentStatus::Completed => Some("completed".to_string()),
SubAgentStatus::Failed(err) => Some(err.clone()),
@@ -1652,6 +1904,33 @@ impl SubAgentManager {
}
}
fn record_follow_up_delivery(
&mut self,
worker_id: &str,
delivered: bool,
message: Option<&str>,
reason: Option<&str>,
interrupt: bool,
continued_from_checkpoint: bool,
) {
let Some(record) = self.worker_records.get_mut(worker_id) else {
return;
};
let now_ms = epoch_millis_now();
record.updated_at_ms = now_ms;
record.follow_up.latest_delivery = Some(AgentRunFollowUpDelivery {
delivered,
timestamp_ms: now_ms,
message_preview: message.map(message_preview),
reason: reason.map(str::to_string),
interrupt,
continued_from_checkpoint,
});
if delivered {
record.latest_message = Some("follow-up delivered via agent_eval".to_string());
}
}
fn fail_worker(&mut self, worker_id: &str, error: String) {
self.record_worker_event(
worker_id,
@@ -1830,6 +2109,9 @@ impl SubAgentManager {
let max_steps = self.max_steps;
let worker_spec = AgentWorkerSpec {
worker_id: agent_id.clone(),
run_id: agent_id.clone(),
parent_run_id: None,
session_name: Some(agent.session_name.clone()),
objective: assignment.objective.clone(),
role: assignment.role.clone(),
agent_type: agent_type.clone(),
@@ -2400,17 +2682,36 @@ impl SubAgentManager {
/// Thread-safe wrapper for `SubAgentManager`.
pub type SharedSubAgentManager = Arc<RwLock<SubAgentManager>>;
pub fn load_persisted_agent_worker_records(workspace: &Path) -> Result<Vec<AgentWorkerRecord>> {
let mut manager = SubAgentManager::new(workspace.to_path_buf(), 1)
.with_state_path(default_state_path(workspace));
manager.load_state()?;
Ok(manager.list_worker_records())
}
/// Model-facing session projection returned by the v0.8.33 sub-agent API.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubAgentSessionProjection {
pub name: String,
pub agent_id: String,
#[serde(default)]
pub run_id: String,
pub status: String,
pub terminal: bool,
pub context_mode: String,
pub fork_context: bool,
pub prefix_cache: SubAgentPrefixCacheProjection,
pub transcript_handle: VarHandle,
#[serde(default = "default_agent_run_follow_up")]
pub follow_up: AgentRunFollowUpTarget,
#[serde(default = "default_agent_run_takeover")]
pub takeover: AgentRunTakeoverTarget,
#[serde(default)]
pub artifacts: Vec<AgentRunArtifactRef>,
#[serde(default = "default_agent_run_usage")]
pub usage: AgentRunUsage,
#[serde(default = "default_agent_run_verification")]
pub verification: AgentRunVerificationSummary,
pub snapshot: SubAgentResult,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub checkpoint: Option<SubAgentCheckpoint>,
@@ -2493,16 +2794,62 @@ async fn subagent_session_projection(
store.insert_json(transcript_session_id, "transcript", transcript_payload)
}
};
let run_id = worker_record
.as_ref()
.map(|record| agent_worker_run_id(&record.spec))
.unwrap_or_else(|| snapshot.agent_id.clone());
let follow_up = worker_record
.as_ref()
.map(|record| record.follow_up.clone())
.unwrap_or_else(|| AgentRunFollowUpTarget {
tool: default_agent_eval_tool(),
agent_id: snapshot.agent_id.clone(),
session_name: Some(snapshot.name.clone()),
accepted_statuses: vec!["running".to_string(), "interrupted_continuable".to_string()],
latest_delivery: None,
});
let takeover = worker_record
.as_ref()
.map(|record| record.takeover.clone())
.unwrap_or_else(|| AgentRunTakeoverTarget {
kind: default_subagent_takeover_kind(),
supported: true,
agent_id: snapshot.agent_id.clone(),
session_name: Some(snapshot.name.clone()),
instructions: format!(
"Use agent_eval with agent_id '{}' to inspect or send follow-up input; use agent_close to cancel the lane.",
snapshot.agent_id
),
unsupported_reason: None,
});
let artifacts = worker_record
.as_ref()
.map(|record| record.artifacts.clone())
.unwrap_or_else(|| default_subagent_artifacts(&run_id));
let usage = worker_record
.as_ref()
.map(|record| record.usage.clone())
.unwrap_or_else(default_agent_run_usage);
let verification = worker_record
.as_ref()
.map(|record| record.verification.clone())
.unwrap_or_else(default_agent_run_verification);
SubAgentSessionProjection {
name: snapshot.name.clone(),
agent_id: snapshot.agent_id.clone(),
run_id,
status: subagent_status_name(&snapshot.status).to_string(),
terminal: snapshot.status != SubAgentStatus::Running,
context_mode: snapshot.context_mode.clone(),
fork_context: snapshot.fork_context,
prefix_cache: subagent_prefix_cache_projection(&snapshot),
transcript_handle,
follow_up,
takeover,
artifacts,
usage,
verification,
checkpoint: snapshot.checkpoint.clone(),
continuable: subagent_checkpoint_is_continuable(&snapshot),
snapshot,
@@ -3266,10 +3613,20 @@ impl ToolSpec for AgentEvalTool {
// child output".
let mut message_delivery: Option<Value> = None;
if continue_from_checkpoint {
let delivery_preview = message.clone();
let mut manager = self.manager.write().await;
manager
.continue_checkpointed(&agent_id, message, interrupt)
.map_err(|e| ToolError::execution_failed(e.to_string()))?;
manager.record_follow_up_delivery(
&agent_id,
true,
delivery_preview.as_deref(),
None,
interrupt,
true,
);
manager.persist_state_best_effort();
message_delivery = Some(json!({
"delivered": true,
"continued_from_checkpoint": true
@@ -3283,16 +3640,36 @@ impl ToolSpec for AgentEvalTool {
.unwrap_or(false)
};
if terminal {
let reason = "session already terminated; follow-up not delivered";
let mut manager = self.manager.write().await;
manager.record_follow_up_delivery(
&agent_id,
false,
Some(message.as_str()),
Some(reason),
interrupt,
false,
);
manager.persist_state_best_effort();
message_delivery = Some(json!({
"delivered": false,
"reason": "session already terminated; follow-up not delivered",
"reason": reason,
"recover_full_output": "read the returned transcript_handle with handle_read"
}));
} else {
let mut manager = self.manager.write().await;
manager
.send_input(&agent_id, message, interrupt)
.send_input(&agent_id, message.clone(), interrupt)
.map_err(|e| ToolError::execution_failed(e.to_string()))?;
manager.record_follow_up_delivery(
&agent_id,
true,
Some(message.as_str()),
None,
interrupt,
false,
);
manager.persist_state_best_effort();
message_delivery = Some(json!({ "delivered": true }));
}
}
@@ -6005,11 +6382,21 @@ fn worker_status_from_subagent_status(status: &SubAgentStatus) -> AgentWorkerSta
}
}
fn worker_status_from_subagent_result(result: &SubAgentResult) -> AgentWorkerStatus {
if subagent_checkpoint_is_continuable(result) {
AgentWorkerStatus::WaitingForUser
} else {
worker_status_from_subagent_status(&result.status)
}
}
fn worker_progress_event_parts(message: &str) -> (AgentWorkerStatus, Option<u32>, Option<String>) {
let step = parse_progress_step(message);
let lower = message.to_ascii_lowercase();
let status = if lower.contains("queued") {
AgentWorkerStatus::Queued
} else if lower.contains("waiting for user") || lower.contains("waiting for follow-up") {
AgentWorkerStatus::WaitingForUser
} else if lower.contains("requesting model response")
|| lower.contains(SUBAGENT_MODEL_WAIT_REASON)
{
+53
View File
@@ -32,6 +32,9 @@ fn make_snapshot(status: SubAgentStatus) -> SubAgentResult {
fn make_worker_spec(worker_id: &str, workspace: PathBuf) -> AgentWorkerSpec {
AgentWorkerSpec {
worker_id: worker_id.to_string(),
run_id: worker_id.to_string(),
parent_run_id: None,
session_name: Some(worker_id.to_string()),
objective: "inspect the repo".to_string(),
role: Some("explorer".to_string()),
agent_type: SubAgentType::Explore,
@@ -86,6 +89,8 @@ fn headless_worker_record_tracks_lifecycle_without_tui_projection() {
.get_worker_record("agent_worker_contract")
.expect("worker record");
assert_eq!(record.status, AgentWorkerStatus::Completed);
assert_eq!(record.spec.run_id, "agent_worker_contract");
assert_eq!(record.actor_kind, "subagent");
assert_eq!(record.spec.agent_type, SubAgentType::Explore);
assert_eq!(
record.spec.tool_profile,
@@ -93,6 +98,23 @@ fn headless_worker_record_tracks_lifecycle_without_tui_projection() {
);
assert_eq!(record.result_summary.as_deref(), Some("worker summary"));
assert_eq!(record.steps_taken, 1);
assert_eq!(record.follow_up.tool, "agent_eval");
assert_eq!(record.follow_up.agent_id.as_str(), "agent_worker_contract");
assert!(record.takeover.supported);
assert!(
record
.takeover
.instructions
.contains("agent_eval with agent_id")
);
assert_eq!(record.usage.status, "unknown");
assert_eq!(record.verification.status, "self_report_only");
assert!(
record
.artifacts
.iter()
.any(|artifact| artifact.kind == "transcript")
);
let statuses: Vec<_> = record.events.iter().map(|event| event.status).collect();
assert!(statuses.contains(&AgentWorkerStatus::Queued));
assert!(statuses.contains(&AgentWorkerStatus::ModelWait));
@@ -142,6 +164,9 @@ fn headless_worker_records_persist_with_subagent_state() {
loaded.load_state().expect("load state");
let record = loaded.get_worker_record("agent_persisted").expect("record");
assert_eq!(record.spec.run_id, "agent_persisted");
assert_eq!(record.follow_up.agent_id, "agent_persisted");
assert!(record.takeover.supported);
assert_eq!(record.status, AgentWorkerStatus::Failed);
assert_eq!(record.error.as_deref(), Some("boom"));
assert_eq!(record.steps_taken, 3);
@@ -659,6 +684,12 @@ async fn session_projection_exposes_forked_prefix_cache_contract() {
assert_eq!(projection.name, "fanout_review");
assert_eq!(projection.context_mode, "forked");
assert_eq!(projection.run_id, "agent_test");
assert_eq!(projection.follow_up.tool, "agent_eval");
assert_eq!(projection.follow_up.agent_id, "agent_test");
assert!(projection.takeover.supported);
assert_eq!(projection.usage.status, "unknown");
assert_eq!(projection.verification.status, "self_report_only");
assert!(projection.fork_context);
assert_eq!(projection.prefix_cache.mode, "forked");
assert_eq!(
@@ -1304,8 +1335,11 @@ async fn test_wait_for_result_reports_timeout_when_still_running() {
"boot_test".to_string(),
);
let agent_id = agent.id.clone();
let snapshot = agent.snapshot();
{
let mut guard = manager.write().await;
guard.register_worker(make_worker_spec(&agent_id, PathBuf::from(".")));
guard.complete_worker_from_result(&agent_id, &snapshot);
guard.agents.insert(agent_id.clone(), agent);
}
@@ -1340,8 +1374,11 @@ async fn agent_eval_on_completed_session_returns_full_projection_not_running_err
agent.status = SubAgentStatus::Completed;
agent.result = Some(full_output.clone());
let agent_id = agent.id.clone();
let snapshot = agent.snapshot();
{
let mut guard = manager.write().await;
guard.register_worker(make_worker_spec(&agent_id, PathBuf::from(".")));
guard.complete_worker_from_result(&agent_id, &snapshot);
guard.agents.insert(agent_id.clone(), agent);
}
@@ -1366,6 +1403,22 @@ async fn agent_eval_on_completed_session_returns_full_projection_not_running_err
let projection: SubAgentSessionProjection =
serde_json::from_str(&result.content).expect("projection deserializes");
assert_eq!(projection.status, "completed");
assert_eq!(projection.run_id, "test_agent_2");
let worker_record = projection.worker_record.as_ref().expect("worker record");
let delivery = worker_record
.follow_up
.latest_delivery
.as_ref()
.expect("delivery receipt");
assert!(!delivery.delivered);
assert_eq!(
delivery.reason.as_deref(),
Some("session already terminated; follow-up not delivered")
);
assert_eq!(
delivery.message_preview.as_deref(),
Some("give me the full per-issue breakdown")
);
assert_eq!(projection.transcript_handle.kind, "var_handle");
// The full, untruncated child output survives in the snapshot the
// transcript_handle points at.
+22
View File
@@ -495,6 +495,28 @@ Verification:
npm test --workspace @codewhale/runtime-sdk
```
## Agent Run Receipts
Sub-agent lanes persist compact run receipts in
`.codewhale/state/subagents.v1.json`. The Runtime API exposes those receipts as
a read-only inspection surface:
| Operation | Endpoint |
|---|---|
| List persisted agent runs | `GET /v1/agent-runs` |
| Inspect one run | `GET /v1/agent-runs/{run_id}` |
The response is the same worker-record shape returned by `agent_eval`:
`spec.run_id`, `actor_kind`, lifecycle `status`, bounded `events`,
`follow_up`, `takeover`, `artifacts`, `usage`, and `verification`. `run_id`
falls back to the worker id for older records, and `{run_id}` may be either the
run id or the worker id.
These endpoints do not start, cancel, or steer sub-agents. Live follow-up still
goes through `agent_eval`; live cancellation still goes through `agent_close`.
The API surface exists so app/editor/headless clients can inspect the same
handoff receipts that the TUI and parent model see.
## Session lifecycle (native UI supervision)
| Operation | Endpoint |
+27
View File
@@ -228,6 +228,33 @@ Records that loaded from a pre-#405 persisted state file (no
`session_boot_id` field) classify as prior-session because the
manager can't match them to the current boot.
## Run receipts, follow-up, and takeover
Each sub-agent has a persisted worker record in
`.codewhale/state/subagents.v1.json`. The record is the current run-ledger
slice for sub-agent lanes: it stores `run_id`, objective, role/model,
workspace/branch, lifecycle events, artifact refs, follow-up target, takeover
target, usage provenance, and verification provenance.
`agent_eval` returns these fields at the top level of the session projection and
inside `worker_record`. A running or continuable interrupted child should be
continued through the returned `follow_up` target (`agent_eval` with the same
agent id or session name). A local takeover should use the returned `takeover`
instructions; unsupported future cases must say why instead of leaving the
operator to guess.
Follow-up delivery is explicit. If a message was delivered, the worker record
stores a bounded preview and timestamp. If the child had already terminated,
`agent_eval` still returns the projection and transcript handle, but records the
undelivered follow-up reason so queued instructions do not disappear into UI
state.
Artifacts are symbolic refs. Use `handle_read` on the returned
`transcript_handle` for transcript details, and treat `result_summary` as a
child self-report unless `verification.status` points to a separate gate or
receipt. `usage.status` is `unknown` until sub-agent token accounting is wired
into the worker ledger.
## Output contract
Every sub-agent produces a final result string with five sections,
+7
View File
@@ -136,6 +136,13 @@ without losing its source context.
Large logs and command outputs should be artifacts with compact summaries in the transcript. `task_gate_run` handles this automatically for active durable tasks.
Sub-agent runs also expose a compact run receipt through `agent_eval`: `run_id`,
`follow_up`, `takeover`, `artifacts`, `usage`, `verification`, and
`worker_record`. Follow-up delivery receipts record whether an `agent_eval`
message actually reached the child or why it did not. Usage is marked
`unknown` until worker-level token accounting is available, and verification is
`self_report_only` unless a separate gate or artifact proves the claim.
### GitHub context and guarded writes
| Tool | Niche |