diff --git a/crates/tui/src/runtime_api.rs b/crates/tui/src/runtime_api.rs index ec4587ed..0d927099 100644 --- a/crates/tui/src/runtime_api.rs +++ b/crates/tui/src/runtime_api.rs @@ -55,6 +55,7 @@ use crate::skill_state::SkillStateStore; use crate::task_manager::{ NewTaskRequest, SharedTaskManager, TaskManager, TaskManagerConfig, TaskRecord, TaskSummary, }; +use crate::tools::subagent::{AgentWorkerRecord, load_persisted_agent_worker_records}; use codewhale_protocol::fleet::{ FleetArtifactKind, FleetRun, FleetRunId, FleetWorkerEventPayload, FleetWorkerStatus, }; @@ -322,6 +323,11 @@ struct SkillsResponse { skills: Vec, } +#[derive(Debug, Serialize)] +struct AgentRunsResponse { + runs: Vec, +} + #[derive(Debug, Deserialize)] struct SetSkillEnabledRequest { enabled: bool, @@ -590,6 +596,8 @@ pub fn build_router(state: RuntimeApiState) -> Router { post(resume_session_thread), ) .route("/v1/workspace/status", get(workspace_status)) + .route("/v1/agent-runs", get(list_agent_runs)) + .route("/v1/agent-runs/{run_id}", get(get_agent_run)) .route("/v1/fleet/runs", get(list_fleet_runs)) .route("/v1/fleet/runs/{run_id}", get(get_fleet_run)) .route( @@ -1344,6 +1352,36 @@ async fn workspace_status( Ok(Json(collect_workspace_status(&state.workspace))) } +async fn list_agent_runs( + State(state): State, +) -> Result, ApiError> { + let runs = load_persisted_agent_worker_records(&state.workspace).map_err(|err| { + ApiError::internal(format!("Failed to load persisted agent run records: {err}")) + })?; + Ok(Json(AgentRunsResponse { runs })) +} + +async fn get_agent_run( + State(state): State, + Path(run_id): Path, +) -> Result, ApiError> { + let runs = load_persisted_agent_worker_records(&state.workspace).map_err(|err| { + ApiError::internal(format!("Failed to load persisted agent run records: {err}")) + })?; + let run = runs + .into_iter() + .find(|record| { + let effective_run_id = if record.spec.run_id.is_empty() { + record.spec.worker_id.as_str() + } else { + record.spec.run_id.as_str() + }; + effective_run_id == run_id || record.spec.worker_id == run_id + }) + .ok_or_else(|| ApiError::not_found(format!("agent run '{run_id}' not found")))?; + Ok(Json(run)) +} + async fn list_fleet_runs(State(state): State) -> Result, ApiError> { let manager = open_fleet_manager(&state)?; let ledger_state = manager @@ -4030,6 +4068,154 @@ mod tests { Ok(()) } + #[tokio::test] + async fn agent_runs_runtime_api_exposes_persisted_worker_receipts() -> Result<()> { + use crate::tools::subagent::{ + AgentRunArtifactRef, AgentRunFollowUpTarget, AgentRunTakeoverTarget, AgentRunUsage, + AgentRunVerificationSummary, AgentWorkerEvent, AgentWorkerRecord, AgentWorkerSpec, + AgentWorkerStatus, AgentWorkerToolProfile, SubAgentType, + }; + use std::collections::VecDeque; + + let root = + std::env::temp_dir().join(format!("codewhale-agent-runs-api-{}", Uuid::new_v4())); + let workspace = root.join("workspace"); + fs::create_dir_all(workspace.join(".codewhale/state"))?; + + let record = AgentWorkerRecord { + spec: AgentWorkerSpec { + worker_id: "agent_receipt".to_string(), + run_id: "run_receipt".to_string(), + parent_run_id: Some("parent_run".to_string()), + session_name: Some("receipt_lane".to_string()), + objective: "Verify run receipt projection".to_string(), + role: Some("verifier".to_string()), + agent_type: SubAgentType::Verifier, + model: "deepseek-v4-flash".to_string(), + workspace: workspace.clone(), + git_branch: Some("codex/v0.8.60".to_string()), + context_mode: "fresh".to_string(), + fork_context: false, + tool_profile: AgentWorkerToolProfile::Explicit(vec!["read_file".to_string()]), + max_steps: 4, + spawn_depth: 1, + max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, + }, + actor_kind: "subagent".to_string(), + parent_run_id: Some("parent_run".to_string()), + follow_up: AgentRunFollowUpTarget { + tool: "agent_eval".to_string(), + agent_id: "agent_receipt".to_string(), + session_name: Some("receipt_lane".to_string()), + accepted_statuses: vec![ + "running".to_string(), + "interrupted_continuable".to_string(), + ], + latest_delivery: None, + }, + takeover: AgentRunTakeoverTarget { + kind: "local_subagent_session".to_string(), + supported: true, + agent_id: "agent_receipt".to_string(), + session_name: Some("receipt_lane".to_string()), + instructions: "Use agent_eval with agent_id 'agent_receipt'.".to_string(), + unsupported_reason: None, + }, + artifacts: vec![AgentRunArtifactRef { + kind: "transcript".to_string(), + name: "transcript_handle".to_string(), + target: "agent:agent_receipt".to_string(), + description: "Read with handle_read from a live projection.".to_string(), + }], + usage: AgentRunUsage { + status: "unknown".to_string(), + total_tokens: None, + note: "not reported".to_string(), + }, + verification: AgentRunVerificationSummary { + status: "self_report_only".to_string(), + summary: "no verified receipt attached".to_string(), + }, + status: AgentWorkerStatus::Completed, + created_at_ms: 1, + updated_at_ms: 2, + started_at_ms: Some(1), + completed_at_ms: Some(2), + latest_message: Some("completed".to_string()), + result_summary: Some("receipt complete".to_string()), + error: None, + steps_taken: 2, + events: VecDeque::from([AgentWorkerEvent { + seq: 1, + worker_id: "agent_receipt".to_string(), + status: AgentWorkerStatus::Completed, + timestamp_ms: 2, + message: Some("completed".to_string()), + step: Some(2), + tool_name: None, + }]), + }; + let state_payload = json!({ + "schema_version": 1, + "agents": [], + "workers": [record], + }); + fs::write( + workspace.join(".codewhale/state/subagents.v1.json"), + serde_json::to_vec_pretty(&state_payload)?, + )?; + + let sessions_dir = root.join("sessions"); + let Some((addr, _runtime_threads, handle)) = + spawn_test_server_with_root_token_mobile_workspace( + root.clone(), + sessions_dir, + None, + false, + workspace, + ) + .await? + else { + return Ok(()); + }; + let client = crate::tls::reqwest_client(); + + let runs: serde_json::Value = client + .get(format!("http://{addr}/v1/agent-runs")) + .send() + .await? + .error_for_status()? + .json() + .await?; + assert_eq!(runs["runs"][0]["spec"]["run_id"], "run_receipt"); + assert_eq!(runs["runs"][0]["follow_up"]["tool"], "agent_eval"); + assert_eq!( + runs["runs"][0]["verification"]["status"], + "self_report_only" + ); + + let run: serde_json::Value = client + .get(format!("http://{addr}/v1/agent-runs/run_receipt")) + .send() + .await? + .error_for_status()? + .json() + .await?; + assert_eq!(run["spec"]["worker_id"], "agent_receipt"); + assert_eq!(run["takeover"]["supported"], true); + assert_eq!(run["artifacts"][0]["kind"], "transcript"); + + let missing = client + .get(format!("http://{addr}/v1/agent-runs/missing")) + .send() + .await? + .status(); + assert_eq!(missing, StatusCode::NOT_FOUND); + + handle.abort(); + Ok(()) + } + #[tokio::test] async fn stream_requires_prompt() -> Result<()> { let Some((addr, _runtime_threads, handle)) = spawn_test_server().await? else { diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index e269cec0..c1411fb8 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -637,6 +637,7 @@ pub enum AgentWorkerStatus { Queued, Starting, Running, + WaitingForUser, ModelWait, RunningTool, Completed, @@ -659,6 +660,12 @@ pub enum AgentWorkerToolProfile { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct AgentWorkerSpec { pub worker_id: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub run_id: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub parent_run_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub session_name: Option, pub objective: String, #[serde(default, skip_serializing_if = "Option::is_none")] pub role: Option, @@ -675,6 +682,69 @@ pub struct AgentWorkerSpec { pub max_spawn_depth: u32, } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct AgentRunFollowUpDelivery { + pub delivered: bool, + pub timestamp_ms: u64, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub message_preview: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub reason: Option, + #[serde(default, skip_serializing_if = "is_false")] + pub interrupt: bool, + #[serde(default, skip_serializing_if = "is_false")] + pub continued_from_checkpoint: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct AgentRunFollowUpTarget { + #[serde(default = "default_agent_eval_tool")] + pub tool: String, + pub agent_id: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub session_name: Option, + #[serde(default)] + pub accepted_statuses: Vec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub latest_delivery: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct AgentRunTakeoverTarget { + #[serde(default = "default_subagent_takeover_kind")] + pub kind: String, + #[serde(default)] + pub supported: bool, + pub agent_id: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub session_name: Option, + pub instructions: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub unsupported_reason: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct AgentRunArtifactRef { + pub kind: String, + pub name: String, + pub target: String, + pub description: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct AgentRunUsage { + pub status: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub total_tokens: Option, + pub note: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct AgentRunVerificationSummary { + pub status: String, + pub summary: String, +} + /// Structured headless worker event. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct AgentWorkerEvent { @@ -694,6 +764,20 @@ pub struct AgentWorkerEvent { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct AgentWorkerRecord { pub spec: AgentWorkerSpec, + #[serde(default = "default_subagent_actor_kind")] + pub actor_kind: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub parent_run_id: Option, + #[serde(default = "default_agent_run_follow_up")] + pub follow_up: AgentRunFollowUpTarget, + #[serde(default = "default_agent_run_takeover")] + pub takeover: AgentRunTakeoverTarget, + #[serde(default)] + pub artifacts: Vec, + #[serde(default = "default_agent_run_usage")] + pub usage: AgentRunUsage, + #[serde(default = "default_agent_run_verification")] + pub verification: AgentRunVerificationSummary, pub status: AgentWorkerStatus, pub created_at_ms: u64, pub updated_at_ms: u64, @@ -715,8 +799,19 @@ pub struct AgentWorkerRecord { impl AgentWorkerRecord { fn new(spec: AgentWorkerSpec, now_ms: u64) -> Self { + let run_id = agent_worker_run_id(&spec); + let artifacts = default_subagent_artifacts(&run_id); + let follow_up = follow_up_target_for_spec(&spec); + let takeover = takeover_target_for_spec(&spec); Self { + parent_run_id: spec.parent_run_id.clone(), spec, + actor_kind: default_subagent_actor_kind(), + follow_up, + takeover, + artifacts, + usage: default_agent_run_usage(), + verification: default_agent_run_verification(), status: AgentWorkerStatus::Starting, created_at_ms: now_ms, updated_at_ms: now_ms, @@ -731,6 +826,162 @@ impl AgentWorkerRecord { } } +fn default_subagent_actor_kind() -> String { + "subagent".to_string() +} + +fn default_agent_eval_tool() -> String { + "agent_eval".to_string() +} + +fn default_subagent_takeover_kind() -> String { + "local_subagent_session".to_string() +} + +fn default_agent_run_follow_up() -> AgentRunFollowUpTarget { + AgentRunFollowUpTarget { + tool: default_agent_eval_tool(), + agent_id: String::new(), + session_name: None, + accepted_statuses: vec!["running".to_string(), "interrupted_continuable".to_string()], + latest_delivery: None, + } +} + +fn default_agent_run_takeover() -> AgentRunTakeoverTarget { + AgentRunTakeoverTarget { + kind: default_subagent_takeover_kind(), + supported: false, + agent_id: String::new(), + session_name: None, + instructions: "No takeover target is available for this older record.".to_string(), + unsupported_reason: Some("legacy_record_missing_agent_id".to_string()), + } +} + +fn default_agent_run_usage() -> AgentRunUsage { + AgentRunUsage { + status: "unknown".to_string(), + total_tokens: None, + note: "Token usage is not yet reported by the sub-agent worker ledger.".to_string(), + } +} + +fn default_agent_run_verification() -> AgentRunVerificationSummary { + AgentRunVerificationSummary { + status: "self_report_only".to_string(), + summary: + "No verified command or test receipt is attached; treat the result summary as a child self-report." + .to_string(), + } +} + +fn agent_worker_run_id(spec: &AgentWorkerSpec) -> String { + if spec.run_id.is_empty() { + spec.worker_id.clone() + } else { + spec.run_id.clone() + } +} + +fn follow_up_target_for_spec(spec: &AgentWorkerSpec) -> AgentRunFollowUpTarget { + AgentRunFollowUpTarget { + tool: default_agent_eval_tool(), + agent_id: spec.worker_id.clone(), + session_name: spec.session_name.clone(), + accepted_statuses: vec!["running".to_string(), "interrupted_continuable".to_string()], + latest_delivery: None, + } +} + +fn takeover_target_for_spec(spec: &AgentWorkerSpec) -> AgentRunTakeoverTarget { + let agent_ref = spec + .session_name + .as_deref() + .filter(|name| !name.is_empty()) + .unwrap_or(&spec.worker_id); + AgentRunTakeoverTarget { + kind: default_subagent_takeover_kind(), + supported: true, + agent_id: spec.worker_id.clone(), + session_name: spec.session_name.clone(), + instructions: format!( + "Use agent_eval with agent_id '{agent_ref}' to inspect or send follow-up input; use agent_close to cancel the lane." + ), + unsupported_reason: None, + } +} + +fn default_subagent_artifacts(run_id: &str) -> Vec { + vec![ + AgentRunArtifactRef { + kind: "worker_events".to_string(), + name: "worker_record.events".to_string(), + target: run_id.to_string(), + description: "Bounded structured lifecycle events retained on the worker record." + .to_string(), + }, + AgentRunArtifactRef { + kind: "transcript".to_string(), + name: "transcript_handle".to_string(), + target: format!("agent:{run_id}"), + description: + "Use the projection transcript_handle with handle_read for the child transcript." + .to_string(), + }, + AgentRunArtifactRef { + kind: "receipt".to_string(), + name: "result_summary".to_string(), + target: run_id.to_string(), + description: "Child final summary when present; verify before treating as fact." + .to_string(), + }, + ] +} + +fn message_preview(text: &str) -> String { + const MAX_PREVIEW_CHARS: usize = 120; + let mut preview: String = text.chars().take(MAX_PREVIEW_CHARS).collect(); + if text.chars().count() > MAX_PREVIEW_CHARS { + preview.push_str("..."); + } + preview +} + +fn normalize_worker_spec(mut spec: AgentWorkerSpec) -> AgentWorkerSpec { + if spec.run_id.is_empty() { + spec.run_id = spec.worker_id.clone(); + } + spec +} + +fn normalize_worker_record(mut record: AgentWorkerRecord) -> AgentWorkerRecord { + record.spec = normalize_worker_spec(record.spec); + let run_id = agent_worker_run_id(&record.spec); + if record.actor_kind.is_empty() { + record.actor_kind = default_subagent_actor_kind(); + } + if record.parent_run_id.is_none() { + record.parent_run_id = record.spec.parent_run_id.clone(); + } + if record.follow_up.agent_id.is_empty() { + record.follow_up = follow_up_target_for_spec(&record.spec); + } + if record.takeover.agent_id.is_empty() { + record.takeover = takeover_target_for_spec(&record.spec); + } + if record.artifacts.is_empty() { + record.artifacts = default_subagent_artifacts(&run_id); + } + if record.usage.status.is_empty() { + record.usage = default_agent_run_usage(); + } + if record.verification.status.is_empty() { + record.verification = default_agent_run_verification(); + } + record +} + fn is_false(b: &bool) -> bool { !*b } @@ -1502,6 +1753,7 @@ impl SubAgentManager { self.agents.insert(persisted.id, agent); } for worker in state.workers { + let worker = normalize_worker_record(worker); self.worker_event_seq = self.worker_event_seq.max( worker .events @@ -1545,7 +1797,7 @@ impl SubAgentManager { fn register_worker(&mut self, spec: AgentWorkerSpec) { let worker_id = spec.worker_id.clone(); let now_ms = epoch_millis_now(); - let mut record = AgentWorkerRecord::new(spec, now_ms); + let mut record = AgentWorkerRecord::new(normalize_worker_spec(spec), now_ms); self.push_worker_event( &mut record, AgentWorkerStatus::Starting, @@ -1634,7 +1886,7 @@ impl SubAgentManager { } fn complete_worker_from_result(&mut self, worker_id: &str, result: &SubAgentResult) { - let status = worker_status_from_subagent_status(&result.status); + let status = worker_status_from_subagent_result(result); let message = match &result.status { SubAgentStatus::Completed => Some("completed".to_string()), SubAgentStatus::Failed(err) => Some(err.clone()), @@ -1652,6 +1904,33 @@ impl SubAgentManager { } } + fn record_follow_up_delivery( + &mut self, + worker_id: &str, + delivered: bool, + message: Option<&str>, + reason: Option<&str>, + interrupt: bool, + continued_from_checkpoint: bool, + ) { + let Some(record) = self.worker_records.get_mut(worker_id) else { + return; + }; + let now_ms = epoch_millis_now(); + record.updated_at_ms = now_ms; + record.follow_up.latest_delivery = Some(AgentRunFollowUpDelivery { + delivered, + timestamp_ms: now_ms, + message_preview: message.map(message_preview), + reason: reason.map(str::to_string), + interrupt, + continued_from_checkpoint, + }); + if delivered { + record.latest_message = Some("follow-up delivered via agent_eval".to_string()); + } + } + fn fail_worker(&mut self, worker_id: &str, error: String) { self.record_worker_event( worker_id, @@ -1830,6 +2109,9 @@ impl SubAgentManager { let max_steps = self.max_steps; let worker_spec = AgentWorkerSpec { worker_id: agent_id.clone(), + run_id: agent_id.clone(), + parent_run_id: None, + session_name: Some(agent.session_name.clone()), objective: assignment.objective.clone(), role: assignment.role.clone(), agent_type: agent_type.clone(), @@ -2400,17 +2682,36 @@ impl SubAgentManager { /// Thread-safe wrapper for `SubAgentManager`. pub type SharedSubAgentManager = Arc>; +pub fn load_persisted_agent_worker_records(workspace: &Path) -> Result> { + let mut manager = SubAgentManager::new(workspace.to_path_buf(), 1) + .with_state_path(default_state_path(workspace)); + manager.load_state()?; + Ok(manager.list_worker_records()) +} + /// Model-facing session projection returned by the v0.8.33 sub-agent API. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SubAgentSessionProjection { pub name: String, pub agent_id: String, + #[serde(default)] + pub run_id: String, pub status: String, pub terminal: bool, pub context_mode: String, pub fork_context: bool, pub prefix_cache: SubAgentPrefixCacheProjection, pub transcript_handle: VarHandle, + #[serde(default = "default_agent_run_follow_up")] + pub follow_up: AgentRunFollowUpTarget, + #[serde(default = "default_agent_run_takeover")] + pub takeover: AgentRunTakeoverTarget, + #[serde(default)] + pub artifacts: Vec, + #[serde(default = "default_agent_run_usage")] + pub usage: AgentRunUsage, + #[serde(default = "default_agent_run_verification")] + pub verification: AgentRunVerificationSummary, pub snapshot: SubAgentResult, #[serde(default, skip_serializing_if = "Option::is_none")] pub checkpoint: Option, @@ -2493,16 +2794,62 @@ async fn subagent_session_projection( store.insert_json(transcript_session_id, "transcript", transcript_payload) } }; + let run_id = worker_record + .as_ref() + .map(|record| agent_worker_run_id(&record.spec)) + .unwrap_or_else(|| snapshot.agent_id.clone()); + let follow_up = worker_record + .as_ref() + .map(|record| record.follow_up.clone()) + .unwrap_or_else(|| AgentRunFollowUpTarget { + tool: default_agent_eval_tool(), + agent_id: snapshot.agent_id.clone(), + session_name: Some(snapshot.name.clone()), + accepted_statuses: vec!["running".to_string(), "interrupted_continuable".to_string()], + latest_delivery: None, + }); + let takeover = worker_record + .as_ref() + .map(|record| record.takeover.clone()) + .unwrap_or_else(|| AgentRunTakeoverTarget { + kind: default_subagent_takeover_kind(), + supported: true, + agent_id: snapshot.agent_id.clone(), + session_name: Some(snapshot.name.clone()), + instructions: format!( + "Use agent_eval with agent_id '{}' to inspect or send follow-up input; use agent_close to cancel the lane.", + snapshot.agent_id + ), + unsupported_reason: None, + }); + let artifacts = worker_record + .as_ref() + .map(|record| record.artifacts.clone()) + .unwrap_or_else(|| default_subagent_artifacts(&run_id)); + let usage = worker_record + .as_ref() + .map(|record| record.usage.clone()) + .unwrap_or_else(default_agent_run_usage); + let verification = worker_record + .as_ref() + .map(|record| record.verification.clone()) + .unwrap_or_else(default_agent_run_verification); SubAgentSessionProjection { name: snapshot.name.clone(), agent_id: snapshot.agent_id.clone(), + run_id, status: subagent_status_name(&snapshot.status).to_string(), terminal: snapshot.status != SubAgentStatus::Running, context_mode: snapshot.context_mode.clone(), fork_context: snapshot.fork_context, prefix_cache: subagent_prefix_cache_projection(&snapshot), transcript_handle, + follow_up, + takeover, + artifacts, + usage, + verification, checkpoint: snapshot.checkpoint.clone(), continuable: subagent_checkpoint_is_continuable(&snapshot), snapshot, @@ -3266,10 +3613,20 @@ impl ToolSpec for AgentEvalTool { // child output". let mut message_delivery: Option = None; if continue_from_checkpoint { + let delivery_preview = message.clone(); let mut manager = self.manager.write().await; manager .continue_checkpointed(&agent_id, message, interrupt) .map_err(|e| ToolError::execution_failed(e.to_string()))?; + manager.record_follow_up_delivery( + &agent_id, + true, + delivery_preview.as_deref(), + None, + interrupt, + true, + ); + manager.persist_state_best_effort(); message_delivery = Some(json!({ "delivered": true, "continued_from_checkpoint": true @@ -3283,16 +3640,36 @@ impl ToolSpec for AgentEvalTool { .unwrap_or(false) }; if terminal { + let reason = "session already terminated; follow-up not delivered"; + let mut manager = self.manager.write().await; + manager.record_follow_up_delivery( + &agent_id, + false, + Some(message.as_str()), + Some(reason), + interrupt, + false, + ); + manager.persist_state_best_effort(); message_delivery = Some(json!({ "delivered": false, - "reason": "session already terminated; follow-up not delivered", + "reason": reason, "recover_full_output": "read the returned transcript_handle with handle_read" })); } else { let mut manager = self.manager.write().await; manager - .send_input(&agent_id, message, interrupt) + .send_input(&agent_id, message.clone(), interrupt) .map_err(|e| ToolError::execution_failed(e.to_string()))?; + manager.record_follow_up_delivery( + &agent_id, + true, + Some(message.as_str()), + None, + interrupt, + false, + ); + manager.persist_state_best_effort(); message_delivery = Some(json!({ "delivered": true })); } } @@ -6005,11 +6382,21 @@ fn worker_status_from_subagent_status(status: &SubAgentStatus) -> AgentWorkerSta } } +fn worker_status_from_subagent_result(result: &SubAgentResult) -> AgentWorkerStatus { + if subagent_checkpoint_is_continuable(result) { + AgentWorkerStatus::WaitingForUser + } else { + worker_status_from_subagent_status(&result.status) + } +} + fn worker_progress_event_parts(message: &str) -> (AgentWorkerStatus, Option, Option) { let step = parse_progress_step(message); let lower = message.to_ascii_lowercase(); let status = if lower.contains("queued") { AgentWorkerStatus::Queued + } else if lower.contains("waiting for user") || lower.contains("waiting for follow-up") { + AgentWorkerStatus::WaitingForUser } else if lower.contains("requesting model response") || lower.contains(SUBAGENT_MODEL_WAIT_REASON) { diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs index 8b0fa8eb..63301bb2 100644 --- a/crates/tui/src/tools/subagent/tests.rs +++ b/crates/tui/src/tools/subagent/tests.rs @@ -32,6 +32,9 @@ fn make_snapshot(status: SubAgentStatus) -> SubAgentResult { fn make_worker_spec(worker_id: &str, workspace: PathBuf) -> AgentWorkerSpec { AgentWorkerSpec { worker_id: worker_id.to_string(), + run_id: worker_id.to_string(), + parent_run_id: None, + session_name: Some(worker_id.to_string()), objective: "inspect the repo".to_string(), role: Some("explorer".to_string()), agent_type: SubAgentType::Explore, @@ -86,6 +89,8 @@ fn headless_worker_record_tracks_lifecycle_without_tui_projection() { .get_worker_record("agent_worker_contract") .expect("worker record"); assert_eq!(record.status, AgentWorkerStatus::Completed); + assert_eq!(record.spec.run_id, "agent_worker_contract"); + assert_eq!(record.actor_kind, "subagent"); assert_eq!(record.spec.agent_type, SubAgentType::Explore); assert_eq!( record.spec.tool_profile, @@ -93,6 +98,23 @@ fn headless_worker_record_tracks_lifecycle_without_tui_projection() { ); assert_eq!(record.result_summary.as_deref(), Some("worker summary")); assert_eq!(record.steps_taken, 1); + assert_eq!(record.follow_up.tool, "agent_eval"); + assert_eq!(record.follow_up.agent_id.as_str(), "agent_worker_contract"); + assert!(record.takeover.supported); + assert!( + record + .takeover + .instructions + .contains("agent_eval with agent_id") + ); + assert_eq!(record.usage.status, "unknown"); + assert_eq!(record.verification.status, "self_report_only"); + assert!( + record + .artifacts + .iter() + .any(|artifact| artifact.kind == "transcript") + ); let statuses: Vec<_> = record.events.iter().map(|event| event.status).collect(); assert!(statuses.contains(&AgentWorkerStatus::Queued)); assert!(statuses.contains(&AgentWorkerStatus::ModelWait)); @@ -142,6 +164,9 @@ fn headless_worker_records_persist_with_subagent_state() { loaded.load_state().expect("load state"); let record = loaded.get_worker_record("agent_persisted").expect("record"); + assert_eq!(record.spec.run_id, "agent_persisted"); + assert_eq!(record.follow_up.agent_id, "agent_persisted"); + assert!(record.takeover.supported); assert_eq!(record.status, AgentWorkerStatus::Failed); assert_eq!(record.error.as_deref(), Some("boom")); assert_eq!(record.steps_taken, 3); @@ -659,6 +684,12 @@ async fn session_projection_exposes_forked_prefix_cache_contract() { assert_eq!(projection.name, "fanout_review"); assert_eq!(projection.context_mode, "forked"); + assert_eq!(projection.run_id, "agent_test"); + assert_eq!(projection.follow_up.tool, "agent_eval"); + assert_eq!(projection.follow_up.agent_id, "agent_test"); + assert!(projection.takeover.supported); + assert_eq!(projection.usage.status, "unknown"); + assert_eq!(projection.verification.status, "self_report_only"); assert!(projection.fork_context); assert_eq!(projection.prefix_cache.mode, "forked"); assert_eq!( @@ -1304,8 +1335,11 @@ async fn test_wait_for_result_reports_timeout_when_still_running() { "boot_test".to_string(), ); let agent_id = agent.id.clone(); + let snapshot = agent.snapshot(); { let mut guard = manager.write().await; + guard.register_worker(make_worker_spec(&agent_id, PathBuf::from("."))); + guard.complete_worker_from_result(&agent_id, &snapshot); guard.agents.insert(agent_id.clone(), agent); } @@ -1340,8 +1374,11 @@ async fn agent_eval_on_completed_session_returns_full_projection_not_running_err agent.status = SubAgentStatus::Completed; agent.result = Some(full_output.clone()); let agent_id = agent.id.clone(); + let snapshot = agent.snapshot(); { let mut guard = manager.write().await; + guard.register_worker(make_worker_spec(&agent_id, PathBuf::from("."))); + guard.complete_worker_from_result(&agent_id, &snapshot); guard.agents.insert(agent_id.clone(), agent); } @@ -1366,6 +1403,22 @@ async fn agent_eval_on_completed_session_returns_full_projection_not_running_err let projection: SubAgentSessionProjection = serde_json::from_str(&result.content).expect("projection deserializes"); assert_eq!(projection.status, "completed"); + assert_eq!(projection.run_id, "test_agent_2"); + let worker_record = projection.worker_record.as_ref().expect("worker record"); + let delivery = worker_record + .follow_up + .latest_delivery + .as_ref() + .expect("delivery receipt"); + assert!(!delivery.delivered); + assert_eq!( + delivery.reason.as_deref(), + Some("session already terminated; follow-up not delivered") + ); + assert_eq!( + delivery.message_preview.as_deref(), + Some("give me the full per-issue breakdown") + ); assert_eq!(projection.transcript_handle.kind, "var_handle"); // The full, untruncated child output survives in the snapshot the // transcript_handle points at. diff --git a/docs/RUNTIME_API.md b/docs/RUNTIME_API.md index ce3a88f6..b399557b 100644 --- a/docs/RUNTIME_API.md +++ b/docs/RUNTIME_API.md @@ -495,6 +495,28 @@ Verification: npm test --workspace @codewhale/runtime-sdk ``` +## Agent Run Receipts + +Sub-agent lanes persist compact run receipts in +`.codewhale/state/subagents.v1.json`. The Runtime API exposes those receipts as +a read-only inspection surface: + +| Operation | Endpoint | +|---|---| +| List persisted agent runs | `GET /v1/agent-runs` | +| Inspect one run | `GET /v1/agent-runs/{run_id}` | + +The response is the same worker-record shape returned by `agent_eval`: +`spec.run_id`, `actor_kind`, lifecycle `status`, bounded `events`, +`follow_up`, `takeover`, `artifacts`, `usage`, and `verification`. `run_id` +falls back to the worker id for older records, and `{run_id}` may be either the +run id or the worker id. + +These endpoints do not start, cancel, or steer sub-agents. Live follow-up still +goes through `agent_eval`; live cancellation still goes through `agent_close`. +The API surface exists so app/editor/headless clients can inspect the same +handoff receipts that the TUI and parent model see. + ## Session lifecycle (native UI supervision) | Operation | Endpoint | diff --git a/docs/SUBAGENTS.md b/docs/SUBAGENTS.md index b874ff6e..9d92f927 100644 --- a/docs/SUBAGENTS.md +++ b/docs/SUBAGENTS.md @@ -228,6 +228,33 @@ Records that loaded from a pre-#405 persisted state file (no `session_boot_id` field) classify as prior-session because the manager can't match them to the current boot. +## Run receipts, follow-up, and takeover + +Each sub-agent has a persisted worker record in +`.codewhale/state/subagents.v1.json`. The record is the current run-ledger +slice for sub-agent lanes: it stores `run_id`, objective, role/model, +workspace/branch, lifecycle events, artifact refs, follow-up target, takeover +target, usage provenance, and verification provenance. + +`agent_eval` returns these fields at the top level of the session projection and +inside `worker_record`. A running or continuable interrupted child should be +continued through the returned `follow_up` target (`agent_eval` with the same +agent id or session name). A local takeover should use the returned `takeover` +instructions; unsupported future cases must say why instead of leaving the +operator to guess. + +Follow-up delivery is explicit. If a message was delivered, the worker record +stores a bounded preview and timestamp. If the child had already terminated, +`agent_eval` still returns the projection and transcript handle, but records the +undelivered follow-up reason so queued instructions do not disappear into UI +state. + +Artifacts are symbolic refs. Use `handle_read` on the returned +`transcript_handle` for transcript details, and treat `result_summary` as a +child self-report unless `verification.status` points to a separate gate or +receipt. `usage.status` is `unknown` until sub-agent token accounting is wired +into the worker ledger. + ## Output contract Every sub-agent produces a final result string with five sections, diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md index 25bd4d75..a016cc7b 100644 --- a/docs/TOOL_SURFACE.md +++ b/docs/TOOL_SURFACE.md @@ -136,6 +136,13 @@ without losing its source context. Large logs and command outputs should be artifacts with compact summaries in the transcript. `task_gate_run` handles this automatically for active durable tasks. +Sub-agent runs also expose a compact run receipt through `agent_eval`: `run_id`, +`follow_up`, `takeover`, `artifacts`, `usage`, `verification`, and +`worker_record`. Follow-up delivery receipts record whether an `agent_eval` +message actually reached the child or why it did not. Usage is marked +`unknown` until worker-level token accounting is available, and verification is +`self_report_only` unless a separate gate or artifact proves the claim. + ### GitHub context and guarded writes | Tool | Niche |