merge #3142 Agent run receipts
This commit is contained in:
@@ -55,6 +55,7 @@ use crate::skill_state::SkillStateStore;
|
||||
use crate::task_manager::{
|
||||
NewTaskRequest, SharedTaskManager, TaskManager, TaskManagerConfig, TaskRecord, TaskSummary,
|
||||
};
|
||||
use crate::tools::subagent::{AgentWorkerRecord, load_persisted_agent_worker_records};
|
||||
use codewhale_protocol::fleet::{
|
||||
FleetArtifactKind, FleetRun, FleetRunId, FleetWorkerEventPayload, FleetWorkerStatus,
|
||||
};
|
||||
@@ -322,6 +323,11 @@ struct SkillsResponse {
|
||||
skills: Vec<SkillEntry>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct AgentRunsResponse {
|
||||
runs: Vec<AgentWorkerRecord>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct SetSkillEnabledRequest {
|
||||
enabled: bool,
|
||||
@@ -590,6 +596,8 @@ pub fn build_router(state: RuntimeApiState) -> Router {
|
||||
post(resume_session_thread),
|
||||
)
|
||||
.route("/v1/workspace/status", get(workspace_status))
|
||||
.route("/v1/agent-runs", get(list_agent_runs))
|
||||
.route("/v1/agent-runs/{run_id}", get(get_agent_run))
|
||||
.route("/v1/fleet/runs", get(list_fleet_runs))
|
||||
.route("/v1/fleet/runs/{run_id}", get(get_fleet_run))
|
||||
.route(
|
||||
@@ -1344,6 +1352,36 @@ async fn workspace_status(
|
||||
Ok(Json(collect_workspace_status(&state.workspace)))
|
||||
}
|
||||
|
||||
async fn list_agent_runs(
|
||||
State(state): State<RuntimeApiState>,
|
||||
) -> Result<Json<AgentRunsResponse>, ApiError> {
|
||||
let runs = load_persisted_agent_worker_records(&state.workspace).map_err(|err| {
|
||||
ApiError::internal(format!("Failed to load persisted agent run records: {err}"))
|
||||
})?;
|
||||
Ok(Json(AgentRunsResponse { runs }))
|
||||
}
|
||||
|
||||
async fn get_agent_run(
|
||||
State(state): State<RuntimeApiState>,
|
||||
Path(run_id): Path<String>,
|
||||
) -> Result<Json<AgentWorkerRecord>, ApiError> {
|
||||
let runs = load_persisted_agent_worker_records(&state.workspace).map_err(|err| {
|
||||
ApiError::internal(format!("Failed to load persisted agent run records: {err}"))
|
||||
})?;
|
||||
let run = runs
|
||||
.into_iter()
|
||||
.find(|record| {
|
||||
let effective_run_id = if record.spec.run_id.is_empty() {
|
||||
record.spec.worker_id.as_str()
|
||||
} else {
|
||||
record.spec.run_id.as_str()
|
||||
};
|
||||
effective_run_id == run_id || record.spec.worker_id == run_id
|
||||
})
|
||||
.ok_or_else(|| ApiError::not_found(format!("agent run '{run_id}' not found")))?;
|
||||
Ok(Json(run))
|
||||
}
|
||||
|
||||
async fn list_fleet_runs(State(state): State<RuntimeApiState>) -> Result<Json<Value>, ApiError> {
|
||||
let manager = open_fleet_manager(&state)?;
|
||||
let ledger_state = manager
|
||||
@@ -4030,6 +4068,154 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn agent_runs_runtime_api_exposes_persisted_worker_receipts() -> Result<()> {
|
||||
use crate::tools::subagent::{
|
||||
AgentRunArtifactRef, AgentRunFollowUpTarget, AgentRunTakeoverTarget, AgentRunUsage,
|
||||
AgentRunVerificationSummary, AgentWorkerEvent, AgentWorkerRecord, AgentWorkerSpec,
|
||||
AgentWorkerStatus, AgentWorkerToolProfile, SubAgentType,
|
||||
};
|
||||
use std::collections::VecDeque;
|
||||
|
||||
let root =
|
||||
std::env::temp_dir().join(format!("codewhale-agent-runs-api-{}", Uuid::new_v4()));
|
||||
let workspace = root.join("workspace");
|
||||
fs::create_dir_all(workspace.join(".codewhale/state"))?;
|
||||
|
||||
let record = AgentWorkerRecord {
|
||||
spec: AgentWorkerSpec {
|
||||
worker_id: "agent_receipt".to_string(),
|
||||
run_id: "run_receipt".to_string(),
|
||||
parent_run_id: Some("parent_run".to_string()),
|
||||
session_name: Some("receipt_lane".to_string()),
|
||||
objective: "Verify run receipt projection".to_string(),
|
||||
role: Some("verifier".to_string()),
|
||||
agent_type: SubAgentType::Verifier,
|
||||
model: "deepseek-v4-flash".to_string(),
|
||||
workspace: workspace.clone(),
|
||||
git_branch: Some("codex/v0.8.60".to_string()),
|
||||
context_mode: "fresh".to_string(),
|
||||
fork_context: false,
|
||||
tool_profile: AgentWorkerToolProfile::Explicit(vec!["read_file".to_string()]),
|
||||
max_steps: 4,
|
||||
spawn_depth: 1,
|
||||
max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH,
|
||||
},
|
||||
actor_kind: "subagent".to_string(),
|
||||
parent_run_id: Some("parent_run".to_string()),
|
||||
follow_up: AgentRunFollowUpTarget {
|
||||
tool: "agent_eval".to_string(),
|
||||
agent_id: "agent_receipt".to_string(),
|
||||
session_name: Some("receipt_lane".to_string()),
|
||||
accepted_statuses: vec![
|
||||
"running".to_string(),
|
||||
"interrupted_continuable".to_string(),
|
||||
],
|
||||
latest_delivery: None,
|
||||
},
|
||||
takeover: AgentRunTakeoverTarget {
|
||||
kind: "local_subagent_session".to_string(),
|
||||
supported: true,
|
||||
agent_id: "agent_receipt".to_string(),
|
||||
session_name: Some("receipt_lane".to_string()),
|
||||
instructions: "Use agent_eval with agent_id 'agent_receipt'.".to_string(),
|
||||
unsupported_reason: None,
|
||||
},
|
||||
artifacts: vec![AgentRunArtifactRef {
|
||||
kind: "transcript".to_string(),
|
||||
name: "transcript_handle".to_string(),
|
||||
target: "agent:agent_receipt".to_string(),
|
||||
description: "Read with handle_read from a live projection.".to_string(),
|
||||
}],
|
||||
usage: AgentRunUsage {
|
||||
status: "unknown".to_string(),
|
||||
total_tokens: None,
|
||||
note: "not reported".to_string(),
|
||||
},
|
||||
verification: AgentRunVerificationSummary {
|
||||
status: "self_report_only".to_string(),
|
||||
summary: "no verified receipt attached".to_string(),
|
||||
},
|
||||
status: AgentWorkerStatus::Completed,
|
||||
created_at_ms: 1,
|
||||
updated_at_ms: 2,
|
||||
started_at_ms: Some(1),
|
||||
completed_at_ms: Some(2),
|
||||
latest_message: Some("completed".to_string()),
|
||||
result_summary: Some("receipt complete".to_string()),
|
||||
error: None,
|
||||
steps_taken: 2,
|
||||
events: VecDeque::from([AgentWorkerEvent {
|
||||
seq: 1,
|
||||
worker_id: "agent_receipt".to_string(),
|
||||
status: AgentWorkerStatus::Completed,
|
||||
timestamp_ms: 2,
|
||||
message: Some("completed".to_string()),
|
||||
step: Some(2),
|
||||
tool_name: None,
|
||||
}]),
|
||||
};
|
||||
let state_payload = json!({
|
||||
"schema_version": 1,
|
||||
"agents": [],
|
||||
"workers": [record],
|
||||
});
|
||||
fs::write(
|
||||
workspace.join(".codewhale/state/subagents.v1.json"),
|
||||
serde_json::to_vec_pretty(&state_payload)?,
|
||||
)?;
|
||||
|
||||
let sessions_dir = root.join("sessions");
|
||||
let Some((addr, _runtime_threads, handle)) =
|
||||
spawn_test_server_with_root_token_mobile_workspace(
|
||||
root.clone(),
|
||||
sessions_dir,
|
||||
None,
|
||||
false,
|
||||
workspace,
|
||||
)
|
||||
.await?
|
||||
else {
|
||||
return Ok(());
|
||||
};
|
||||
let client = crate::tls::reqwest_client();
|
||||
|
||||
let runs: serde_json::Value = client
|
||||
.get(format!("http://{addr}/v1/agent-runs"))
|
||||
.send()
|
||||
.await?
|
||||
.error_for_status()?
|
||||
.json()
|
||||
.await?;
|
||||
assert_eq!(runs["runs"][0]["spec"]["run_id"], "run_receipt");
|
||||
assert_eq!(runs["runs"][0]["follow_up"]["tool"], "agent_eval");
|
||||
assert_eq!(
|
||||
runs["runs"][0]["verification"]["status"],
|
||||
"self_report_only"
|
||||
);
|
||||
|
||||
let run: serde_json::Value = client
|
||||
.get(format!("http://{addr}/v1/agent-runs/run_receipt"))
|
||||
.send()
|
||||
.await?
|
||||
.error_for_status()?
|
||||
.json()
|
||||
.await?;
|
||||
assert_eq!(run["spec"]["worker_id"], "agent_receipt");
|
||||
assert_eq!(run["takeover"]["supported"], true);
|
||||
assert_eq!(run["artifacts"][0]["kind"], "transcript");
|
||||
|
||||
let missing = client
|
||||
.get(format!("http://{addr}/v1/agent-runs/missing"))
|
||||
.send()
|
||||
.await?
|
||||
.status();
|
||||
assert_eq!(missing, StatusCode::NOT_FOUND);
|
||||
|
||||
handle.abort();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn stream_requires_prompt() -> Result<()> {
|
||||
let Some((addr, _runtime_threads, handle)) = spawn_test_server().await? else {
|
||||
|
||||
@@ -637,6 +637,7 @@ pub enum AgentWorkerStatus {
|
||||
Queued,
|
||||
Starting,
|
||||
Running,
|
||||
WaitingForUser,
|
||||
ModelWait,
|
||||
RunningTool,
|
||||
Completed,
|
||||
@@ -659,6 +660,12 @@ pub enum AgentWorkerToolProfile {
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct AgentWorkerSpec {
|
||||
pub worker_id: String,
|
||||
#[serde(default, skip_serializing_if = "String::is_empty")]
|
||||
pub run_id: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub parent_run_id: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub session_name: Option<String>,
|
||||
pub objective: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub role: Option<String>,
|
||||
@@ -675,6 +682,69 @@ pub struct AgentWorkerSpec {
|
||||
pub max_spawn_depth: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct AgentRunFollowUpDelivery {
|
||||
pub delivered: bool,
|
||||
pub timestamp_ms: u64,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub message_preview: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "is_false")]
|
||||
pub interrupt: bool,
|
||||
#[serde(default, skip_serializing_if = "is_false")]
|
||||
pub continued_from_checkpoint: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct AgentRunFollowUpTarget {
|
||||
#[serde(default = "default_agent_eval_tool")]
|
||||
pub tool: String,
|
||||
pub agent_id: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub session_name: Option<String>,
|
||||
#[serde(default)]
|
||||
pub accepted_statuses: Vec<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub latest_delivery: Option<AgentRunFollowUpDelivery>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct AgentRunTakeoverTarget {
|
||||
#[serde(default = "default_subagent_takeover_kind")]
|
||||
pub kind: String,
|
||||
#[serde(default)]
|
||||
pub supported: bool,
|
||||
pub agent_id: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub session_name: Option<String>,
|
||||
pub instructions: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub unsupported_reason: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct AgentRunArtifactRef {
|
||||
pub kind: String,
|
||||
pub name: String,
|
||||
pub target: String,
|
||||
pub description: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct AgentRunUsage {
|
||||
pub status: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub total_tokens: Option<u64>,
|
||||
pub note: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct AgentRunVerificationSummary {
|
||||
pub status: String,
|
||||
pub summary: String,
|
||||
}
|
||||
|
||||
/// Structured headless worker event.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct AgentWorkerEvent {
|
||||
@@ -694,6 +764,20 @@ pub struct AgentWorkerEvent {
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct AgentWorkerRecord {
|
||||
pub spec: AgentWorkerSpec,
|
||||
#[serde(default = "default_subagent_actor_kind")]
|
||||
pub actor_kind: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub parent_run_id: Option<String>,
|
||||
#[serde(default = "default_agent_run_follow_up")]
|
||||
pub follow_up: AgentRunFollowUpTarget,
|
||||
#[serde(default = "default_agent_run_takeover")]
|
||||
pub takeover: AgentRunTakeoverTarget,
|
||||
#[serde(default)]
|
||||
pub artifacts: Vec<AgentRunArtifactRef>,
|
||||
#[serde(default = "default_agent_run_usage")]
|
||||
pub usage: AgentRunUsage,
|
||||
#[serde(default = "default_agent_run_verification")]
|
||||
pub verification: AgentRunVerificationSummary,
|
||||
pub status: AgentWorkerStatus,
|
||||
pub created_at_ms: u64,
|
||||
pub updated_at_ms: u64,
|
||||
@@ -715,8 +799,19 @@ pub struct AgentWorkerRecord {
|
||||
|
||||
impl AgentWorkerRecord {
|
||||
fn new(spec: AgentWorkerSpec, now_ms: u64) -> Self {
|
||||
let run_id = agent_worker_run_id(&spec);
|
||||
let artifacts = default_subagent_artifacts(&run_id);
|
||||
let follow_up = follow_up_target_for_spec(&spec);
|
||||
let takeover = takeover_target_for_spec(&spec);
|
||||
Self {
|
||||
parent_run_id: spec.parent_run_id.clone(),
|
||||
spec,
|
||||
actor_kind: default_subagent_actor_kind(),
|
||||
follow_up,
|
||||
takeover,
|
||||
artifacts,
|
||||
usage: default_agent_run_usage(),
|
||||
verification: default_agent_run_verification(),
|
||||
status: AgentWorkerStatus::Starting,
|
||||
created_at_ms: now_ms,
|
||||
updated_at_ms: now_ms,
|
||||
@@ -731,6 +826,162 @@ impl AgentWorkerRecord {
|
||||
}
|
||||
}
|
||||
|
||||
fn default_subagent_actor_kind() -> String {
|
||||
"subagent".to_string()
|
||||
}
|
||||
|
||||
fn default_agent_eval_tool() -> String {
|
||||
"agent_eval".to_string()
|
||||
}
|
||||
|
||||
fn default_subagent_takeover_kind() -> String {
|
||||
"local_subagent_session".to_string()
|
||||
}
|
||||
|
||||
fn default_agent_run_follow_up() -> AgentRunFollowUpTarget {
|
||||
AgentRunFollowUpTarget {
|
||||
tool: default_agent_eval_tool(),
|
||||
agent_id: String::new(),
|
||||
session_name: None,
|
||||
accepted_statuses: vec!["running".to_string(), "interrupted_continuable".to_string()],
|
||||
latest_delivery: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn default_agent_run_takeover() -> AgentRunTakeoverTarget {
|
||||
AgentRunTakeoverTarget {
|
||||
kind: default_subagent_takeover_kind(),
|
||||
supported: false,
|
||||
agent_id: String::new(),
|
||||
session_name: None,
|
||||
instructions: "No takeover target is available for this older record.".to_string(),
|
||||
unsupported_reason: Some("legacy_record_missing_agent_id".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn default_agent_run_usage() -> AgentRunUsage {
|
||||
AgentRunUsage {
|
||||
status: "unknown".to_string(),
|
||||
total_tokens: None,
|
||||
note: "Token usage is not yet reported by the sub-agent worker ledger.".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn default_agent_run_verification() -> AgentRunVerificationSummary {
|
||||
AgentRunVerificationSummary {
|
||||
status: "self_report_only".to_string(),
|
||||
summary:
|
||||
"No verified command or test receipt is attached; treat the result summary as a child self-report."
|
||||
.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn agent_worker_run_id(spec: &AgentWorkerSpec) -> String {
|
||||
if spec.run_id.is_empty() {
|
||||
spec.worker_id.clone()
|
||||
} else {
|
||||
spec.run_id.clone()
|
||||
}
|
||||
}
|
||||
|
||||
fn follow_up_target_for_spec(spec: &AgentWorkerSpec) -> AgentRunFollowUpTarget {
|
||||
AgentRunFollowUpTarget {
|
||||
tool: default_agent_eval_tool(),
|
||||
agent_id: spec.worker_id.clone(),
|
||||
session_name: spec.session_name.clone(),
|
||||
accepted_statuses: vec!["running".to_string(), "interrupted_continuable".to_string()],
|
||||
latest_delivery: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn takeover_target_for_spec(spec: &AgentWorkerSpec) -> AgentRunTakeoverTarget {
|
||||
let agent_ref = spec
|
||||
.session_name
|
||||
.as_deref()
|
||||
.filter(|name| !name.is_empty())
|
||||
.unwrap_or(&spec.worker_id);
|
||||
AgentRunTakeoverTarget {
|
||||
kind: default_subagent_takeover_kind(),
|
||||
supported: true,
|
||||
agent_id: spec.worker_id.clone(),
|
||||
session_name: spec.session_name.clone(),
|
||||
instructions: format!(
|
||||
"Use agent_eval with agent_id '{agent_ref}' to inspect or send follow-up input; use agent_close to cancel the lane."
|
||||
),
|
||||
unsupported_reason: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn default_subagent_artifacts(run_id: &str) -> Vec<AgentRunArtifactRef> {
|
||||
vec![
|
||||
AgentRunArtifactRef {
|
||||
kind: "worker_events".to_string(),
|
||||
name: "worker_record.events".to_string(),
|
||||
target: run_id.to_string(),
|
||||
description: "Bounded structured lifecycle events retained on the worker record."
|
||||
.to_string(),
|
||||
},
|
||||
AgentRunArtifactRef {
|
||||
kind: "transcript".to_string(),
|
||||
name: "transcript_handle".to_string(),
|
||||
target: format!("agent:{run_id}"),
|
||||
description:
|
||||
"Use the projection transcript_handle with handle_read for the child transcript."
|
||||
.to_string(),
|
||||
},
|
||||
AgentRunArtifactRef {
|
||||
kind: "receipt".to_string(),
|
||||
name: "result_summary".to_string(),
|
||||
target: run_id.to_string(),
|
||||
description: "Child final summary when present; verify before treating as fact."
|
||||
.to_string(),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn message_preview(text: &str) -> String {
|
||||
const MAX_PREVIEW_CHARS: usize = 120;
|
||||
let mut preview: String = text.chars().take(MAX_PREVIEW_CHARS).collect();
|
||||
if text.chars().count() > MAX_PREVIEW_CHARS {
|
||||
preview.push_str("...");
|
||||
}
|
||||
preview
|
||||
}
|
||||
|
||||
fn normalize_worker_spec(mut spec: AgentWorkerSpec) -> AgentWorkerSpec {
|
||||
if spec.run_id.is_empty() {
|
||||
spec.run_id = spec.worker_id.clone();
|
||||
}
|
||||
spec
|
||||
}
|
||||
|
||||
fn normalize_worker_record(mut record: AgentWorkerRecord) -> AgentWorkerRecord {
|
||||
record.spec = normalize_worker_spec(record.spec);
|
||||
let run_id = agent_worker_run_id(&record.spec);
|
||||
if record.actor_kind.is_empty() {
|
||||
record.actor_kind = default_subagent_actor_kind();
|
||||
}
|
||||
if record.parent_run_id.is_none() {
|
||||
record.parent_run_id = record.spec.parent_run_id.clone();
|
||||
}
|
||||
if record.follow_up.agent_id.is_empty() {
|
||||
record.follow_up = follow_up_target_for_spec(&record.spec);
|
||||
}
|
||||
if record.takeover.agent_id.is_empty() {
|
||||
record.takeover = takeover_target_for_spec(&record.spec);
|
||||
}
|
||||
if record.artifacts.is_empty() {
|
||||
record.artifacts = default_subagent_artifacts(&run_id);
|
||||
}
|
||||
if record.usage.status.is_empty() {
|
||||
record.usage = default_agent_run_usage();
|
||||
}
|
||||
if record.verification.status.is_empty() {
|
||||
record.verification = default_agent_run_verification();
|
||||
}
|
||||
record
|
||||
}
|
||||
|
||||
fn is_false(b: &bool) -> bool {
|
||||
!*b
|
||||
}
|
||||
@@ -1502,6 +1753,7 @@ impl SubAgentManager {
|
||||
self.agents.insert(persisted.id, agent);
|
||||
}
|
||||
for worker in state.workers {
|
||||
let worker = normalize_worker_record(worker);
|
||||
self.worker_event_seq = self.worker_event_seq.max(
|
||||
worker
|
||||
.events
|
||||
@@ -1545,7 +1797,7 @@ impl SubAgentManager {
|
||||
fn register_worker(&mut self, spec: AgentWorkerSpec) {
|
||||
let worker_id = spec.worker_id.clone();
|
||||
let now_ms = epoch_millis_now();
|
||||
let mut record = AgentWorkerRecord::new(spec, now_ms);
|
||||
let mut record = AgentWorkerRecord::new(normalize_worker_spec(spec), now_ms);
|
||||
self.push_worker_event(
|
||||
&mut record,
|
||||
AgentWorkerStatus::Starting,
|
||||
@@ -1634,7 +1886,7 @@ impl SubAgentManager {
|
||||
}
|
||||
|
||||
fn complete_worker_from_result(&mut self, worker_id: &str, result: &SubAgentResult) {
|
||||
let status = worker_status_from_subagent_status(&result.status);
|
||||
let status = worker_status_from_subagent_result(result);
|
||||
let message = match &result.status {
|
||||
SubAgentStatus::Completed => Some("completed".to_string()),
|
||||
SubAgentStatus::Failed(err) => Some(err.clone()),
|
||||
@@ -1652,6 +1904,33 @@ impl SubAgentManager {
|
||||
}
|
||||
}
|
||||
|
||||
fn record_follow_up_delivery(
|
||||
&mut self,
|
||||
worker_id: &str,
|
||||
delivered: bool,
|
||||
message: Option<&str>,
|
||||
reason: Option<&str>,
|
||||
interrupt: bool,
|
||||
continued_from_checkpoint: bool,
|
||||
) {
|
||||
let Some(record) = self.worker_records.get_mut(worker_id) else {
|
||||
return;
|
||||
};
|
||||
let now_ms = epoch_millis_now();
|
||||
record.updated_at_ms = now_ms;
|
||||
record.follow_up.latest_delivery = Some(AgentRunFollowUpDelivery {
|
||||
delivered,
|
||||
timestamp_ms: now_ms,
|
||||
message_preview: message.map(message_preview),
|
||||
reason: reason.map(str::to_string),
|
||||
interrupt,
|
||||
continued_from_checkpoint,
|
||||
});
|
||||
if delivered {
|
||||
record.latest_message = Some("follow-up delivered via agent_eval".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
fn fail_worker(&mut self, worker_id: &str, error: String) {
|
||||
self.record_worker_event(
|
||||
worker_id,
|
||||
@@ -1830,6 +2109,9 @@ impl SubAgentManager {
|
||||
let max_steps = self.max_steps;
|
||||
let worker_spec = AgentWorkerSpec {
|
||||
worker_id: agent_id.clone(),
|
||||
run_id: agent_id.clone(),
|
||||
parent_run_id: None,
|
||||
session_name: Some(agent.session_name.clone()),
|
||||
objective: assignment.objective.clone(),
|
||||
role: assignment.role.clone(),
|
||||
agent_type: agent_type.clone(),
|
||||
@@ -2400,17 +2682,36 @@ impl SubAgentManager {
|
||||
/// Thread-safe wrapper for `SubAgentManager`.
|
||||
pub type SharedSubAgentManager = Arc<RwLock<SubAgentManager>>;
|
||||
|
||||
pub fn load_persisted_agent_worker_records(workspace: &Path) -> Result<Vec<AgentWorkerRecord>> {
|
||||
let mut manager = SubAgentManager::new(workspace.to_path_buf(), 1)
|
||||
.with_state_path(default_state_path(workspace));
|
||||
manager.load_state()?;
|
||||
Ok(manager.list_worker_records())
|
||||
}
|
||||
|
||||
/// Model-facing session projection returned by the v0.8.33 sub-agent API.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SubAgentSessionProjection {
|
||||
pub name: String,
|
||||
pub agent_id: String,
|
||||
#[serde(default)]
|
||||
pub run_id: String,
|
||||
pub status: String,
|
||||
pub terminal: bool,
|
||||
pub context_mode: String,
|
||||
pub fork_context: bool,
|
||||
pub prefix_cache: SubAgentPrefixCacheProjection,
|
||||
pub transcript_handle: VarHandle,
|
||||
#[serde(default = "default_agent_run_follow_up")]
|
||||
pub follow_up: AgentRunFollowUpTarget,
|
||||
#[serde(default = "default_agent_run_takeover")]
|
||||
pub takeover: AgentRunTakeoverTarget,
|
||||
#[serde(default)]
|
||||
pub artifacts: Vec<AgentRunArtifactRef>,
|
||||
#[serde(default = "default_agent_run_usage")]
|
||||
pub usage: AgentRunUsage,
|
||||
#[serde(default = "default_agent_run_verification")]
|
||||
pub verification: AgentRunVerificationSummary,
|
||||
pub snapshot: SubAgentResult,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub checkpoint: Option<SubAgentCheckpoint>,
|
||||
@@ -2493,16 +2794,62 @@ async fn subagent_session_projection(
|
||||
store.insert_json(transcript_session_id, "transcript", transcript_payload)
|
||||
}
|
||||
};
|
||||
let run_id = worker_record
|
||||
.as_ref()
|
||||
.map(|record| agent_worker_run_id(&record.spec))
|
||||
.unwrap_or_else(|| snapshot.agent_id.clone());
|
||||
let follow_up = worker_record
|
||||
.as_ref()
|
||||
.map(|record| record.follow_up.clone())
|
||||
.unwrap_or_else(|| AgentRunFollowUpTarget {
|
||||
tool: default_agent_eval_tool(),
|
||||
agent_id: snapshot.agent_id.clone(),
|
||||
session_name: Some(snapshot.name.clone()),
|
||||
accepted_statuses: vec!["running".to_string(), "interrupted_continuable".to_string()],
|
||||
latest_delivery: None,
|
||||
});
|
||||
let takeover = worker_record
|
||||
.as_ref()
|
||||
.map(|record| record.takeover.clone())
|
||||
.unwrap_or_else(|| AgentRunTakeoverTarget {
|
||||
kind: default_subagent_takeover_kind(),
|
||||
supported: true,
|
||||
agent_id: snapshot.agent_id.clone(),
|
||||
session_name: Some(snapshot.name.clone()),
|
||||
instructions: format!(
|
||||
"Use agent_eval with agent_id '{}' to inspect or send follow-up input; use agent_close to cancel the lane.",
|
||||
snapshot.agent_id
|
||||
),
|
||||
unsupported_reason: None,
|
||||
});
|
||||
let artifacts = worker_record
|
||||
.as_ref()
|
||||
.map(|record| record.artifacts.clone())
|
||||
.unwrap_or_else(|| default_subagent_artifacts(&run_id));
|
||||
let usage = worker_record
|
||||
.as_ref()
|
||||
.map(|record| record.usage.clone())
|
||||
.unwrap_or_else(default_agent_run_usage);
|
||||
let verification = worker_record
|
||||
.as_ref()
|
||||
.map(|record| record.verification.clone())
|
||||
.unwrap_or_else(default_agent_run_verification);
|
||||
|
||||
SubAgentSessionProjection {
|
||||
name: snapshot.name.clone(),
|
||||
agent_id: snapshot.agent_id.clone(),
|
||||
run_id,
|
||||
status: subagent_status_name(&snapshot.status).to_string(),
|
||||
terminal: snapshot.status != SubAgentStatus::Running,
|
||||
context_mode: snapshot.context_mode.clone(),
|
||||
fork_context: snapshot.fork_context,
|
||||
prefix_cache: subagent_prefix_cache_projection(&snapshot),
|
||||
transcript_handle,
|
||||
follow_up,
|
||||
takeover,
|
||||
artifacts,
|
||||
usage,
|
||||
verification,
|
||||
checkpoint: snapshot.checkpoint.clone(),
|
||||
continuable: subagent_checkpoint_is_continuable(&snapshot),
|
||||
snapshot,
|
||||
@@ -3266,10 +3613,20 @@ impl ToolSpec for AgentEvalTool {
|
||||
// child output".
|
||||
let mut message_delivery: Option<Value> = None;
|
||||
if continue_from_checkpoint {
|
||||
let delivery_preview = message.clone();
|
||||
let mut manager = self.manager.write().await;
|
||||
manager
|
||||
.continue_checkpointed(&agent_id, message, interrupt)
|
||||
.map_err(|e| ToolError::execution_failed(e.to_string()))?;
|
||||
manager.record_follow_up_delivery(
|
||||
&agent_id,
|
||||
true,
|
||||
delivery_preview.as_deref(),
|
||||
None,
|
||||
interrupt,
|
||||
true,
|
||||
);
|
||||
manager.persist_state_best_effort();
|
||||
message_delivery = Some(json!({
|
||||
"delivered": true,
|
||||
"continued_from_checkpoint": true
|
||||
@@ -3283,16 +3640,36 @@ impl ToolSpec for AgentEvalTool {
|
||||
.unwrap_or(false)
|
||||
};
|
||||
if terminal {
|
||||
let reason = "session already terminated; follow-up not delivered";
|
||||
let mut manager = self.manager.write().await;
|
||||
manager.record_follow_up_delivery(
|
||||
&agent_id,
|
||||
false,
|
||||
Some(message.as_str()),
|
||||
Some(reason),
|
||||
interrupt,
|
||||
false,
|
||||
);
|
||||
manager.persist_state_best_effort();
|
||||
message_delivery = Some(json!({
|
||||
"delivered": false,
|
||||
"reason": "session already terminated; follow-up not delivered",
|
||||
"reason": reason,
|
||||
"recover_full_output": "read the returned transcript_handle with handle_read"
|
||||
}));
|
||||
} else {
|
||||
let mut manager = self.manager.write().await;
|
||||
manager
|
||||
.send_input(&agent_id, message, interrupt)
|
||||
.send_input(&agent_id, message.clone(), interrupt)
|
||||
.map_err(|e| ToolError::execution_failed(e.to_string()))?;
|
||||
manager.record_follow_up_delivery(
|
||||
&agent_id,
|
||||
true,
|
||||
Some(message.as_str()),
|
||||
None,
|
||||
interrupt,
|
||||
false,
|
||||
);
|
||||
manager.persist_state_best_effort();
|
||||
message_delivery = Some(json!({ "delivered": true }));
|
||||
}
|
||||
}
|
||||
@@ -6005,11 +6382,21 @@ fn worker_status_from_subagent_status(status: &SubAgentStatus) -> AgentWorkerSta
|
||||
}
|
||||
}
|
||||
|
||||
fn worker_status_from_subagent_result(result: &SubAgentResult) -> AgentWorkerStatus {
|
||||
if subagent_checkpoint_is_continuable(result) {
|
||||
AgentWorkerStatus::WaitingForUser
|
||||
} else {
|
||||
worker_status_from_subagent_status(&result.status)
|
||||
}
|
||||
}
|
||||
|
||||
fn worker_progress_event_parts(message: &str) -> (AgentWorkerStatus, Option<u32>, Option<String>) {
|
||||
let step = parse_progress_step(message);
|
||||
let lower = message.to_ascii_lowercase();
|
||||
let status = if lower.contains("queued") {
|
||||
AgentWorkerStatus::Queued
|
||||
} else if lower.contains("waiting for user") || lower.contains("waiting for follow-up") {
|
||||
AgentWorkerStatus::WaitingForUser
|
||||
} else if lower.contains("requesting model response")
|
||||
|| lower.contains(SUBAGENT_MODEL_WAIT_REASON)
|
||||
{
|
||||
|
||||
@@ -32,6 +32,9 @@ fn make_snapshot(status: SubAgentStatus) -> SubAgentResult {
|
||||
fn make_worker_spec(worker_id: &str, workspace: PathBuf) -> AgentWorkerSpec {
|
||||
AgentWorkerSpec {
|
||||
worker_id: worker_id.to_string(),
|
||||
run_id: worker_id.to_string(),
|
||||
parent_run_id: None,
|
||||
session_name: Some(worker_id.to_string()),
|
||||
objective: "inspect the repo".to_string(),
|
||||
role: Some("explorer".to_string()),
|
||||
agent_type: SubAgentType::Explore,
|
||||
@@ -86,6 +89,8 @@ fn headless_worker_record_tracks_lifecycle_without_tui_projection() {
|
||||
.get_worker_record("agent_worker_contract")
|
||||
.expect("worker record");
|
||||
assert_eq!(record.status, AgentWorkerStatus::Completed);
|
||||
assert_eq!(record.spec.run_id, "agent_worker_contract");
|
||||
assert_eq!(record.actor_kind, "subagent");
|
||||
assert_eq!(record.spec.agent_type, SubAgentType::Explore);
|
||||
assert_eq!(
|
||||
record.spec.tool_profile,
|
||||
@@ -93,6 +98,23 @@ fn headless_worker_record_tracks_lifecycle_without_tui_projection() {
|
||||
);
|
||||
assert_eq!(record.result_summary.as_deref(), Some("worker summary"));
|
||||
assert_eq!(record.steps_taken, 1);
|
||||
assert_eq!(record.follow_up.tool, "agent_eval");
|
||||
assert_eq!(record.follow_up.agent_id.as_str(), "agent_worker_contract");
|
||||
assert!(record.takeover.supported);
|
||||
assert!(
|
||||
record
|
||||
.takeover
|
||||
.instructions
|
||||
.contains("agent_eval with agent_id")
|
||||
);
|
||||
assert_eq!(record.usage.status, "unknown");
|
||||
assert_eq!(record.verification.status, "self_report_only");
|
||||
assert!(
|
||||
record
|
||||
.artifacts
|
||||
.iter()
|
||||
.any(|artifact| artifact.kind == "transcript")
|
||||
);
|
||||
let statuses: Vec<_> = record.events.iter().map(|event| event.status).collect();
|
||||
assert!(statuses.contains(&AgentWorkerStatus::Queued));
|
||||
assert!(statuses.contains(&AgentWorkerStatus::ModelWait));
|
||||
@@ -142,6 +164,9 @@ fn headless_worker_records_persist_with_subagent_state() {
|
||||
loaded.load_state().expect("load state");
|
||||
|
||||
let record = loaded.get_worker_record("agent_persisted").expect("record");
|
||||
assert_eq!(record.spec.run_id, "agent_persisted");
|
||||
assert_eq!(record.follow_up.agent_id, "agent_persisted");
|
||||
assert!(record.takeover.supported);
|
||||
assert_eq!(record.status, AgentWorkerStatus::Failed);
|
||||
assert_eq!(record.error.as_deref(), Some("boom"));
|
||||
assert_eq!(record.steps_taken, 3);
|
||||
@@ -659,6 +684,12 @@ async fn session_projection_exposes_forked_prefix_cache_contract() {
|
||||
|
||||
assert_eq!(projection.name, "fanout_review");
|
||||
assert_eq!(projection.context_mode, "forked");
|
||||
assert_eq!(projection.run_id, "agent_test");
|
||||
assert_eq!(projection.follow_up.tool, "agent_eval");
|
||||
assert_eq!(projection.follow_up.agent_id, "agent_test");
|
||||
assert!(projection.takeover.supported);
|
||||
assert_eq!(projection.usage.status, "unknown");
|
||||
assert_eq!(projection.verification.status, "self_report_only");
|
||||
assert!(projection.fork_context);
|
||||
assert_eq!(projection.prefix_cache.mode, "forked");
|
||||
assert_eq!(
|
||||
@@ -1304,8 +1335,11 @@ async fn test_wait_for_result_reports_timeout_when_still_running() {
|
||||
"boot_test".to_string(),
|
||||
);
|
||||
let agent_id = agent.id.clone();
|
||||
let snapshot = agent.snapshot();
|
||||
{
|
||||
let mut guard = manager.write().await;
|
||||
guard.register_worker(make_worker_spec(&agent_id, PathBuf::from(".")));
|
||||
guard.complete_worker_from_result(&agent_id, &snapshot);
|
||||
guard.agents.insert(agent_id.clone(), agent);
|
||||
}
|
||||
|
||||
@@ -1340,8 +1374,11 @@ async fn agent_eval_on_completed_session_returns_full_projection_not_running_err
|
||||
agent.status = SubAgentStatus::Completed;
|
||||
agent.result = Some(full_output.clone());
|
||||
let agent_id = agent.id.clone();
|
||||
let snapshot = agent.snapshot();
|
||||
{
|
||||
let mut guard = manager.write().await;
|
||||
guard.register_worker(make_worker_spec(&agent_id, PathBuf::from(".")));
|
||||
guard.complete_worker_from_result(&agent_id, &snapshot);
|
||||
guard.agents.insert(agent_id.clone(), agent);
|
||||
}
|
||||
|
||||
@@ -1366,6 +1403,22 @@ async fn agent_eval_on_completed_session_returns_full_projection_not_running_err
|
||||
let projection: SubAgentSessionProjection =
|
||||
serde_json::from_str(&result.content).expect("projection deserializes");
|
||||
assert_eq!(projection.status, "completed");
|
||||
assert_eq!(projection.run_id, "test_agent_2");
|
||||
let worker_record = projection.worker_record.as_ref().expect("worker record");
|
||||
let delivery = worker_record
|
||||
.follow_up
|
||||
.latest_delivery
|
||||
.as_ref()
|
||||
.expect("delivery receipt");
|
||||
assert!(!delivery.delivered);
|
||||
assert_eq!(
|
||||
delivery.reason.as_deref(),
|
||||
Some("session already terminated; follow-up not delivered")
|
||||
);
|
||||
assert_eq!(
|
||||
delivery.message_preview.as_deref(),
|
||||
Some("give me the full per-issue breakdown")
|
||||
);
|
||||
assert_eq!(projection.transcript_handle.kind, "var_handle");
|
||||
// The full, untruncated child output survives in the snapshot the
|
||||
// transcript_handle points at.
|
||||
|
||||
@@ -495,6 +495,28 @@ Verification:
|
||||
npm test --workspace @codewhale/runtime-sdk
|
||||
```
|
||||
|
||||
## Agent Run Receipts
|
||||
|
||||
Sub-agent lanes persist compact run receipts in
|
||||
`.codewhale/state/subagents.v1.json`. The Runtime API exposes those receipts as
|
||||
a read-only inspection surface:
|
||||
|
||||
| Operation | Endpoint |
|
||||
|---|---|
|
||||
| List persisted agent runs | `GET /v1/agent-runs` |
|
||||
| Inspect one run | `GET /v1/agent-runs/{run_id}` |
|
||||
|
||||
The response is the same worker-record shape returned by `agent_eval`:
|
||||
`spec.run_id`, `actor_kind`, lifecycle `status`, bounded `events`,
|
||||
`follow_up`, `takeover`, `artifacts`, `usage`, and `verification`. `run_id`
|
||||
falls back to the worker id for older records, and `{run_id}` may be either the
|
||||
run id or the worker id.
|
||||
|
||||
These endpoints do not start, cancel, or steer sub-agents. Live follow-up still
|
||||
goes through `agent_eval`; live cancellation still goes through `agent_close`.
|
||||
The API surface exists so app/editor/headless clients can inspect the same
|
||||
handoff receipts that the TUI and parent model see.
|
||||
|
||||
## Session lifecycle (native UI supervision)
|
||||
|
||||
| Operation | Endpoint |
|
||||
|
||||
@@ -228,6 +228,33 @@ Records that loaded from a pre-#405 persisted state file (no
|
||||
`session_boot_id` field) classify as prior-session because the
|
||||
manager can't match them to the current boot.
|
||||
|
||||
## Run receipts, follow-up, and takeover
|
||||
|
||||
Each sub-agent has a persisted worker record in
|
||||
`.codewhale/state/subagents.v1.json`. The record is the current run-ledger
|
||||
slice for sub-agent lanes: it stores `run_id`, objective, role/model,
|
||||
workspace/branch, lifecycle events, artifact refs, follow-up target, takeover
|
||||
target, usage provenance, and verification provenance.
|
||||
|
||||
`agent_eval` returns these fields at the top level of the session projection and
|
||||
inside `worker_record`. A running or continuable interrupted child should be
|
||||
continued through the returned `follow_up` target (`agent_eval` with the same
|
||||
agent id or session name). A local takeover should use the returned `takeover`
|
||||
instructions; unsupported future cases must say why instead of leaving the
|
||||
operator to guess.
|
||||
|
||||
Follow-up delivery is explicit. If a message was delivered, the worker record
|
||||
stores a bounded preview and timestamp. If the child had already terminated,
|
||||
`agent_eval` still returns the projection and transcript handle, but records the
|
||||
undelivered follow-up reason so queued instructions do not disappear into UI
|
||||
state.
|
||||
|
||||
Artifacts are symbolic refs. Use `handle_read` on the returned
|
||||
`transcript_handle` for transcript details, and treat `result_summary` as a
|
||||
child self-report unless `verification.status` points to a separate gate or
|
||||
receipt. `usage.status` is `unknown` until sub-agent token accounting is wired
|
||||
into the worker ledger.
|
||||
|
||||
## Output contract
|
||||
|
||||
Every sub-agent produces a final result string with five sections,
|
||||
|
||||
@@ -136,6 +136,13 @@ without losing its source context.
|
||||
|
||||
Large logs and command outputs should be artifacts with compact summaries in the transcript. `task_gate_run` handles this automatically for active durable tasks.
|
||||
|
||||
Sub-agent runs also expose a compact run receipt through `agent_eval`: `run_id`,
|
||||
`follow_up`, `takeover`, `artifacts`, `usage`, `verification`, and
|
||||
`worker_record`. Follow-up delivery receipts record whether an `agent_eval`
|
||||
message actually reached the child or why it did not. Usage is marked
|
||||
`unknown` until worker-level token accounting is available, and verification is
|
||||
`self_report_only` unless a separate gate or artifact proves the claim.
|
||||
|
||||
### GitHub context and guarded writes
|
||||
|
||||
| Tool | Niche |
|
||||
|
||||
Reference in New Issue
Block a user