diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index b48c63c8..02737eb7 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -42,6 +42,7 @@ use crate::models::{ }; use crate::prompts; use crate::seam_manager::{SeamConfig, SeamManager}; +use crate::tools::goal::{SharedGoalState, new_shared_goal_state}; use crate::tools::plan::{SharedPlanState, new_shared_plan_state}; use crate::tools::shell::{SharedShellManager, new_shared_shell_manager}; use crate::tools::spec::RuntimeToolServices; @@ -125,6 +126,8 @@ pub struct EngineConfig { pub todos: SharedTodoList, /// Shared Plan state. pub plan_state: SharedPlanState, + /// Shared runtime goal state for model-visible goal tools. + pub goal_state: SharedGoalState, /// Maximum sub-agent recursion depth (default 3). See /// `SubAgentRuntime::max_spawn_depth`. Override via /// `[runtime] max_spawn_depth = N` in `~/.deepseek/config.toml`. @@ -206,6 +209,7 @@ impl Default for EngineConfig { capacity: CapacityControllerConfig::default(), todos: new_shared_todo_list(), plan_state: new_shared_plan_state(), + goal_state: new_shared_goal_state(), max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, network_policy: None, snapshots_enabled: true, @@ -414,6 +418,10 @@ impl Engine { /// Create a new engine with the given configuration pub fn new(config: EngineConfig, api_config: &Config) -> (Self, EngineHandle) { + if let Some(objective) = normalized_goal_objective(config.goal_objective.as_deref()) { + sync_goal_state_from_host(&config.goal_state, Some(&objective), None, false); + } + let (tx_op, rx_op) = mpsc::channel(32); let (tx_event, rx_event) = mpsc::channel(256); let (tx_approval, rx_approval) = mpsc::channel(64); @@ -445,6 +453,8 @@ impl Engine { // message at request time so file churn does not rewrite this prefix. let user_memory_block = crate::memory::compose_block(config.memory_enabled, &config.memory_path); + let prompt_goal_objective = + goal_objective_for_prompt(config.goal_objective.as_deref(), &config.goal_state); let system_prompt = prompts::system_prompt_for_mode_with_context_skills_session_and_approval( AppMode::Agent, @@ -454,7 +464,7 @@ impl Engine { Some(&config.instructions), prompts::PromptSessionContext { user_memory_block: user_memory_block.as_deref(), - goal_objective: config.goal_objective.as_deref(), + goal_objective: prompt_goal_objective.as_deref(), project_context_pack_enabled: config.project_context_pack_enabled, locale_tag: &config.locale_tag, translation_enabled: config.translation_enabled, @@ -1009,9 +1019,21 @@ impl Engine { let user_msg = self.user_text_message_with_turn_metadata(content); self.session.add_message(user_msg); + let previous_goal_objective = self.config.goal_objective.clone(); + self.session.model = model; self.config.model.clone_from(&self.session.model); - self.config.goal_objective = goal_objective; + self.config.goal_objective = goal_objective.clone(); + if normalized_goal_objective(previous_goal_objective.as_deref()) + != normalized_goal_objective(goal_objective.as_deref()) + { + sync_goal_state_from_host( + &self.config.goal_state, + normalized_goal_objective(goal_objective.as_deref()).as_deref(), + None, + false, + ); + } self.session.reasoning_effort = reasoning_effort; self.session.reasoning_effort_auto = reasoning_effort_auto; self.session.auto_model = auto_model; @@ -1849,6 +1871,10 @@ impl Engine { fn refresh_system_prompt(&mut self, mode: AppMode) { let user_memory_block = crate::memory::compose_block(self.config.memory_enabled, &self.config.memory_path); + let prompt_goal_objective = goal_objective_for_prompt( + self.config.goal_objective.as_deref(), + &self.config.goal_state, + ); let base = prompts::system_prompt_for_mode_with_context_skills_session_and_approval( mode, &self.config.workspace, @@ -1857,7 +1883,7 @@ impl Engine { Some(&self.config.instructions), prompts::PromptSessionContext { user_memory_block: user_memory_block.as_deref(), - goal_objective: self.config.goal_objective.as_deref(), + goal_objective: prompt_goal_objective.as_deref(), project_context_pack_enabled: self.config.project_context_pack_enabled, locale_tag: &self.config.locale_tag, translation_enabled: self.config.translation_enabled, @@ -1917,6 +1943,45 @@ fn system_prompt_hash(prompt: Option<&SystemPrompt>) -> u64 { hasher.finish() } +fn normalized_goal_objective(value: Option<&str>) -> Option { + value + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(str::to_string) +} + +fn sync_goal_state_from_host( + goal_state: &SharedGoalState, + objective: Option<&str>, + token_budget: Option, + completed: bool, +) { + match goal_state.lock() { + Ok(mut state) => state.sync_from_host(objective, token_budget, completed), + Err(err) => tracing::warn!("goal state lock poisoned while syncing host goal: {err}"), + } +} + +fn goal_objective_for_prompt( + configured_goal: Option<&str>, + goal_state: &SharedGoalState, +) -> Option { + match goal_state.lock() { + Ok(state) => { + if state.objective().is_some() { + return state.is_active().then(|| { + state + .objective() + .expect("checked goal objective") + .to_string() + }); + } + } + Err(err) => tracing::warn!("goal state lock poisoned while building prompt: {err}"), + } + normalized_goal_objective(configured_goal) +} + /// Spawn the engine in a background task pub fn spawn_engine(config: EngineConfig, api_config: &Config) -> EngineHandle { let (engine, handle) = Engine::new(config, api_config); diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 4271da23..b0294be2 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -199,6 +199,37 @@ fn engine_initial_prompt_includes_configured_goal() { assert!(prompt.contains("")); assert!(prompt.contains("Fix goal handoff")); + assert!( + engine + .config + .goal_state + .lock() + .expect("goal lock") + .is_active() + ); +} + +#[test] +fn refresh_system_prompt_uses_runtime_goal_state() { + let (mut engine, _handle) = Engine::new(EngineConfig::default(), &Config::default()); + { + let mut goal = engine.config.goal_state.lock().expect("goal lock"); + goal.create("Close the runtime goal loop".to_string(), None); + } + + engine.refresh_system_prompt(AppMode::Agent); + let prompt = match engine.session.system_prompt { + Some(SystemPrompt::Text(text)) => text, + Some(SystemPrompt::Blocks(blocks)) => blocks + .into_iter() + .map(|block| block.text) + .collect::>() + .join("\n"), + None => panic!("expected system prompt"), + }; + + assert!(prompt.contains("")); + assert!(prompt.contains("Close the runtime goal loop")); } #[test] @@ -904,6 +935,9 @@ fn turn_tool_registry_builder_keeps_plan_mode_read_only_for_files() { assert!(!registry.contains("rlm")); assert!(!registry.contains("fim_edit")); assert!(registry.contains("update_plan")); + assert!(registry.contains("create_goal")); + assert!(registry.contains("get_goal")); + assert!(registry.contains("update_goal")); assert!(registry.contains("task_list")); assert!(registry.contains("task_read")); assert!(registry.contains("handle_read")); @@ -956,6 +990,28 @@ fn parent_turn_registry_includes_recall_archive_for_investigative_modes() { } } +#[test] +fn parent_turn_registry_includes_goal_tools_for_all_modes() { + let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default()); + + for mode in [AppMode::Plan, AppMode::Agent, AppMode::Yolo] { + let registry = engine + .build_turn_tool_registry_builder( + mode, + engine.config.todos.clone(), + engine.config.plan_state.clone(), + ) + .build(engine.build_tool_context(mode, false)); + + for name in ["create_goal", "get_goal", "update_goal"] { + assert!( + registry.contains(name), + "parent {mode:?} registry should expose {name}" + ); + } + } +} + #[test] fn agent_mode_can_build_auto_approved_tool_context() { let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default()); diff --git a/crates/tui/src/core/engine/tool_setup.rs b/crates/tui/src/core/engine/tool_setup.rs index 2354d6a8..99c5b707 100644 --- a/crates/tui/src/core/engine/tool_setup.rs +++ b/crates/tui/src/core/engine/tool_setup.rs @@ -52,11 +52,13 @@ impl Engine { .with_runtime_read_only_task_tools() .with_todo_tool(todo_list) .with_plan_tool(plan_state) + .with_goal_tools(self.config.goal_state.clone()) } else { ToolRegistryBuilder::new() .with_agent_tools(self.session.allow_shell) .with_todo_tool(todo_list) .with_plan_tool(plan_state) + .with_goal_tools(self.config.goal_state.clone()) }; builder = builder diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index 1a1a9104..c0003910 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -39,6 +39,7 @@ impl Engine { } let mut active_tool_names = initial_active_tools(&tool_catalog); let mut loop_guard = LoopGuard::default(); + let mut goal_continuations_this_turn = 0u32; // Transparent stream-retry counter: when the chunked-transfer // connection dies mid-stream and we got nothing useful out of it @@ -1116,6 +1117,21 @@ impl Engine { continue; } + if let Some(continuation) = self + .goal_continuation_message_if_needed( + tool_registry, + &mut goal_continuations_this_turn, + ) + .await + { + self.add_session_message( + self.user_text_message_with_turn_metadata(continuation), + ) + .await; + turn.next_step(); + continue; + } + if thinking_only_no_sendable { let holding_for_subagents = { let running = { @@ -2006,6 +2022,55 @@ impl Engine { (TurnOutcomeStatus::Completed, None) } + async fn goal_continuation_message_if_needed( + &self, + tool_registry: Option<&crate::tools::ToolRegistry>, + continuations_this_turn: &mut u32, + ) -> Option { + let registry = tool_registry?; + if !registry.contains("update_goal") { + return None; + } + + let snapshot = match self.config.goal_state.lock() { + Ok(state) => state.snapshot(), + Err(err) => { + tracing::warn!("goal state lock poisoned during continuation check: {err}"); + return None; + } + }; + + if !snapshot.is_active() { + return None; + } + + let max = crate::tools::goal::MAX_GOAL_CONTINUATIONS_PER_TURN; + if *continuations_this_turn >= max { + let _ = self + .tx_event + .send(Event::status(format!( + "Goal remains active after {max} continuation pass(es); ending turn to avoid a runaway loop." + ))) + .await; + return None; + } + + *continuations_this_turn = (*continuations_this_turn).saturating_add(1); + let _ = self + .tx_event + .send(Event::status(format!( + "Continuing active goal audit ({}/{max})", + *continuations_this_turn + ))) + .await; + + Some(crate::tools::goal::render_continuation_prompt( + &snapshot, + *continuations_this_turn, + max, + )) + } + pub(super) fn messages_with_turn_metadata(&self) -> Vec { // `` is stored on user-text messages when the message is // appended. Do not rewrite historical messages at request time: doing diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 4a90d443..f2ab6af6 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -5185,6 +5185,7 @@ async fn run_exec_agent( capacity: crate::core::capacity::CapacityControllerConfig::from_app_config(config), todos: new_shared_todo_list(), plan_state: new_shared_plan_state(), + goal_state: crate::tools::goal::new_shared_goal_state(), max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, network_policy, snapshots_enabled: config.snapshots_config().enabled, diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index 5584df68..a12b08ba 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -400,6 +400,10 @@ pub const NEVER_APPROVAL: &str = include_str!("prompts/approvals/never.md"); /// model knows the format to use when writing `.deepseek/handoff.md`. pub const COMPACT_TEMPLATE: &str = include_str!("prompts/compact.md"); +/// Goal continuation audit template — injected by the engine when a runtime +/// goal is active and the assistant tries to end a turn without closing it. +pub const GOAL_CONTINUATION_PROMPT: &str = include_str!("prompts/continuation.md"); + /// Memory hygiene guidance — appended to the system prompt only when the /// session has a non-empty user-memory block. Steers the model toward /// writing durable memories as declarative facts ("User prefers concise diff --git a/crates/tui/src/prompts/continuation.md b/crates/tui/src/prompts/continuation.md new file mode 100644 index 00000000..492cb1a6 --- /dev/null +++ b/crates/tui/src/prompts/continuation.md @@ -0,0 +1,19 @@ +## Goal Continuation + +You are working toward an active session goal. Your task now is to make concrete +progress toward the objective and audit whether the full goal is complete. + +Completion is unproven until you verify it against current-state evidence: + +1. Derive the concrete requirements from the goal and the latest user + instructions. +2. Inspect authoritative evidence for each requirement: files, command output, + tests, runtime behavior, issue or PR state, rendered artifacts, or other + current sources. +3. Treat uncertain or indirect evidence as not complete. Continue work or gather + stronger evidence. +4. Only when the full objective is satisfied, call `update_goal` with + `status: "complete"` and concise evidence. + +If the goal cannot continue because of a real blocker, call `update_goal` with +`status: "blocked"` and explain the blocker. Otherwise continue making progress. diff --git a/crates/tui/src/runtime_threads.rs b/crates/tui/src/runtime_threads.rs index 78580eaf..25196f04 100644 --- a/crates/tui/src/runtime_threads.rs +++ b/crates/tui/src/runtime_threads.rs @@ -1958,6 +1958,7 @@ impl RuntimeThreadManager { ), todos: new_shared_todo_list(), plan_state: new_shared_plan_state(), + goal_state: crate::tools::goal::new_shared_goal_state(), max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, network_policy, snapshots_enabled: self.config.snapshots_config().enabled, diff --git a/crates/tui/src/tools/goal.rs b/crates/tui/src/tools/goal.rs new file mode 100644 index 00000000..5ce3c4d1 --- /dev/null +++ b/crates/tui/src/tools/goal.rs @@ -0,0 +1,559 @@ +//! Goal tools for the model-visible LLM-as-judge loop. +//! +//! The TUI already has a `/goal` command and passes its objective into the +//! engine prompt. This module keeps the runtime slice separate: a small +//! session-scoped state object plus tools the model can use to inspect and +//! close out that state. + +use std::sync::{Arc, Mutex}; +use std::time::Instant; + +use async_trait::async_trait; +use serde::Serialize; +use serde_json::{Value, json}; + +use crate::tools::spec::{ + ApprovalRequirement, ToolCapability, ToolContext, ToolError, ToolResult, ToolSpec, required_str, +}; + +/// Maximum number of automatic goal-continuation prompt injections in one +/// engine turn. This prevents a missing `update_goal` call from becoming an +/// unbounded local loop. +pub const MAX_GOAL_CONTINUATIONS_PER_TURN: u32 = 3; + +/// Shared reference to the current runtime goal. +pub type SharedGoalState = Arc>; + +/// Create an empty shared goal state. +#[must_use] +pub fn new_shared_goal_state() -> SharedGoalState { + Arc::new(Mutex::new(GoalState::default())) +} + +/// Create shared state seeded from the existing `/goal` surface. +#[must_use] +pub fn new_shared_goal_state_from_host( + objective: Option, + token_budget: Option, + completed: bool, +) -> SharedGoalState { + let mut state = GoalState::default(); + state.sync_from_host(objective.as_deref(), token_budget, completed); + Arc::new(Mutex::new(state)) +} + +/// Runtime status for a goal. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum GoalStatus { + Active, + Complete, + Blocked, +} + +impl GoalStatus { + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Active => "active", + Self::Complete => "complete", + Self::Blocked => "blocked", + } + } +} + +/// Session-local goal state. `Instant` stays runtime-only; snapshots expose +/// elapsed seconds so tool output remains serializable and stable. +#[derive(Debug, Clone, Default)] +pub struct GoalState { + objective: Option, + token_budget: Option, + status: Option, + started_at: Option, + finished_at: Option, + evidence: Option, + blocker: Option, +} + +impl GoalState { + #[must_use] + pub fn objective(&self) -> Option<&str> { + self.objective.as_deref() + } + + #[must_use] + pub fn is_active(&self) -> bool { + self.objective.is_some() && self.status == Some(GoalStatus::Active) + } + + pub fn sync_from_host( + &mut self, + objective: Option<&str>, + token_budget: Option, + completed: bool, + ) { + let objective = objective.map(str::trim).filter(|value| !value.is_empty()); + match objective { + Some(objective) => { + let changed = self.objective.as_deref() != Some(objective); + if changed { + self.objective = Some(objective.to_string()); + self.token_budget = token_budget; + self.started_at = Some(Instant::now()); + self.evidence = None; + self.blocker = None; + } else if token_budget.is_some() { + self.token_budget = token_budget; + } + + if changed || self.status.is_none() { + self.status = Some(if completed { + GoalStatus::Complete + } else { + GoalStatus::Active + }); + self.finished_at = completed.then(Instant::now); + } + } + None => self.clear(), + } + } + + pub fn create(&mut self, objective: String, token_budget: Option) { + self.objective = Some(objective); + self.token_budget = token_budget; + self.status = Some(GoalStatus::Active); + self.started_at = Some(Instant::now()); + self.finished_at = None; + self.evidence = None; + self.blocker = None; + } + + pub fn resume(&mut self, objective: Option) -> Result<(), &'static str> { + if let Some(objective) = objective { + self.create(objective, self.token_budget); + return Ok(()); + } + if self.objective.is_none() { + return Err("No goal exists to resume."); + } + self.status = Some(GoalStatus::Active); + self.finished_at = None; + self.evidence = None; + self.blocker = None; + Ok(()) + } + + pub fn mark_complete(&mut self, evidence: String) -> Result<(), &'static str> { + if self.objective.is_none() { + return Err("No active goal exists to complete."); + } + self.status = Some(GoalStatus::Complete); + self.finished_at = Some(Instant::now()); + self.evidence = Some(evidence); + self.blocker = None; + Ok(()) + } + + pub fn mark_blocked(&mut self, blocker: String) -> Result<(), &'static str> { + if self.objective.is_none() { + return Err("No active goal exists to block."); + } + self.status = Some(GoalStatus::Blocked); + self.finished_at = Some(Instant::now()); + self.blocker = Some(blocker); + Ok(()) + } + + pub fn clear(&mut self) { + *self = Self::default(); + } + + #[must_use] + pub fn snapshot(&self) -> GoalSnapshot { + GoalSnapshot { + objective: self.objective.clone(), + status: self + .status + .map(GoalStatus::as_str) + .unwrap_or("none") + .to_string(), + token_budget: self.token_budget, + elapsed_seconds: self.started_at.map(|started| started.elapsed().as_secs()), + evidence: self.evidence.clone(), + blocker: self.blocker.clone(), + } + } +} + +/// Serializable tool output and prompt input for the current goal. +#[derive(Debug, Clone, Serialize, PartialEq, Eq)] +pub struct GoalSnapshot { + pub objective: Option, + pub status: String, + pub token_budget: Option, + pub elapsed_seconds: Option, + pub evidence: Option, + pub blocker: Option, +} + +impl GoalSnapshot { + #[must_use] + pub fn is_active(&self) -> bool { + self.objective.is_some() && self.status == GoalStatus::Active.as_str() + } +} + +/// Render the bounded continuation prompt injected when a goal is still active +/// after an assistant message has no tool calls. +#[must_use] +pub fn render_continuation_prompt( + snapshot: &GoalSnapshot, + continuation_index: u32, + max_continuations: u32, +) -> String { + let goal_json = serde_json::to_string_pretty(snapshot).unwrap_or_else(|_| "{}".to_string()); + format!( + "{}\n\n## Active Goal State\n\n```json\n{}\n```\n\nContinuation pass: {}/{}.\nIf the goal is complete, call `update_goal` with `status: \"complete\"` and concrete evidence. If it is blocked, call `update_goal` with `status: \"blocked\"` and the blocker. Otherwise continue making progress toward the objective.", + crate::prompts::GOAL_CONTINUATION_PROMPT.trim(), + goal_json, + continuation_index, + max_continuations, + ) +} + +fn lock_goal_state( + state: &SharedGoalState, +) -> Result, ToolError> { + state + .lock() + .map_err(|_| ToolError::execution_failed("goal state lock poisoned")) +} + +fn parse_token_budget(input: &Value) -> Result, ToolError> { + let Some(raw) = input.get("token_budget") else { + return Ok(None); + }; + if raw.is_null() { + return Ok(None); + } + let Some(value) = raw.as_u64() else { + return Err(ToolError::invalid_input( + "token_budget must be a non-negative integer", + )); + }; + u32::try_from(value) + .map(Some) + .map_err(|_| ToolError::invalid_input("token_budget is too large")) +} + +fn json_result(snapshot: &GoalSnapshot) -> Result { + ToolResult::json(snapshot).map_err(|err| ToolError::execution_failed(err.to_string())) +} + +pub struct CreateGoalTool { + goal_state: SharedGoalState, +} + +impl CreateGoalTool { + #[must_use] + pub fn new(goal_state: SharedGoalState) -> Self { + Self { goal_state } + } +} + +#[async_trait] +impl ToolSpec for CreateGoalTool { + fn name(&self) -> &'static str { + "create_goal" + } + + fn description(&self) -> &'static str { + "Create or replace the current runtime goal. Use this when the user asks for a persistent goal that should be audited before the turn is allowed to finish." + } + + fn input_schema(&self) -> Value { + json!({ + "type": "object", + "properties": { + "objective": { + "type": "string", + "description": "The full objective to pursue. Keep the complete user goal, not a shortened one-turn version." + }, + "token_budget": { + "type": "integer", + "minimum": 0, + "description": "Optional soft token budget for the goal." + } + }, + "required": ["objective"], + "additionalProperties": false + }) + } + + fn capabilities(&self) -> Vec { + Vec::new() + } + + fn approval_requirement(&self) -> ApprovalRequirement { + ApprovalRequirement::Auto + } + + async fn execute(&self, input: Value, _context: &ToolContext) -> Result { + let objective = required_str(&input, "objective")?.trim().to_string(); + if objective.is_empty() { + return Err(ToolError::invalid_input("objective cannot be empty")); + } + let token_budget = parse_token_budget(&input)?; + let snapshot = { + let mut state = lock_goal_state(&self.goal_state)?; + state.create(objective, token_budget); + state.snapshot() + }; + json_result(&snapshot) + } +} + +pub struct GetGoalTool { + goal_state: SharedGoalState, +} + +impl GetGoalTool { + #[must_use] + pub fn new(goal_state: SharedGoalState) -> Self { + Self { goal_state } + } +} + +#[async_trait] +impl ToolSpec for GetGoalTool { + fn name(&self) -> &'static str { + "get_goal" + } + + fn description(&self) -> &'static str { + "Inspect the current runtime goal state, including objective, status, token budget, elapsed time, evidence, and blocker." + } + + fn input_schema(&self) -> Value { + json!({ + "type": "object", + "properties": {}, + "additionalProperties": false + }) + } + + fn capabilities(&self) -> Vec { + vec![ToolCapability::ReadOnly] + } + + fn approval_requirement(&self) -> ApprovalRequirement { + ApprovalRequirement::Auto + } + + fn supports_parallel(&self) -> bool { + true + } + + async fn execute( + &self, + _input: Value, + _context: &ToolContext, + ) -> Result { + let snapshot = { + let state = lock_goal_state(&self.goal_state)?; + state.snapshot() + }; + json_result(&snapshot) + } +} + +pub struct UpdateGoalTool { + goal_state: SharedGoalState, +} + +impl UpdateGoalTool { + #[must_use] + pub fn new(goal_state: SharedGoalState) -> Self { + Self { goal_state } + } +} + +#[async_trait] +impl ToolSpec for UpdateGoalTool { + fn name(&self) -> &'static str { + "update_goal" + } + + fn description(&self) -> &'static str { + "Update the runtime goal. This is the LLM-as-judge completion gate: only mark complete when the objective has been verified against concrete current-state evidence." + } + + fn input_schema(&self) -> Value { + json!({ + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": ["active", "complete", "blocked"], + "description": "Use complete only when the goal is fully satisfied; blocked when meaningful progress cannot continue; active to resume or revise the objective." + }, + "evidence": { + "type": "string", + "description": "Required when status is complete. Briefly cite the proof that the goal is done." + }, + "blocker": { + "type": "string", + "description": "Required when status is blocked. Explain the condition preventing progress." + }, + "objective": { + "type": "string", + "description": "Optional replacement objective when status is active." + } + }, + "required": ["status"], + "additionalProperties": false + }) + } + + fn capabilities(&self) -> Vec { + Vec::new() + } + + fn approval_requirement(&self) -> ApprovalRequirement { + ApprovalRequirement::Auto + } + + async fn execute(&self, input: Value, _context: &ToolContext) -> Result { + let status = required_str(&input, "status")?.trim().to_ascii_lowercase(); + let snapshot = { + let mut state = lock_goal_state(&self.goal_state)?; + match status.as_str() { + "complete" => { + let evidence = input + .get("evidence") + .and_then(Value::as_str) + .map(str::trim) + .unwrap_or_default() + .to_string(); + if evidence.is_empty() { + return Err(ToolError::invalid_input( + "evidence is required when status is complete", + )); + } + state + .mark_complete(evidence) + .map_err(ToolError::invalid_input)?; + } + "blocked" => { + let blocker = input + .get("blocker") + .and_then(Value::as_str) + .map(str::trim) + .unwrap_or_default() + .to_string(); + if blocker.is_empty() { + return Err(ToolError::invalid_input( + "blocker is required when status is blocked", + )); + } + state + .mark_blocked(blocker) + .map_err(ToolError::invalid_input)?; + } + "active" => { + let objective = input + .get("objective") + .and_then(Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(str::to_string); + state.resume(objective).map_err(ToolError::invalid_input)?; + } + other => { + return Err(ToolError::invalid_input(format!( + "unsupported goal status '{other}'" + ))); + } + } + state.snapshot() + }; + json_result(&snapshot) + } +} + +#[cfg(test)] +mod tests { + use serde_json::json; + + use super::*; + + #[tokio::test] + async fn create_get_and_complete_goal() { + let state = new_shared_goal_state(); + let ctx = ToolContext::new("."); + + let create = CreateGoalTool::new(state.clone()); + let created = create + .execute( + json!({ + "objective": "ship the runtime slice", + "token_budget": 1200 + }), + &ctx, + ) + .await + .expect("create goal"); + assert!(created.success); + assert!(created.content.contains("\"status\": \"active\"")); + + let get = GetGoalTool::new(state.clone()); + let current = get.execute(json!({}), &ctx).await.expect("get goal"); + assert!(current.content.contains("ship the runtime slice")); + assert!(current.content.contains("\"token_budget\": 1200")); + + let update = UpdateGoalTool::new(state.clone()); + let completed = update + .execute( + json!({ + "status": "complete", + "evidence": "focused tests passed" + }), + &ctx, + ) + .await + .expect("complete goal"); + assert!(completed.content.contains("\"status\": \"complete\"")); + assert!(completed.content.contains("focused tests passed")); + assert!(!state.lock().expect("goal lock").is_active()); + } + + #[tokio::test] + async fn update_goal_requires_completion_evidence() { + let state = + new_shared_goal_state_from_host(Some("prove completion".to_string()), None, false); + let update = UpdateGoalTool::new(state); + let err = update + .execute(json!({"status": "complete"}), &ToolContext::new(".")) + .await + .expect_err("missing evidence should fail"); + + assert!(err.to_string().contains("evidence is required")); + } + + #[test] + fn continuation_prompt_includes_bound_and_goal_state() { + let snapshot = GoalSnapshot { + objective: Some("finish issue 2199".to_string()), + status: "active".to_string(), + token_budget: None, + elapsed_seconds: Some(5), + evidence: None, + blocker: None, + }; + + let prompt = render_continuation_prompt(&snapshot, 2, 3); + assert!(prompt.contains("Goal Continuation")); + assert!(prompt.contains("finish issue 2199")); + assert!(prompt.contains("Continuation pass: 2/3")); + } +} diff --git a/crates/tui/src/tools/mod.rs b/crates/tui/src/tools/mod.rs index aea1cc5f..e5427065 100644 --- a/crates/tui/src/tools/mod.rs +++ b/crates/tui/src/tools/mod.rs @@ -24,6 +24,7 @@ pub mod fim; pub mod git; pub mod git_history; pub mod github; +pub mod goal; pub mod handle; pub mod image_ocr; pub mod js_execution; diff --git a/crates/tui/src/tools/registry.rs b/crates/tui/src/tools/registry.rs index 5254de70..5a437abf 100644 --- a/crates/tui/src/tools/registry.rs +++ b/crates/tui/src/tools/registry.rs @@ -844,6 +844,15 @@ impl ToolRegistryBuilder { self.with_tool(Arc::new(UpdatePlanTool::new(plan_state))) } + /// Include runtime goal tools (`create_goal`, `get_goal`, `update_goal`). + #[must_use] + pub fn with_goal_tools(self, goal_state: super::goal::SharedGoalState) -> Self { + use super::goal::{CreateGoalTool, GetGoalTool, UpdateGoalTool}; + self.with_tool(Arc::new(CreateGoalTool::new(goal_state.clone()))) + .with_tool(Arc::new(GetGoalTool::new(goal_state.clone()))) + .with_tool(Arc::new(UpdateGoalTool::new(goal_state))) + } + /// Include sub-agent management tools. #[must_use] pub fn with_subagent_tools( diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index db3d6c8f..17be9a0e 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -701,6 +701,11 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig { capacity: crate::core::capacity::CapacityControllerConfig::from_app_config(config), todos: app.todos.clone(), plan_state: app.plan_state.clone(), + goal_state: crate::tools::goal::new_shared_goal_state_from_host( + app.goal.goal_objective.clone(), + app.goal.goal_token_budget, + app.goal.goal_completed, + ), max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, network_policy: config.network.clone().map(|toml_cfg| { crate::network_policy::NetworkPolicyDecider::with_default_audit(toml_cfg.into_runtime())