diff --git a/.github/ISSUE_TEMPLATE/agent-task.yml b/.github/ISSUE_TEMPLATE/agent-task.yml new file mode 100644 index 00000000..26311028 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/agent-task.yml @@ -0,0 +1,91 @@ +name: Agent task +description: Create a self-contained task that a headless agent (DeepSeek V4, remote droplet) can execute end-to-end without human context. +title: "v0.8.58: " +labels: ["agent-ready", "v0.8.58"] +body: + - type: markdown + attributes: + value: | + ## Instructions for authors + + This issue will be executed by an autonomous agent running `codewhale exec --auto` + on a headless VM. The body must be **self-sufficient** — every file path, command, + and acceptance criterion must be explicit. The agent has: + + - A fresh clone of `Hmbown/CodeWhale` at `main` + - Shell, read, write, and git tools with auto-approvals + - No conversation context — this issue body is all it knows + + Fill every section. Sections marked * are required. + + - type: textarea + id: goal + attributes: + label: "Goal / Why" + description: "What problem does this fix, and why now? (2-4 sentences)" + placeholder: | + e.g. "The TUI freezes when 4+ sub-agents run concurrently because + AgentProgress events trigger a full redraw each. This blocks v0.8.58's + recommended sub-agent fanout." + validations: + required: true + + - type: textarea + id: scope + attributes: + label: "Scope / Plan" + description: "Numbered steps with file paths. Each step is one concrete action." + placeholder: | + 1. crates/tui/src/tui/ui.rs — add throttle in AgentProgress handler (line ~2308) + 2. crates/tui/src/tui/app.rs — add `last_agent_progress_redraw` field + 3. cargo test -p codewhale-tui — verify no regressions + validations: + required: true + + - type: textarea + id: key-files + attributes: + label: "Key files" + description: "One file path per line. The agent will read these first." + placeholder: | + crates/tui/src/tui/ui.rs + crates/tui/src/tui/sidebar.rs + crates/tui/src/tui/app.rs + validations: + required: true + + - type: textarea + id: acceptance-criteria + attributes: + label: "Acceptance criteria" + description: "Behavior-level checkboxes. Every item must be testable." + placeholder: | + - [ ] 4 concurrent sub-agents do not freeze TUI input + - [ ] Ctrl+C works during sub-agent activity + - [ ] Sidebar updates throttle under load + validations: + required: true + + - type: textarea + id: verification + attributes: + label: "Verification" + description: "Exact shell commands the agent must run to prove the fix works." + placeholder: | + cargo check -p codewhale-tui + cargo test -p codewhale-tui -- subagent + cargo clippy -p codewhale-tui -- -D warnings + validations: + required: true + + - type: textarea + id: out-of-scope + attributes: + label: "Out of scope" + description: "What this issue does NOT change. Prevents scope creep." + placeholder: | + - Changing the sub-agent execution model + - Reducing the recommended fanout count + - Network-level optimizations + validations: + required: true diff --git a/.gitignore b/.gitignore index f577bb64..ab1cf906 100644 --- a/.gitignore +++ b/.gitignore @@ -80,6 +80,7 @@ npm/*/bin/downloads/ apps/ # Claude Code runtime artifacts +.claude/settings.json .claude/scheduled_tasks.lock .claude/worktrees/ .worktrees/ diff --git a/crates/tui/src/client/responses.rs b/crates/tui/src/client/responses.rs index 7a01435f..5c3b80a1 100644 --- a/crates/tui/src/client/responses.rs +++ b/crates/tui/src/client/responses.rs @@ -323,13 +323,8 @@ impl DeepSeekClient { .and_then(|s| s.as_str()) .unwrap_or("completed"); let stop_reason = match status { - "completed" => { - if saw_tool_call { - "tool_use" - } else { - "end_turn" - } - } + "completed" if saw_tool_call => "tool_use", + "completed" => "end_turn", "incomplete" => "max_tokens", _ => "end_turn", }; diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 6f89fe19..07187c02 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -325,6 +325,9 @@ pub struct EngineConfig { /// Tool restriction from custom slash command frontmatter. /// `None` means the current turn may use the normal tool set. pub allowed_tools: Option>, + /// Tool deny-list. Deny always wins over allow (#3027). + /// `None` means no tools are explicitly denied. + pub disallowed_tools: Option>, /// Hook executor for control-plane hooks. /// `ToolCallBefore` hooks may deny a tool call with exit code 2. pub hook_executor: Option>, @@ -409,6 +412,7 @@ impl Default for EngineConfig { strict_tool_mode: false, goal_objective: None, allowed_tools: None, + disallowed_tools: None, hook_executor: None, locale_tag: "en".to_string(), workshop: None, @@ -1302,7 +1306,7 @@ impl Engine { } else if messages.is_empty() && system_prompt.is_none() { self.session.id = uuid::Uuid::new_v4().to_string(); } - self.session.messages = messages; + self.session.messages = messages.into(); self.session.compaction_summary_prompt = extract_compaction_summary_prompt(system_prompt.clone()); self.session.system_prompt = system_prompt; @@ -1351,7 +1355,7 @@ impl Engine { } } if let Some(idx) = cut { - self.session.messages.truncate(idx); + self.session.messages.truncate_to(idx); self.session.bump_messages_revision(); } // Now dispatch the new message as a normal send, @@ -1398,7 +1402,7 @@ impl Engine { .tx_event .send(Event::SessionUpdated { session_id: self.session.id.clone(), - messages: self.session.messages.clone(), + messages: self.session.messages.clone().into(), system_prompt: self.session.system_prompt.clone(), model: self.session.model.clone(), workspace: self.session.workspace.clone(), @@ -1809,6 +1813,11 @@ impl Engine { tool.defer_loading = Some(false); } } + filter_tool_catalog_for_gates( + &mut catalog, + self.config.allowed_tools.as_deref(), + self.config.disallowed_tools.as_deref(), + ); catalog }); let tool_catalog_for_event = tools.clone(); @@ -1938,7 +1947,7 @@ impl Engine { Ok(result) => { if !result.messages.is_empty() || self.session.messages.is_empty() { let messages_after = result.messages.len(); - self.session.messages = result.messages; + self.session.messages = result.messages.into(); self.merge_compaction_summary(result.summary_prompt); self.emit_session_updated().await; let removed = messages_before.saturating_sub(messages_after); @@ -2034,7 +2043,7 @@ impl Engine { { Ok(result) => { let messages_after = result.messages.len(); - self.session.messages = result.messages; + self.session.messages = result.messages.into(); self.emit_session_updated().await; let summary = format!( @@ -2088,7 +2097,7 @@ impl Engine { while self.session.messages.len() > MIN_RECENT_MESSAGES_TO_KEEP && self.estimated_input_tokens() > target_input_budget { - self.session.messages.remove(0); + self.session.messages.trim_front(1); self.session.bump_messages_revision(); removed = removed.saturating_add(1); } @@ -2110,7 +2119,7 @@ impl Engine { let mut retries_used = 0u32; let mut summary_prompt = None; - let mut compacted_messages = self.session.messages.clone(); + let mut compacted_messages: Vec = self.session.messages.clone().into(); let mut forced_config = self.config.compaction.clone(); forced_config.enabled = true; @@ -2145,7 +2154,7 @@ impl Engine { } if !compacted_messages.is_empty() || self.session.messages.is_empty() { - self.session.messages = compacted_messages; + self.session.messages = compacted_messages.into(); } self.merge_compaction_summary(summary_prompt); @@ -2219,7 +2228,7 @@ impl Engine { self.session.model.clone(), self.session.workspace.clone(), self.session.system_prompt.clone(), - self.session.messages.clone(), + self.session.messages.clone().into(), )) .with_cancel_token(self.cancel_token.clone()) .with_trusted_external_paths(trusted_external_paths); @@ -2782,6 +2791,19 @@ pub(crate) fn default_active_native_tool_names() -> &'static [&'static str] { tool_catalog::DEFAULT_ACTIVE_NATIVE_TOOLS } +/// Drop catalog entries the execution gates would reject (#3027): the model +/// should never be advertised a tool it cannot call. Deny wins over allow. +fn filter_tool_catalog_for_gates( + catalog: &mut Vec, + allowed_tools: Option<&[String]>, + disallowed_tools: Option<&[String]>, +) { + catalog.retain(|tool| { + !turn_loop::command_denies_tool(disallowed_tools, &tool.name) + && turn_loop::command_allows_tool(allowed_tools, &tool.name) + }); +} + use self::approval::{ApprovalDecision, ApprovalResult, UserInputDecision}; #[cfg(test)] use self::dispatch::should_parallelize_tool_batch; diff --git a/crates/tui/src/core/engine/capacity_flow.rs b/crates/tui/src/core/engine/capacity_flow.rs index 514ad120..3385f1e7 100644 --- a/crates/tui/src/core/engine/capacity_flow.rs +++ b/crates/tui/src/core/engine/capacity_flow.rs @@ -415,7 +415,7 @@ impl Engine { { Ok(result) => { if !result.messages.is_empty() || self.session.messages.is_empty() { - self.session.messages = result.messages; + self.session.messages = result.messages.into(); self.merge_compaction_summary(result.summary_prompt); refreshed = true; } diff --git a/crates/tui/src/core/engine/dispatch.rs b/crates/tui/src/core/engine/dispatch.rs index 335639c4..1595507d 100644 --- a/crates/tui/src/core/engine/dispatch.rs +++ b/crates/tui/src/core/engine/dispatch.rs @@ -99,6 +99,15 @@ pub(super) fn caller_allowed_for_tool( requested == "direct" } +/// Whole-word check for "mode"/"modes" — a plain `contains("mode")` also +/// matched "model", letting provider model errors skip the actionable-hint +/// suffix (#3020). +fn mentions_mode_word(lower: &str) -> bool { + lower + .split(|ch: char| !ch.is_ascii_alphanumeric()) + .any(|word| word == "mode" || word == "modes") +} + pub(super) fn format_tool_error(err: &ToolError, tool_name: &str) -> String { match err { ToolError::InvalidInput { message } => { @@ -117,7 +126,16 @@ pub(super) fn format_tool_error(err: &ToolError, tool_name: &str) -> String { ), ToolError::NotAvailable { message } => { let lower = message.to_ascii_lowercase(); - if lower.contains("current tool catalog") || lower.contains("did you mean:") { + // #3020: Pass through self-explanatory messages that already name the + // cause (mode switch, allow_shell, feature flag). Avoids appending a + // conflicting "Check mode, feature flags" suffix on top of + // "switch to Agent, Goal, or YOLO mode" which confuses the model. + if lower.contains("current tool catalog") + || lower.contains("did you mean:") + || mentions_mode_word(&lower) + || lower.contains("allow_shell") + || lower.contains("feature flag") + { message.clone() } else { format!( @@ -125,9 +143,20 @@ pub(super) fn format_tool_error(err: &ToolError, tool_name: &str) -> String { ) } } - ToolError::PermissionDenied { message } => format!( - "Tool '{tool_name}' was denied: {message}. Adjust approval mode or request permission." - ), + ToolError::PermissionDenied { message } => { + let lower = message.to_ascii_lowercase(); + // #3020: Pass through messages that already name the denial cause. + if mentions_mode_word(&lower) + || lower.contains("allow_shell") + || lower.contains("denied by user") + { + message.clone() + } else { + format!( + "Tool '{tool_name}' was denied: {message}. Adjust approval mode or request permission." + ) + } + } } } diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 212042c4..859c4525 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -85,6 +85,45 @@ fn build_engine_with_capacity(capacity: CapacityControllerConfig) -> Engine { engine } +fn catalog_tool(name: &str) -> Tool { + Tool { + tool_type: None, + name: name.to_string(), + description: String::new(), + input_schema: json!({"type": "object"}), + allowed_callers: None, + defer_loading: None, + input_examples: None, + strict: None, + cache_control: None, + } +} + +#[test] +fn tool_catalog_filter_applies_allow_and_deny_gates() { + // #3027 AC1: the advertised catalog must not contain tools the execution + // gates would deny; deny wins over allow. + let mut catalog = vec![ + catalog_tool("read_file"), + catalog_tool("exec_shell"), + catalog_tool("grep_files"), + ]; + filter_tool_catalog_for_gates( + &mut catalog, + Some(&["read_file".to_string(), "exec_shell".to_string()][..]), + Some(&["exec_shell".to_string()][..]), + ); + let names: Vec<&str> = catalog.iter().map(|t| t.name.as_str()).collect(); + assert_eq!(names, ["read_file"]); +} + +#[test] +fn tool_catalog_filter_is_inert_without_gates() { + let mut catalog = vec![catalog_tool("read_file"), catalog_tool("exec_shell")]; + filter_tool_catalog_for_gates(&mut catalog, None, None); + assert_eq!(catalog.len(), 2); +} + #[test] fn structured_state_block_includes_rich_plan_artifact() { let state = StructuredState { @@ -486,6 +525,33 @@ fn tool_error_messages_include_actionable_hints() { let timeout = ToolError::Timeout { seconds: 5 }; let formatted = format_tool_error(&timeout, "exec_shell"); assert!(formatted.contains("timed out")); + + // #3020: Plan-mode denials already explain the fix — pass through + // verbatim, with no conflicting "Adjust approval mode" suffix. + let plan_denied = ToolError::permission_denied( + "'exec_shell' is not available in Plan mode — switch to Agent, Goal, or YOLO mode to run commands and code.", + ); + let formatted = format_tool_error(&plan_denied, "exec_shell"); + assert_eq!( + formatted, + "'exec_shell' is not available in Plan mode — switch to Agent, Goal, or YOLO mode to run commands and code." + ); + + // Bare denials still get the actionable suffix. + let bare_denied = ToolError::permission_denied("nope"); + let formatted = format_tool_error(&bare_denied, "exec_shell"); + assert!( + formatted.contains("Adjust approval mode or request permission"), + "{formatted}" + ); + + // "model" must not satisfy the "mode" pass-through check. + let model_denied = ToolError::permission_denied("requested model is not allowed"); + let formatted = format_tool_error(&model_denied, "agent_open"); + assert!( + formatted.contains("Adjust approval mode or request permission"), + "{formatted}" + ); } #[test] @@ -1799,12 +1865,13 @@ fn runtime_prompt_is_projected_without_persisting_to_session_messages() { text: "summary after compaction".to_string(), cache_control: None, }], - }]; + }] + .into(); let stored = engine.session.messages.clone(); let request_messages = engine.messages_with_turn_metadata(); - assert_eq!(engine.session.messages, stored); + assert_eq!(&*engine.session.messages, &*stored); assert_eq!(request_messages.len(), stored.len() + 1); assert!( request_messages @@ -2488,7 +2555,7 @@ fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() { let first_request = engine.messages_with_turn_metadata(); assert_eq!( &first_request[..engine.session.messages.len()], - engine.session.messages.as_slice() + &engine.session.messages[..] ); assert_eq!(first_request.len(), engine.session.messages.len() + 1); assert_eq!(first_request.first(), Some(&first_user)); @@ -2514,7 +2581,7 @@ fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() { let second_request = engine.messages_with_turn_metadata(); assert_eq!( &second_request[..engine.session.messages.len()], - engine.session.messages.as_slice() + &engine.session.messages[..] ); assert_eq!(second_request.len(), engine.session.messages.len() + 1); assert_eq!(second_request.first(), Some(&first_user)); diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index 53cdf927..296e937c 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -153,7 +153,7 @@ impl Engine { // Only update if we got valid messages (never corrupt state) if !result.messages.is_empty() || self.session.messages.is_empty() { let auto_messages_after = result.messages.len(); - self.session.messages = result.messages; + self.session.messages = result.messages.into(); self.merge_compaction_summary(result.summary_prompt); self.emit_session_updated().await; let removed = auto_messages_before.saturating_sub(auto_messages_after); @@ -1332,11 +1332,15 @@ impl Engine { let active_tools_at_batch_start = active_tool_names.clone(); let mut deferred_tools_hydrated_this_batch: std::collections::HashSet = std::collections::HashSet::new(); + // #3026: `additionalContext` strings from tool_call_before hooks, + // keyed by tool id; appended to the tool result sent to the model. + let mut hook_contexts: std::collections::HashMap = + std::collections::HashMap::new(); let mut plans: Vec = Vec::with_capacity(tool_uses.len()); for (index, tool) in tool_uses.iter_mut().enumerate() { let tool_id = tool.id.clone(); let mut tool_name = tool.name.clone(); - let tool_input = tool.input.clone(); + let mut tool_input = tool.input.clone(); let tool_caller = tool.caller.clone(); crate::logging::info(format!( "Planning tool '{tool_name}' with input: {tool_input:?}" @@ -1362,6 +1366,10 @@ impl Engine { let mut read_only = false; let mut blocked_error: Option = None; let mut guard_result: Option = None; + // #3026: set by a hook `ask` decision; applied AFTER the + // registry-based approval computation below so it cannot be + // clobbered by it. + let mut hook_requires_approval = false; if mode == AppMode::Plan && matches!( @@ -1380,6 +1388,16 @@ impl Engine { ))); } + // #3027: deny wins over allow — check the deny-list first so a + // tool present in both lists is still blocked. + if blocked_error.is_none() + && command_denies_tool(self.config.disallowed_tools.as_deref(), &tool_name) + { + blocked_error = Some(ToolError::permission_denied(format!( + "Tool '{tool_name}' is in the disallowed-tools list" + ))); + } + if blocked_error.is_none() && !command_allows_tool(self.config.allowed_tools.as_deref(), &tool_name) { @@ -1446,29 +1464,25 @@ impl Engine { tracing::error!("Hook executor task panicked: {join_err}"); Vec::new() }); - if let Some(denial) = hook_results - .iter() - .find(|result| result.exit_code == Some(2)) - { - let reason = denial - .stdout - .trim() - .lines() - .next() - .filter(|line| !line.is_empty()) - .or_else(|| { - denial - .stderr - .trim() - .lines() - .next() - .filter(|line| !line.is_empty()) - }) - .or(denial.error.as_deref()) - .unwrap_or("ToolCallBefore hook denied tool execution"); + // #3026: fold all foreground hook results into one + // decision: deny (exit code 2 or JSON) > ask > allow; + // last `updatedInput` writer wins; `additionalContext` + // strings are concatenated. + let fold = fold_tool_call_before_results(&hook_results); + if let Some(reason) = fold.deny_reason { blocked_error = Some(ToolError::permission_denied(format!( "ToolCallBefore hook denied tool '{tool_name}': {reason}" ))); + } else { + if fold.requires_approval { + hook_requires_approval = true; + } + if let Some(updated) = fold.updated_input { + tool_input = updated; + } + if let Some(context) = fold.additional_context { + hook_contexts.insert(tool_id.clone(), context); + } } } @@ -1504,6 +1518,14 @@ impl Engine { read_only = true; } + // #3026: a hook `ask` decision forces the approval prompt even + // for tools the registry would auto-run. Must stay after the + // registry-based computation above, which assigns rather than + // ORs `approval_required`. + if hook_requires_approval { + approval_required = true; + } + let should_emit_hydration_status = !deferred_tools_hydrated_this_batch.contains(&tool_name); if blocked_error.is_none() @@ -2129,6 +2151,15 @@ impl Engine { .await; } + // #3026: pipe `additionalContext` from tool_call_before + // hooks back to the model alongside the tool result. + let output_for_context = match hook_contexts.get(&outcome.id) { + Some(context) => { + format!("{output_for_context}\n\n[hook context] {context}") + } + None => output_for_context, + }; + self.add_session_message(Message { role: "user".to_string(), content: vec![ContentBlock::ToolResult { @@ -2312,7 +2343,7 @@ impl Engine { // messages. This preserves the stable prefix through all stored // messages while avoiding strict chat templates that only allow // system messages at messages[0]. - let mut messages = self.session.messages.clone(); + let mut messages: Vec = self.session.messages.clone().into(); messages.push(self.runtime_prompt_message()); messages } @@ -2371,13 +2402,97 @@ mod stream_timeout_tests { } } -fn command_allows_tool(allowed_tools: Option<&[String]>, tool_name: &str) -> bool { +pub(super) fn command_allows_tool(allowed_tools: Option<&[String]>, tool_name: &str) -> bool { let Some(allowed_tools) = allowed_tools else { return true; }; allowed_tools.contains(&tool_name.to_ascii_lowercase()) } +/// Folded outcome of all `tool_call_before` hook results for one tool call +/// (#3026). Precedence: deny (exit code 2 or JSON) > ask > allow; +/// `updatedInput` is last-writer-wins; `additionalContext` is concatenated. +#[derive(Debug, Default, PartialEq)] +struct ToolCallHookFold { + /// Denial reason from an exit-code-2 hook or a JSON `deny` decision. + deny_reason: Option, + /// At least one hook returned a JSON `ask` decision. + requires_approval: bool, + /// Replacement tool input from the last hook that supplied one. + updated_input: Option, + /// Concatenated `additionalContext` strings from all hooks. + additional_context: Option, +} + +fn fold_tool_call_before_results(results: &[crate::hooks::HookResult]) -> ToolCallHookFold { + let mut fold = ToolCallHookFold::default(); + + // Legacy hard deny: exit code 2 wins regardless of stdout (backwards + // compatible with pre-#3026 hooks). + if let Some(denial) = results.iter().find(|result| result.exit_code == Some(2)) { + let reason = denial + .stdout + .trim() + .lines() + .next() + .filter(|line| !line.is_empty()) + .or_else(|| { + denial + .stderr + .trim() + .lines() + .next() + .filter(|line| !line.is_empty()) + }) + .or(denial.error.as_deref()) + .unwrap_or("ToolCallBefore hook denied tool execution"); + fold.deny_reason = Some(reason.to_string()); + return fold; + } + + for result in results { + // Background hooks return immediately with no process result and + // cannot steer (the caller warns about that configuration). + if result.exit_code.is_none() { + continue; + } + let parsed = crate::hooks::parse_tool_call_before_stdout(&result.stdout); + match parsed.decision { + Some(crate::hooks::ToolCallDecision::Deny) => { + fold.deny_reason = + Some(parsed.reason.unwrap_or_else(|| { + "ToolCallBefore hook denied tool execution".to_string() + })); + return fold; + } + Some(crate::hooks::ToolCallDecision::Ask) => fold.requires_approval = true, + Some(crate::hooks::ToolCallDecision::Allow) | None => {} + } + if let Some(updated) = parsed.updated_input { + fold.updated_input = Some(updated); + } + if let Some(context) = parsed.additional_context { + match &mut fold.additional_context { + Some(existing) => { + existing.push('\n'); + existing.push_str(&context); + } + None => fold.additional_context = Some(context), + } + } + } + fold +} + +/// Check whether `tool_name` is explicitly denied (#3027). +/// Deny always wins over allow. +pub(super) fn command_denies_tool(disallowed_tools: Option<&[String]>, tool_name: &str) -> bool { + let Some(disallowed_tools) = disallowed_tools else { + return false; + }; + disallowed_tools.contains(&tool_name.to_ascii_lowercase()) +} + fn resolve_tool_definition<'a>( tool_name: &mut String, tool_catalog: &'a [Tool], @@ -2713,6 +2828,36 @@ mod tests { assert!(!command_allows_tool(Some(&allowed), "bash")); } + #[test] + fn disallowed_tools_gate_blocks_listed_tool() { + let disallowed = vec!["exec_shell".to_string()]; + assert!(command_denies_tool(Some(&disallowed), "exec_shell")); + assert!(!command_denies_tool(Some(&disallowed), "read_file")); + } + + #[test] + fn disallowed_tools_gate_blocks_case_insensitively() { + let disallowed = vec!["exec_shell".to_string()]; + assert!(command_denies_tool(Some(&disallowed), "Exec_Shell")); + } + + #[test] + fn disallowed_tools_gate_is_inert_when_not_set() { + assert!(!command_denies_tool(None, "exec_shell")); + let empty: Vec = Vec::new(); + assert!(!command_denies_tool(Some(&empty), "exec_shell")); + } + + #[test] + fn deny_wins_over_allow_for_same_tool() { + // The turn-loop gate chain checks the deny-list before the allow-list, + // so a tool present in both must still be blocked. + let allowed = vec!["exec_shell".to_string()]; + let disallowed = vec!["exec_shell".to_string()]; + assert!(command_allows_tool(Some(&allowed), "exec_shell")); + assert!(command_denies_tool(Some(&disallowed), "exec_shell")); + } + #[test] fn review_regression_allowed_tools_gate_checks_canonical_tool_name() { let tmp = tempfile::tempdir().expect("tempdir"); @@ -2831,4 +2976,149 @@ mod tests { assert_eq!(results[0].exit_code, Some(2)); assert!(results[0].stdout.contains("security")); } + + // ── #3026: JSON decision contract fold ───────────────────────────────── + + fn hook_result(stdout: &str, exit_code: Option) -> crate::hooks::HookResult { + crate::hooks::HookResult { + name: None, + success: exit_code == Some(0), + exit_code, + stdout: stdout.to_string(), + stderr: String::new(), + duration: Duration::from_millis(1), + error: None, + } + } + + #[test] + fn hook_fold_json_deny_blocks_with_reason() { + let fold = fold_tool_call_before_results(&[hook_result( + r#"{"decision":"deny","reason":"nope"}"#, + Some(0), + )]); + assert_eq!(fold.deny_reason.as_deref(), Some("nope")); + assert!(!fold.requires_approval); + } + + #[test] + fn hook_fold_exit_code_2_denies_regardless_of_stdout() { + let fold = + fold_tool_call_before_results(&[hook_result(r#"{"decision":"allow"}"#, Some(2))]); + assert!( + fold.deny_reason.is_some(), + "exit code 2 must hard-deny even when stdout says allow" + ); + } + + #[test] + fn hook_fold_deny_wins_over_ask_and_allow() { + let fold = fold_tool_call_before_results(&[ + hook_result(r#"{"decision":"allow"}"#, Some(0)), + hook_result(r#"{"decision":"ask"}"#, Some(0)), + hook_result(r#"{"decision":"deny","reason":"policy"}"#, Some(0)), + ]); + assert_eq!(fold.deny_reason.as_deref(), Some("policy")); + } + + #[test] + fn hook_fold_ask_requires_approval() { + let fold = fold_tool_call_before_results(&[ + hook_result(r#"{"decision":"allow"}"#, Some(0)), + hook_result(r#"{"decision":"ask"}"#, Some(0)), + ]); + assert!(fold.deny_reason.is_none()); + assert!(fold.requires_approval); + } + + #[test] + fn hook_fold_updated_input_last_writer_wins() { + let fold = fold_tool_call_before_results(&[ + hook_result(r#"{"updatedInput":{"command":"first"}}"#, Some(0)), + hook_result(r#"{"updatedInput":{"command":"second"}}"#, Some(0)), + ]); + assert_eq!( + fold.updated_input, + Some(serde_json::json!({"command":"second"})) + ); + } + + #[test] + fn hook_fold_background_results_cannot_steer() { + // Background hooks return exit_code: None immediately — their stdout + // (if any were captured) must not deny, ask, or rewrite input. + let fold = fold_tool_call_before_results(&[hook_result( + r#"{"decision":"deny","reason":"too late"}"#, + None, + )]); + assert_eq!(fold, ToolCallHookFold::default()); + } + + #[test] + fn hook_fold_concatenates_additional_context() { + let fold = fold_tool_call_before_results(&[ + hook_result(r#"{"additionalContext":"one"}"#, Some(0)), + hook_result(r#"{"additionalContext":"two"}"#, Some(0)), + ]); + assert_eq!(fold.additional_context.as_deref(), Some("one\ntwo")); + } + + #[test] + fn hook_fold_legacy_stdout_is_passthrough() { + let fold = fold_tool_call_before_results(&[ + hook_result("", Some(0)), + hook_result("not json at all", Some(0)), + hook_result(r#"{"status":"fine"}"#, Some(1)), + ]); + assert_eq!(fold, ToolCallHookFold::default()); + } + + #[test] + fn hook_gate_denies_with_json_decision_from_executor() { + use crate::hooks::{Hook, HookContext, HookEvent, HookExecutor, HooksConfig}; + + let deny_cmd = if cfg!(windows) { + r#"echo {"decision":"deny","reason":"blocked by project policy"}"# + } else { + r#"echo '{"decision":"deny","reason":"blocked by project policy"}'"# + }; + let config = HooksConfig { + enabled: true, + hooks: vec![Hook::new(HookEvent::ToolCallBefore, deny_cmd)], + ..HooksConfig::default() + }; + let executor = HookExecutor::new(config, std::path::PathBuf::from(".")); + let ctx = HookContext::new().with_tool_name("exec_shell"); + let results = executor.execute(HookEvent::ToolCallBefore, &ctx); + + let fold = fold_tool_call_before_results(&results); + assert_eq!( + fold.deny_reason.as_deref(), + Some("blocked by project policy"), + "JSON deny with exit code 0 must block: {results:?}" + ); + } + + #[test] + fn hook_gate_ask_forces_approval_from_executor() { + use crate::hooks::{Hook, HookContext, HookEvent, HookExecutor, HooksConfig}; + + let ask_cmd = if cfg!(windows) { + r#"echo {"decision":"ask"}"# + } else { + r#"echo '{"decision":"ask"}'"# + }; + let config = HooksConfig { + enabled: true, + hooks: vec![Hook::new(HookEvent::ToolCallBefore, ask_cmd)], + ..HooksConfig::default() + }; + let executor = HookExecutor::new(config, std::path::PathBuf::from(".")); + let ctx = HookContext::new().with_tool_name("write_file"); + let results = executor.execute(HookEvent::ToolCallBefore, &ctx); + + let fold = fold_tool_call_before_results(&results); + assert!(fold.deny_reason.is_none()); + assert!(fold.requires_approval); + } } diff --git a/crates/tui/src/core/session.rs b/crates/tui/src/core/session.rs index 67ffb7ad..a0cd4d94 100644 --- a/crates/tui/src/core/session.rs +++ b/crates/tui/src/core/session.rs @@ -5,7 +5,7 @@ use crate::models::{Message, SystemPrompt, Usage}; use crate::prefix_cache::PrefixStabilityManager; use crate::project_context::{ProjectContext, load_project_context_with_parents}; -use crate::prompt_zones::FrozenPrefix; +use crate::prompt_zones::{AppendLog, FrozenPrefix}; use crate::tui::approval::ApprovalMode; use crate::working_set::WorkingSet; use std::path::PathBuf; @@ -40,8 +40,8 @@ pub struct Session { /// Persisted summary blocks generated by context compaction. pub compaction_summary_prompt: Option, - /// Conversation history (API format) - pub messages: Vec, + /// Conversation history (API format), backed by AppendLog (#2264). + pub messages: AppendLog, /// Total tokens used in this session pub total_usage: SessionUsage, @@ -145,7 +145,7 @@ impl Session { system_prompt: None, system_prompt_override: false, compaction_summary_prompt: None, - messages: Vec::new(), + messages: AppendLog::new(), total_usage: SessionUsage::default(), allow_shell, trust_mode, @@ -179,7 +179,7 @@ impl Session { /// invalidate atomically. #[allow(dead_code)] pub fn replace_messages(&mut self, messages: Vec) { - self.messages = messages; + self.messages = messages.into(); self.messages_revision = self.messages_revision.saturating_add(1); } diff --git a/crates/tui/src/hooks.rs b/crates/tui/src/hooks.rs index f6d2ddee..7843ed54 100644 --- a/crates/tui/src/hooks.rs +++ b/crates/tui/src/hooks.rs @@ -217,6 +217,30 @@ fn default_enabled() -> bool { } impl HooksConfig { + /// Load global hooks merged with project-local `.codewhale/hooks.toml` (#3026). + /// + /// Project hooks are appended after global hooks. A malformed project file + /// logs a warning and falls back to global-only. + pub fn load_with_project(global: HooksConfig, workspace: &std::path::Path) -> HooksConfig { + let project_path = workspace.join(".codewhale").join("hooks.toml"); + let Ok(contents) = std::fs::read_to_string(&project_path) else { + return global; + }; + let project: HooksConfig = match toml::from_str(&contents) { + Ok(cfg) => cfg, + Err(e) => { + tracing::warn!( + "Failed to parse project hooks at {}: {e}; falling back to global hooks only", + project_path.display() + ); + return global; + } + }; + let mut merged = global; + merged.hooks.extend(project.hooks); + merged + } + /// Get hooks for a specific event pub fn hooks_for_event(&self, event: HookEvent) -> Vec<&Hook> { if !self.enabled { @@ -484,6 +508,90 @@ enum MessageSubmitStdout { Invalid(String), } +/// Parsed stdout from a `tool_call_before` hook (#3026). +/// +/// Hooks may emit a JSON decision on stdout: +/// `{"decision": "allow"|"deny"|"ask", "reason": "...", +/// "updatedInput": {...}, "additionalContext": "..."}` +/// Non-JSON or empty stdout → legacy passthrough (allow). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ToolCallBeforeStdout { + pub decision: Option, + pub reason: Option, + pub updated_input: Option, + pub additional_context: Option, +} + +/// Decision a hook can return for a tool call. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ToolCallDecision { + Allow, + Deny, + Ask, +} + +pub(crate) fn parse_tool_call_before_stdout(stdout: &str) -> ToolCallBeforeStdout { + let passthrough = ToolCallBeforeStdout { + decision: None, + reason: None, + updated_input: None, + additional_context: None, + }; + let trimmed = stdout.trim(); + if trimmed.is_empty() { + return passthrough; + } + let value: serde_json::Value = match serde_json::from_str(trimmed) { + Ok(v) => v, + // Non-JSON stdout → legacy passthrough (allow). + Err(_) => return passthrough, + }; + let Some(obj) = value.as_object() else { + tracing::warn!( + "tool_call_before hook stdout is JSON but not an object; \ + ignoring it (legacy passthrough)" + ); + return passthrough; + }; + let decision = obj + .get("decision") + .and_then(|v| v.as_str()) + .and_then(|s| match s { + "allow" => Some(ToolCallDecision::Allow), + "deny" => Some(ToolCallDecision::Deny), + "ask" => Some(ToolCallDecision::Ask), + other => { + tracing::warn!( + "tool_call_before hook returned unrecognized decision \ + '{other}' (expected allow|deny|ask); treating as allow" + ); + None + } + }); + let reason = obj + .get("reason") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let updated_input = obj.get("updatedInput").cloned().filter(|v| { + if v.is_object() { + true + } else { + tracing::warn!("tool_call_before hook updatedInput must be a JSON object; ignoring"); + false + } + }); + let additional_context = obj + .get("additionalContext") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + ToolCallBeforeStdout { + decision, + reason, + updated_input, + additional_context, + } +} + /// Post-turn accumulated totals included in the `turn_end` observer payload. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct TurnEndTotals { @@ -518,8 +626,11 @@ impl HookExecutor { fn build_shell_command(command: &str) -> Command { #[cfg(windows)] { + use std::os::windows::process::CommandExt as _; let mut cmd = Command::new("cmd"); - cmd.arg("/C").arg(command); + // raw_arg: cmd.exe does not parse the CRT-style \" escapes that + // Command::arg would insert, so pass the command line verbatim. + cmd.arg("/C").raw_arg(command); cmd } #[cfg(not(windows))] @@ -862,13 +973,30 @@ impl HookExecutor { results } + /// Check whether a tool name matches a condition pattern with `*` glob support. + fn tool_name_matches_condition(tool_name: &str, pattern: &str) -> bool { + if !pattern.contains('*') { + return tool_name == pattern; + } + // Escape regex metacharacters except `*`, which becomes `.*`. + let escaped = regex::escape(pattern); + let regex_pattern = escaped.replace(r"\*", ".*"); + let anchored = format!("^{regex_pattern}$"); + regex::Regex::new(&anchored).is_ok_and(|re| re.is_match(tool_name)) + } + /// Check if a hook's condition matches the context #[allow(clippy::only_used_in_recursion)] fn matches_condition(&self, hook: &Hook, context: &HookContext) -> bool { match &hook.condition { None | Some(HookCondition::Always) => true, Some(HookCondition::ToolName { name }) => { - context.tool_name.as_ref().is_some_and(|n| n == name) + // #3026: Support `*` globs in tool_name conditions so + // `mcp__*` matches all MCP tools. Exact names keep working. + context + .tool_name + .as_ref() + .is_some_and(|n| Self::tool_name_matches_condition(n, name)) } Some(HookCondition::ToolCategory { category }) => { // Map tool names to categories @@ -2147,4 +2275,216 @@ exit 7 assert!(!executor.has_hooks_for_event(HookEvent::OnError)); assert!(!executor.has_hooks_for_event(HookEvent::ModeChange)); } + + // ── #3026: tool_call_before stdout decision contract ────────────────── + + #[test] + fn tool_call_before_stdout_parses_deny_with_reason() { + let parsed = + parse_tool_call_before_stdout(r#"{"decision":"deny","reason":"blocked by policy"}"#); + assert_eq!(parsed.decision, Some(ToolCallDecision::Deny)); + assert_eq!(parsed.reason.as_deref(), Some("blocked by policy")); + assert!(parsed.updated_input.is_none()); + assert!(parsed.additional_context.is_none()); + } + + #[test] + fn tool_call_before_stdout_parses_ask_and_allow() { + let ask = parse_tool_call_before_stdout(r#"{"decision":"ask"}"#); + assert_eq!(ask.decision, Some(ToolCallDecision::Ask)); + + let allow = parse_tool_call_before_stdout(r#"{"decision":"allow"}"#); + assert_eq!(allow.decision, Some(ToolCallDecision::Allow)); + } + + #[test] + fn tool_call_before_stdout_parses_updated_input_object() { + let parsed = + parse_tool_call_before_stdout(r#"{"updatedInput":{"command":"ls -la","timeout":5}}"#); + assert!(parsed.decision.is_none()); + assert_eq!( + parsed.updated_input, + Some(serde_json::json!({"command":"ls -la","timeout":5})) + ); + } + + #[test] + fn tool_call_before_stdout_rejects_non_object_updated_input() { + let parsed = parse_tool_call_before_stdout(r#"{"updatedInput":"rm -rf /"}"#); + assert!( + parsed.updated_input.is_none(), + "updatedInput must be a JSON object" + ); + let parsed = parse_tool_call_before_stdout(r#"{"updatedInput":[1,2]}"#); + assert!(parsed.updated_input.is_none()); + } + + #[test] + fn tool_call_before_stdout_parses_additional_context() { + let parsed = + parse_tool_call_before_stdout(r#"{"additionalContext":"remember the style guide"}"#); + assert_eq!( + parsed.additional_context.as_deref(), + Some("remember the style guide") + ); + } + + #[test] + fn tool_call_before_stdout_empty_and_non_json_are_passthrough() { + for stdout in ["", " \n ", "ok, proceeding", "exit code zero"] { + let parsed = parse_tool_call_before_stdout(stdout); + assert!(parsed.decision.is_none(), "stdout {stdout:?}"); + assert!(parsed.reason.is_none()); + assert!(parsed.updated_input.is_none()); + assert!(parsed.additional_context.is_none()); + } + } + + #[test] + fn tool_call_before_stdout_json_without_decision_is_passthrough() { + let parsed = parse_tool_call_before_stdout(r#"{"status":"fine"}"#); + assert!(parsed.decision.is_none()); + } + + #[test] + fn tool_call_before_stdout_non_object_json_is_passthrough() { + for stdout in [r#""deny""#, "[1,2,3]", "42", "true"] { + let parsed = parse_tool_call_before_stdout(stdout); + assert!(parsed.decision.is_none(), "stdout {stdout:?}"); + } + } + + #[test] + fn tool_call_before_stdout_unknown_decision_treated_as_allow() { + let parsed = parse_tool_call_before_stdout(r#"{"decision":"block"}"#); + assert!(parsed.decision.is_none()); + } + + // ── #3026: glob matchers for tool_name conditions ────────────────────── + + #[test] + fn tool_name_glob_matches_mcp_prefix() { + assert!(HookExecutor::tool_name_matches_condition( + "mcp__github__create_issue", + "mcp__*" + )); + assert!(!HookExecutor::tool_name_matches_condition( + "read_file", + "mcp__*" + )); + } + + #[test] + fn tool_name_exact_match_still_works() { + assert!(HookExecutor::tool_name_matches_condition( + "read_file", + "read_file" + )); + assert!(!HookExecutor::tool_name_matches_condition( + "read_files", + "read_file" + )); + } + + #[test] + fn tool_name_glob_escapes_regex_metacharacters() { + // Without escaping, `.` would match any character. + assert!(!HookExecutor::tool_name_matches_condition( + "mcpXgithub", + "mcp.git*" + )); + assert!(HookExecutor::tool_name_matches_condition( + "mcp.github", + "mcp.git*" + )); + // `+` and parens must be literal too. + assert!(HookExecutor::tool_name_matches_condition( + "weird+tool(name)", + "weird+tool(*)" + )); + } + + #[test] + fn tool_name_glob_supports_infix_and_suffix_positions() { + assert!(HookExecutor::tool_name_matches_condition( + "mcp__github__create_issue", + "mcp__*__create_issue" + )); + assert!(HookExecutor::tool_name_matches_condition( + "task_shell_start", + "*_shell_start" + )); + assert!(!HookExecutor::tool_name_matches_condition( + "task_shell_wait", + "*_shell_start" + )); + } + + // ── #3026: project-local hooks ───────────────────────────────────────── + + #[test] + fn load_with_project_missing_file_keeps_global() { + let dir = tempfile::tempdir().expect("tempdir"); + let global = HooksConfig { + enabled: true, + hooks: vec![Hook::new(HookEvent::ToolCallBefore, "echo global")], + ..HooksConfig::default() + }; + + let merged = HooksConfig::load_with_project(global.clone(), dir.path()); + assert_eq!(merged.hooks.len(), 1); + assert_eq!(merged.hooks[0].command, "echo global"); + } + + #[test] + fn load_with_project_appends_project_hooks_after_global() { + let dir = tempfile::tempdir().expect("tempdir"); + let project_dir = dir.path().join(".codewhale"); + std::fs::create_dir_all(&project_dir).expect("mkdir .codewhale"); + std::fs::write( + project_dir.join("hooks.toml"), + r#" +[[hooks]] +event = "tool_call_before" +command = "echo project" +"#, + ) + .expect("write hooks.toml"); + + let global = HooksConfig { + enabled: true, + hooks: vec![Hook::new(HookEvent::ToolCallBefore, "echo global")], + ..HooksConfig::default() + }; + + let merged = HooksConfig::load_with_project(global, dir.path()); + assert_eq!(merged.hooks.len(), 2); + assert_eq!( + merged.hooks[0].command, "echo global", + "global hooks run first" + ); + assert_eq!( + merged.hooks[1].command, "echo project", + "project hooks are appended after global" + ); + } + + #[test] + fn load_with_project_malformed_file_falls_back_to_global() { + let dir = tempfile::tempdir().expect("tempdir"); + let project_dir = dir.path().join(".codewhale"); + std::fs::create_dir_all(&project_dir).expect("mkdir .codewhale"); + std::fs::write(project_dir.join("hooks.toml"), "this is [ not toml") + .expect("write hooks.toml"); + + let global = HooksConfig { + enabled: true, + hooks: vec![Hook::new(HookEvent::ToolCallBefore, "echo global")], + ..HooksConfig::default() + }; + + let merged = HooksConfig::load_with_project(global, dir.path()); + assert_eq!(merged.hooks.len(), 1, "malformed project file is ignored"); + assert_eq!(merged.hooks[0].command, "echo global"); + } } diff --git a/crates/tui/src/localization.rs b/crates/tui/src/localization.rs index 3dfec9a9..2317a737 100644 --- a/crates/tui/src/localization.rs +++ b/crates/tui/src/localization.rs @@ -537,6 +537,25 @@ pub enum MessageId { ApprovalChooseAction, ApprovalIntentLabel, ApprovalMoreLines, + // Sandbox elevation dialog. + ElevationTitleSandboxDenied, + ElevationTitleRequired, + ElevationFieldTool, + ElevationFieldCmd, + ElevationFieldReason, + ElevationImpactHeader, + ElevationImpactNetwork, + ElevationImpactWrite, + ElevationImpactFullAccess, + ElevationPromptProceed, + ElevationOptionNetwork, + ElevationOptionWrite, + ElevationOptionFullAccess, + ElevationOptionAbort, + ElevationOptionNetworkDesc, + ElevationOptionWriteDesc, + ElevationOptionFullAccessDesc, + ElevationOptionAbortDesc, CtxInspTitle, CtxInspSessionContext, @@ -892,6 +911,24 @@ pub const ALL_MESSAGE_IDS: &[MessageId] = &[ MessageId::ApprovalChooseAction, MessageId::ApprovalIntentLabel, MessageId::ApprovalMoreLines, + MessageId::ElevationTitleSandboxDenied, + MessageId::ElevationTitleRequired, + MessageId::ElevationFieldTool, + MessageId::ElevationFieldCmd, + MessageId::ElevationFieldReason, + MessageId::ElevationImpactHeader, + MessageId::ElevationImpactNetwork, + MessageId::ElevationImpactWrite, + MessageId::ElevationImpactFullAccess, + MessageId::ElevationPromptProceed, + MessageId::ElevationOptionNetwork, + MessageId::ElevationOptionWrite, + MessageId::ElevationOptionFullAccess, + MessageId::ElevationOptionAbort, + MessageId::ElevationOptionNetworkDesc, + MessageId::ElevationOptionWriteDesc, + MessageId::ElevationOptionFullAccessDesc, + MessageId::ElevationOptionAbortDesc, MessageId::CtxInspTitle, MessageId::CtxInspSessionContext, MessageId::CtxInspSystemPrompt, @@ -1373,7 +1410,7 @@ fn english(id: MessageId) -> &'static str { MessageId::KbSendDraft => "Send the current draft", MessageId::KbCloseMenu => "Close menu, cancel request, discard draft, or clear input", MessageId::KbCancelOrExit => "Cancel request, or exit when idle", - MessageId::KbShellControls => "Open shell controls for a running foreground command", + MessageId::KbShellControls => "Background the running foreground shell command", MessageId::KbExitEmpty => "Exit when input is empty", MessageId::KbCommandPalette => "Open the command palette", MessageId::KbFuzzyFilePicker => "Open the fuzzy file picker (insert @path on Enter)", @@ -1555,6 +1592,37 @@ fn english(id: MessageId) -> &'static str { MessageId::ApprovalChooseAction => "Enter selected option, or press y/a/d directly", MessageId::ApprovalIntentLabel => "Intent: ", MessageId::ApprovalMoreLines => " … (+{count} lines)", + // Sandbox elevation dialog. + MessageId::ElevationTitleSandboxDenied => " \u{26a0} Sandbox Denied ", + MessageId::ElevationTitleRequired => " Sandbox Elevation Required ", + MessageId::ElevationFieldTool => " Tool: ", + MessageId::ElevationFieldCmd => " Cmd: ", + MessageId::ElevationFieldReason => " Reason: ", + MessageId::ElevationImpactHeader => " Impact if approved:", + MessageId::ElevationImpactNetwork => { + " - network retry enables outbound downloads and HTTP requests" + } + MessageId::ElevationImpactWrite => { + " - write retry expands writable filesystem scope for this tool call" + } + MessageId::ElevationImpactFullAccess => { + " - full access removes sandbox restrictions entirely for this retry" + } + MessageId::ElevationPromptProceed => " Choose how to proceed:", + MessageId::ElevationOptionNetwork => "Allow outbound network", + MessageId::ElevationOptionWrite => "Allow extra write access", + MessageId::ElevationOptionFullAccess => "Full access (filesystem + network)", + MessageId::ElevationOptionAbort => "Abort", + MessageId::ElevationOptionNetworkDesc => { + "Retry this tool call with outbound network access for downloads and HTTP requests" + } + MessageId::ElevationOptionWriteDesc => { + "Retry this tool call with additional writable filesystem scope" + } + MessageId::ElevationOptionFullAccessDesc => { + "Retry without sandbox limits; grants unrestricted filesystem and network access" + } + MessageId::ElevationOptionAbortDesc => "Cancel this tool execution", MessageId::CtxInspTitle => "Context inspector", MessageId::CtxInspSessionContext => "Session Context", @@ -1894,7 +1962,7 @@ fn vietnamese(id: MessageId) -> Option<&'static str> { MessageId::KbSendDraft => "Gửi bản nháp hiện tại", MessageId::KbCloseMenu => "Đóng menu, hủy yêu cầu, hủy bản nháp hoặc xóa sạch đầu vào", MessageId::KbCancelOrExit => "Hủy yêu cầu, hoặc thoát khi rảnh", - MessageId::KbShellControls => "Mở các điều khiển shell cho một lệnh đang chạy ở tiền cảnh", + MessageId::KbShellControls => "Chuyển lệnh shell đang chạy ở tiền cảnh xuống nền", MessageId::KbExitEmpty => "Thoát khi khung nhập trống", MessageId::KbCommandPalette => "Mở bảng lệnh (command palette)", MessageId::KbFuzzyFilePicker => { @@ -2090,6 +2158,38 @@ fn vietnamese(id: MessageId) -> Option<&'static str> { MessageId::ApprovalChooseAction => "Enter để chọn, hoặc nhấn y/a/d trực tiếp", MessageId::ApprovalIntentLabel => "Ý định: ", MessageId::ApprovalMoreLines => " … (+{count} dòng)", + // Sandbox elevation dialog. + // Sandbox elevation dialog. + MessageId::ElevationTitleSandboxDenied => " \u{26a0} Sandbox Bị Từ Chối ", + MessageId::ElevationTitleRequired => " Yêu Cầu Nâng Cấp Sandbox ", + MessageId::ElevationFieldTool => " Công cụ: ", + MessageId::ElevationFieldCmd => " Lệnh: ", + MessageId::ElevationFieldReason => " Lý do: ", + MessageId::ElevationImpactHeader => " Tác động nếu được chấp thuận:", + MessageId::ElevationImpactNetwork => { + " - thử lại với mạng cho phép tải xuống và yêu cầu HTTP" + } + MessageId::ElevationImpactWrite => { + " - thử lại với quyền ghi mở rộng phạm vi hệ thống tệp" + } + MessageId::ElevationImpactFullAccess => { + " - truy cập đầy đủ loại bỏ hoàn toàn hạn chế sandbox" + } + MessageId::ElevationPromptProceed => " Chọn cách tiếp tục:", + MessageId::ElevationOptionNetwork => "Cho phép mạng ngoài", + MessageId::ElevationOptionWrite => "Cho phép quyền ghi bổ sung", + MessageId::ElevationOptionFullAccess => "Truy cập đầy đủ (hệ thống tệp + mạng)", + MessageId::ElevationOptionAbort => "Hủy bỏ", + MessageId::ElevationOptionNetworkDesc => { + "Thử lại cuộc gọi công cụ này với quyền truy cập mạng ngoài" + } + MessageId::ElevationOptionWriteDesc => { + "Thử lại cuộc gọi công cụ này với phạm vi hệ thống tệp có thể ghi bổ sung" + } + MessageId::ElevationOptionFullAccessDesc => { + "Thử lại không giới hạn sandbox; cấp quyền truy cập không hạn chế" + } + MessageId::ElevationOptionAbortDesc => "Hủy thực thi công cụ này", MessageId::CtxInspTitle => "Trình kiểm tra ngữ cảnh", MessageId::CtxInspSessionContext => "Ngữ cảnh phiên", @@ -2179,6 +2279,30 @@ fn traditional_chinese(id: MessageId) -> Option<&'static str> { MessageId::ApprovalChooseAction => "Enter 執行選中項,或直接按 y/a/d", MessageId::ApprovalIntentLabel => "意圖:", MessageId::ApprovalMoreLines => " … (還有 {count} 行)", + // Sandbox elevation dialog. + // Sandbox elevation dialog. + MessageId::ElevationTitleSandboxDenied => " \u{26a0} 沙箱拒絕 ", + MessageId::ElevationTitleRequired => " 沙箱提權 ", + MessageId::ElevationFieldTool => " 工具:", + MessageId::ElevationFieldCmd => " 命令:", + MessageId::ElevationFieldReason => " 原因:", + MessageId::ElevationImpactHeader => " 批准後的影響:", + MessageId::ElevationImpactNetwork => " - 網路重試允許外部下載和 HTTP 請求", + MessageId::ElevationImpactWrite => " - 寫入重試擴大此工具呼叫的檔案系統寫入範圍", + MessageId::ElevationImpactFullAccess => " - 完全訪問解除沙箱限制", + MessageId::ElevationPromptProceed => " 請選擇處理方式:", + MessageId::ElevationOptionNetwork => "允許外部網路訪問", + MessageId::ElevationOptionWrite => "允許額外寫入權限", + MessageId::ElevationOptionFullAccess => "完全訪問(檔案系統 + 網路)", + MessageId::ElevationOptionAbort => "中止", + MessageId::ElevationOptionNetworkDesc => { + "使用外部網路訪問重試此工具呼叫(下載和 HTTP 請求)" + } + MessageId::ElevationOptionWriteDesc => "重試此工具呼叫,擴大可寫入的檔案系統範圍", + MessageId::ElevationOptionFullAccessDesc => { + "無沙箱限制重試(授予無限制的檔案系統和網路訪問權限)" + } + MessageId::ElevationOptionAbortDesc => "取消此工具呼叫", MessageId::CtxInspTitle => "上下文檢查器", MessageId::CtxInspSessionContext => "會話上下文", @@ -2495,7 +2619,7 @@ fn japanese(id: MessageId) -> Option<&'static str> { "メニューを閉じる、リクエストをキャンセル、下書きを破棄、または入力をクリア" } MessageId::KbCancelOrExit => "リクエストをキャンセル、またはアイドル時に終了", - MessageId::KbShellControls => "実行中のフォアグラウンドコマンドのシェル制御を開く", + MessageId::KbShellControls => "実行中のフォアグラウンドコマンドをバックグラウンドへ移す", MessageId::KbExitEmpty => "入力が空の時に終了", MessageId::KbCommandPalette => "コマンドパレットを開く", MessageId::KbFuzzyFilePicker => "ファジーファイルピッカーを開く(Enter で @path を挿入)", @@ -2679,6 +2803,36 @@ fn japanese(id: MessageId) -> Option<&'static str> { MessageId::ApprovalChooseAction => "Enterで選択、または y/a/d を直接入力", MessageId::ApprovalIntentLabel => "意図:", MessageId::ApprovalMoreLines => " … (+{count} 行)", + // Sandbox elevation dialog. + // Sandbox elevation dialog. + MessageId::ElevationTitleSandboxDenied => " \u{26a0} サンドボックス拒否 ", + MessageId::ElevationTitleRequired => " サンドボックス昇格 ", + MessageId::ElevationFieldTool => " ツール:", + MessageId::ElevationFieldCmd => " コマンド:", + MessageId::ElevationFieldReason => " 理由:", + MessageId::ElevationImpactHeader => " 承認された場合の影響:", + MessageId::ElevationImpactNetwork => { + " - ネットワーク再試行で外部ダウンロードとHTTPリクエストが可能" + } + MessageId::ElevationImpactWrite => { + " - 書き込み再試行でファイルシステムの書き込み範囲が拡大" + } + MessageId::ElevationImpactFullAccess => { + " - フルアクセスでサンドボックス制限を完全に解除" + } + MessageId::ElevationPromptProceed => " 方法を選択:", + MessageId::ElevationOptionNetwork => "外部ネットワークを許可", + MessageId::ElevationOptionWrite => "追加の書き込みアクセスを許可", + MessageId::ElevationOptionFullAccess => "フルアクセス(ファイルシステム + ネットワーク)", + MessageId::ElevationOptionAbort => "中止", + MessageId::ElevationOptionNetworkDesc => { + "外部ネットワークアクセスでこのツール呼び出しを再試行(ダウンロードとHTTPリクエスト用)" + } + MessageId::ElevationOptionWriteDesc => "追加の書き込み可能ファイルシステム範囲で再試行", + MessageId::ElevationOptionFullAccessDesc => { + "サンドボックス制限なしで再試行(ファイルシステムとネットワークへの無制限アクセス)" + } + MessageId::ElevationOptionAbortDesc => "このツール実行をキャンセル", MessageId::CtxInspTitle => "コンテキストインスペクタ", MessageId::CtxInspSessionContext => "セッションコンテキスト", @@ -2955,7 +3109,7 @@ fn chinese_simplified(id: MessageId) -> Option<&'static str> { MessageId::KbSendDraft => "发送当前草稿", MessageId::KbCloseMenu => "关闭菜单、取消请求、丢弃草稿或清空输入", MessageId::KbCancelOrExit => "取消请求,或空闲时退出", - MessageId::KbShellControls => "打开正在运行的前台命令的 shell 控制", + MessageId::KbShellControls => "将正在运行的前台命令转入后台", MessageId::KbExitEmpty => "输入框为空时退出", MessageId::KbCommandPalette => "打开命令面板", MessageId::KbFuzzyFilePicker => "打开模糊文件选择器(按 Enter 插入 @path)", @@ -3117,6 +3271,30 @@ fn chinese_simplified(id: MessageId) -> Option<&'static str> { MessageId::ApprovalChooseAction => "Enter 执行选中项,或直接按 y/a/d", MessageId::ApprovalIntentLabel => "意图:", MessageId::ApprovalMoreLines => " … (还有 {count} 行)", + // Sandbox elevation dialog. + // Sandbox elevation dialog. + MessageId::ElevationTitleSandboxDenied => " \u{26a0} 沙箱拒绝 ", + MessageId::ElevationTitleRequired => " 沙箱提权 ", + MessageId::ElevationFieldTool => " 工具:", + MessageId::ElevationFieldCmd => " 命令:", + MessageId::ElevationFieldReason => " 原因:", + MessageId::ElevationImpactHeader => " 批准后的影响:", + MessageId::ElevationImpactNetwork => " - 网络重试允许外部下载和 HTTP 请求", + MessageId::ElevationImpactWrite => " - 写入重试扩大此工具调用的文件系统写入范围", + MessageId::ElevationImpactFullAccess => " - 完全访问解除沙箱限制", + MessageId::ElevationPromptProceed => " 请选择处理方式:", + MessageId::ElevationOptionNetwork => "允许外部网络访问", + MessageId::ElevationOptionWrite => "允许额外写入权限", + MessageId::ElevationOptionFullAccess => "完全访问(文件系统 + 网络)", + MessageId::ElevationOptionAbort => "中止", + MessageId::ElevationOptionNetworkDesc => { + "使用外部网络访问重试此工具调用(下载和 HTTP 请求)" + } + MessageId::ElevationOptionWriteDesc => "重试此工具调用,扩大可写入的文件系统范围", + MessageId::ElevationOptionFullAccessDesc => { + "无沙箱限制重试(授予无限制的文件系统和网络访问权限)" + } + MessageId::ElevationOptionAbortDesc => "取消此工具调用", MessageId::CtxInspTitle => "上下文检查器", MessageId::CtxInspSessionContext => "会话上下文", @@ -3437,7 +3615,7 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> { "Fechar menu, cancelar requisição, descartar rascunho ou limpar entrada" } MessageId::KbCancelOrExit => "Cancelar requisição ou sair quando ocioso", - MessageId::KbShellControls => "Abrir controles de shell para comando em primeiro plano", + MessageId::KbShellControls => "Enviar o comando em primeiro plano para segundo plano", MessageId::KbExitEmpty => "Sair quando entrada vazia", MessageId::KbCommandPalette => "Abrir paleta de comandos", MessageId::KbFuzzyFilePicker => { @@ -3631,6 +3809,38 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> { MessageId::ApprovalChooseAction => "Enter para selecionar, ou pressione y/a/d diretamente", MessageId::ApprovalIntentLabel => "Intenção: ", MessageId::ApprovalMoreLines => " … (+{count} linhas)", + // Sandbox elevation dialog. + // Sandbox elevation dialog. + MessageId::ElevationTitleSandboxDenied => " \u{26a0} Sandbox Negado ", + MessageId::ElevationTitleRequired => " Elevação de Sandbox Necessária ", + MessageId::ElevationFieldTool => " Ferramenta: ", + MessageId::ElevationFieldCmd => " Comando: ", + MessageId::ElevationFieldReason => " Motivo: ", + MessageId::ElevationImpactHeader => " Impacto se aprovado:", + MessageId::ElevationImpactNetwork => { + " - retry de rede permite downloads externos e requisições HTTP" + } + MessageId::ElevationImpactWrite => { + " - retry de escrita expande o escopo do sistema de arquivos para esta chamada" + } + MessageId::ElevationImpactFullAccess => { + " - acesso total remove todas as restrições de sandbox para este retry" + } + MessageId::ElevationPromptProceed => " Escolha como prosseguir:", + MessageId::ElevationOptionNetwork => "Permitir rede externa", + MessageId::ElevationOptionWrite => "Permitir acesso extra de escrita", + MessageId::ElevationOptionFullAccess => "Acesso total (sistema de arquivos + rede)", + MessageId::ElevationOptionAbort => "Abortar", + MessageId::ElevationOptionNetworkDesc => { + "Retry esta chamada com acesso de rede externa para downloads e requisições HTTP" + } + MessageId::ElevationOptionWriteDesc => { + "Retry esta chamada com escopo adicional de sistema de arquivos gravável" + } + MessageId::ElevationOptionFullAccessDesc => { + "Retry sem limites de sandbox; concede acesso irrestrito ao sistema de arquivos e rede" + } + MessageId::ElevationOptionAbortDesc => "Cancelar esta execução de ferramenta", MessageId::CtxInspTitle => "Inspetor de contexto", MessageId::CtxInspSessionContext => "Contexto da sessão", @@ -3967,7 +4177,7 @@ fn spanish_latin_america(id: MessageId) -> Option<&'static str> { "Cerrar menú, cancelar solicitud, descartar borrador o limpiar entrada" } MessageId::KbCancelOrExit => "Cancelar solicitud o salir cuando está inactivo", - MessageId::KbShellControls => "Abrir controles de shell para comando en primer plano", + MessageId::KbShellControls => "Enviar el comando en primer plano a segundo plano", MessageId::KbExitEmpty => "Salir cuando la entrada está vacía", MessageId::KbCommandPalette => "Abrir paleta de comandos", MessageId::KbFuzzyFilePicker => { @@ -4159,6 +4369,38 @@ fn spanish_latin_america(id: MessageId) -> Option<&'static str> { MessageId::ApprovalChooseAction => "Enter para seleccionar, o presione y/a/d directamente", MessageId::ApprovalIntentLabel => "Intención: ", MessageId::ApprovalMoreLines => " … (+{count} líneas)", + // Sandbox elevation dialog. + // Sandbox elevation dialog. + MessageId::ElevationTitleSandboxDenied => " \u{26a0} Sandbox Denegado ", + MessageId::ElevationTitleRequired => " Elevación de Sandbox Requerida ", + MessageId::ElevationFieldTool => " Herramienta: ", + MessageId::ElevationFieldCmd => " Comando: ", + MessageId::ElevationFieldReason => " Motivo: ", + MessageId::ElevationImpactHeader => " Impacto si se aprueba:", + MessageId::ElevationImpactNetwork => { + " - reintento de red permite descargas y solicitudes HTTP externas" + } + MessageId::ElevationImpactWrite => { + " - reintento de escritura expande el ámbito del sistema de archivos para esta llamada" + } + MessageId::ElevationImpactFullAccess => { + " - acceso total elimina todas las restricciones de sandbox para este reintento" + } + MessageId::ElevationPromptProceed => " Elige cómo proceder:", + MessageId::ElevationOptionNetwork => "Permitir red externa", + MessageId::ElevationOptionWrite => "Permitir acceso extra de escritura", + MessageId::ElevationOptionFullAccess => "Acceso total (sistema de archivos + red)", + MessageId::ElevationOptionAbort => "Abortar", + MessageId::ElevationOptionNetworkDesc => { + "Reintenta esta llamada con acceso de red externa para descargas y solicitudes HTTP" + } + MessageId::ElevationOptionWriteDesc => { + "Reintenta esta llamada con ámbito adicional de sistema de archivos grabable" + } + MessageId::ElevationOptionFullAccessDesc => { + "Reintenta sin límites de sandbox; concede acceso sin restricciones al sistema de archivos y red" + } + MessageId::ElevationOptionAbortDesc => "Cancelar esta ejecución de herramienta", MessageId::CtxInspTitle => "Inspector de contexto", MessageId::CtxInspSessionContext => "Contexto de la sesión", diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 8ac5f8f6..a56ffab1 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -336,6 +336,19 @@ struct ExecArgs { /// Output format for exec mode #[arg(long, value_enum, default_value_t = ExecOutputFormat::Text)] output_format: ExecOutputFormat, + /// Comma-separated list of tools to allow (all others denied). + /// Lowercase catalog names: read_file, write_file, exec_shell, grep_files, etc. + #[arg(long, value_delimiter = ',')] + allowed_tools: Option>, + /// Comma-separated list of tools to deny (deny wins over allow). + #[arg(long, value_delimiter = ',')] + disallowed_tools: Option>, + /// Maximum number of model steps (tool calls) before the run ends. + #[arg(long, value_parser = clap::value_parser!(u32).range(1..))] + max_turns: Option, + /// Extra text appended to the system prompt for this run. + #[arg(long)] + append_system_prompt: Option, } #[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] @@ -975,13 +988,28 @@ async fn main() -> Result<()> { let needs_engine = args.auto || yolo || resume_session_id.is_some() - || args.output_format == ExecOutputFormat::StreamJson; + || args.output_format == ExecOutputFormat::StreamJson + || args.max_turns.is_some() + || args.allowed_tools.is_some() + || args.disallowed_tools.is_some() + || args.append_system_prompt.is_some(); if needs_engine { let max_subagents = cli.max_subagents.map_or_else( || config.max_subagents(), |value| value.clamp(1, MAX_SUBAGENTS), ); let auto_mode = args.auto || yolo; + let max_turns = args.max_turns.unwrap_or(100); + let allowed_tools = args.allowed_tools.as_ref().map(|v| { + v.iter() + .map(|s| s.to_ascii_lowercase().trim().to_string()) + .collect::>() + }); + let disallowed_tools = args.disallowed_tools.as_ref().map(|v| { + v.iter() + .map(|s| s.to_ascii_lowercase().trim().to_string()) + .collect::>() + }); run_exec_agent( &config, &model, @@ -993,6 +1021,10 @@ async fn main() -> Result<()> { args.json, resume_session_id, args.output_format, + max_turns, + allowed_tools, + disallowed_tools, + args.append_system_prompt.clone(), ) .await } else if args.json { @@ -1249,6 +1281,10 @@ async fn run_swebench_command( false, None, args.output_format, + 100, + None, + None, + None, ) .await?; @@ -5748,6 +5784,10 @@ async fn run_exec_agent( json_output: bool, resume_session_id: Option, output_format: ExecOutputFormat, + max_turns: u32, + allowed_tools: Option>, + disallowed_tools: Option>, + append_system_prompt: Option, ) -> Result<()> { use crate::compaction::CompactionConfig; use crate::core::engine::{EngineConfig, spawn_engine}; @@ -5799,15 +5839,24 @@ async fn run_exec_agent( notes_path: config.notes_path(), mcp_config_path: config.mcp_config_path(), skills_dir: config.skills_dir(), - instructions: config - .instructions_paths() - .into_iter() - .map(Into::into) - .collect(), + instructions: { + let mut instrs: Vec = config + .instructions_paths() + .into_iter() + .map(Into::into) + .collect(); + if let Some(ref extra) = append_system_prompt { + instrs.push(crate::prompts::InstructionSource::Inline { + name: "cli:append-system-prompt".into(), + content: extra.clone(), + }); + } + instrs + }, project_context_pack_enabled: config.project_context_pack_enabled(), translation_enabled: false, show_thinking: settings.show_thinking, - max_steps: 100, + max_steps: max_turns, max_subagents, features: config.features(), compaction, @@ -5837,7 +5886,8 @@ async fn run_exec_agent( vision_config: config.vision_model_config(), strict_tool_mode: config.strict_tool_mode.unwrap_or(false), goal_objective: None, - allowed_tools: None, + allowed_tools: allowed_tools.clone(), + disallowed_tools: disallowed_tools.clone(), hook_executor: None, locale_tag: crate::localization::resolve_locale(&settings.locale) .tag() @@ -5895,7 +5945,7 @@ async fn run_exec_agent( mode, model: effective_model.clone(), goal_objective: None, - allowed_tools: None, + allowed_tools: allowed_tools.clone(), hook_executor: None, reasoning_effort: effective_reasoning_effort, reasoning_effort_auto: auto_model, @@ -6184,6 +6234,15 @@ async fn run_exec_agent( latest_model = model; latest_workspace = workspace; } + // #3027: surface the engine's max-steps notice in text mode so a + // --max-turns run that stops early says why instead of going quiet. + Event::Status { message } + if output_format == ExecOutputFormat::Text + && !json_output + && message.contains("Reached maximum steps") => + { + eprintln!("{message}"); + } _ => {} } } @@ -6644,6 +6703,45 @@ mod terminal_mode_tests { assert_eq!(args.output_format, ExecOutputFormat::Text); } + #[test] + fn exec_parses_tool_gate_and_hardening_flags() { + let cli = parse_cli(&[ + "codewhale", + "exec", + "--allowed-tools", + "read_file,grep_files", + "--disallowed-tools", + "exec_shell", + "--max-turns", + "7", + "--append-system-prompt", + "extra rules", + "do the thing", + ]); + let Some(Commands::Exec(args)) = cli.command else { + panic!("expected exec command"); + }; + + assert_eq!( + args.allowed_tools.as_deref(), + Some(&["read_file".to_string(), "grep_files".to_string()][..]) + ); + assert_eq!( + args.disallowed_tools.as_deref(), + Some(&["exec_shell".to_string()][..]) + ); + assert_eq!(args.max_turns, Some(7)); + assert_eq!(args.append_system_prompt.as_deref(), Some("extra rules")); + assert_eq!(args.prompt, vec!["do the thing"]); + } + + #[test] + fn exec_rejects_zero_max_turns() { + let err = Cli::try_parse_from(["codewhale", "exec", "--max-turns", "0", "hello"]) + .expect_err("max-turns must be >= 1"); + assert_eq!(err.kind(), clap::error::ErrorKind::ValueValidation); + } + #[test] fn exec_accepts_continue_for_latest_workspace_session() { let cli = parse_cli(&["codewhale", "exec", "--continue", "follow up"]); diff --git a/crates/tui/src/prompt_zones.rs b/crates/tui/src/prompt_zones.rs index 581ac355..e9c9c04a 100644 --- a/crates/tui/src/prompt_zones.rs +++ b/crates/tui/src/prompt_zones.rs @@ -195,16 +195,17 @@ impl std::fmt::Display for PrefixDrift { // ── AppendLog ────────────────────────────────────────────────────────── -/// Append-only conversation history. Only exposes `push`-style mutations. +/// Append-only conversation history. Derefs to `&[Message]` via +/// [`Deref`](std::ops::Deref) for transparent read access; mutations go +/// through explicit methods (`push`, `truncate_to`, `trim_front`, `clear`) +/// whose names make cache impact obvious. /// -/// **Phase 1 scaffolding** — not yet wired into the engine request path. -#[allow(dead_code)] +/// Phase 4: backing store for `Session.messages` (#2264). #[derive(Debug, Clone)] pub struct AppendLog { messages: Vec, } -#[allow(dead_code)] impl AppendLog { pub fn new() -> Self { Self { @@ -216,27 +217,53 @@ impl AppendLog { Self { messages } } + /// Append a message to the log. A single-message push is the cheapest + /// mutation for prefix-cache stability — it extends the byte sequence + /// without disturbing earlier turns. pub fn push(&mut self, message: Message) { self.messages.push(message); } - #[must_use] - pub fn len(&self) -> usize { - self.messages.len() + /// Append multiple messages in one operation (fewer cache-line + /// invalidations than repeated `push`). + pub fn push_batch(&mut self, batch: Vec) { + self.messages.extend(batch); } - #[must_use] - pub fn is_empty(&self) -> bool { - self.messages.is_empty() + /// Truncate to keep only the most recent `new_len` messages. + /// Discards older messages (and their prefix-cache contribution) + /// from the front. + pub fn truncate_to(&mut self, new_len: usize) { + self.messages.truncate(new_len); } - pub fn iter(&self) -> impl Iterator { - self.messages.iter() + /// Remove `count` messages from the front (oldest first). + /// Cache-destroying: drops the prefix that earlier turns share. + pub fn trim_front(&mut self, count: usize) { + if count >= self.messages.len() { + self.messages.clear(); + } else { + self.messages.drain(0..count); + } } + /// Remove all messages. Resets cache state completely. + pub fn clear(&mut self) { + self.messages.clear(); + } + + /// Return a mutable reference to the last message, if any. + /// Prefer this over `last_mut()` on the inner vec — the name signals + /// that only the most recent turn's content is being modified. #[must_use] - pub fn as_slice(&self) -> &[Message] { - &self.messages + pub fn last_mut(&mut self) -> Option<&mut Message> { + self.messages.last_mut() + } + + /// Consume and return the inner `Vec`. + #[must_use] + pub fn into_inner(self) -> Vec { + self.messages } } @@ -246,6 +273,26 @@ impl Default for AppendLog { } } +impl From> for AppendLog { + fn from(messages: Vec) -> Self { + Self { messages } + } +} + +impl From for Vec { + fn from(log: AppendLog) -> Self { + log.messages + } +} + +impl std::ops::Deref for AppendLog { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.messages + } +} + // ── TurnScratch ──────────────────────────────────────────────────────── /// Per-turn ephemeral data. Cleared at every turn boundary. diff --git a/crates/tui/src/runtime_threads.rs b/crates/tui/src/runtime_threads.rs index 862d82e5..724c9d35 100644 --- a/crates/tui/src/runtime_threads.rs +++ b/crates/tui/src/runtime_threads.rs @@ -2082,6 +2082,7 @@ impl RuntimeThreadManager { strict_tool_mode: self.config.strict_tool_mode.unwrap_or(false), goal_objective: None, allowed_tools: None, + disallowed_tools: None, hook_executor: None, locale_tag: crate::localization::resolve_locale(&settings.locale) .tag() diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index 0b42194d..df8592d9 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -70,6 +70,18 @@ fn release_resident_leases_for(agent_id: &str) { /// the `SubAgentManager`. const DEFAULT_MAX_STEPS: u32 = u32::MAX; const TOOL_TIMEOUT: Duration = Duration::from_secs(30); + +/// Format a step counter for sub-agent progress messages. +/// +/// When `max_steps == u32::MAX` (the default), the denominator is a sentinel +/// meaning "unbounded" — render just `step N` instead of `step N/4294967295`. +fn format_step_counter(steps: u32, max_steps: u32) -> String { + if max_steps == u32::MAX { + format!("step {steps}") + } else { + format!("step {steps}/{max_steps}") + } +} // Non-streaming sub-agents need enough response budget to carry large tool-call // arguments, especially write_file content. The API bills generated tokens, not // the requested ceiling. @@ -1496,8 +1508,19 @@ impl SubAgentManager { .values() .find(|existing| existing.session_name == name) { + // #3020: Include elapsed time so the parent can distinguish a + // live worker from a stale/failed earlier spawn (#2656). + let elapsed = existing.started_at.elapsed(); + let since = if elapsed.as_secs() < 120 { + format!("{}s ago", elapsed.as_secs()) + } else { + let mins = elapsed.as_secs() / 60; + let secs = elapsed.as_secs() % 60; + format!("{mins}m{secs}s ago") + }; return Err(anyhow!( - "Sub-agent session name '{name}' is already in use by agent_id '{}' (status: {}). \ + "Sub-agent session name '{name}' is already in use by agent_id '{}' \ + (status: {}, started {since}). \ Reuse that agent_id with agent_eval/agent_close, or open with a different name.", existing.id, subagent_status_name(&existing.status) @@ -4165,7 +4188,7 @@ async fn run_subagent( record_agent_progress( runtime, &agent_id, - format!("step {steps}/{max_steps}: cancelled"), + format!("{}: cancelled", format_step_counter(steps, max_steps)), ); if let Some(mb) = runtime.mailbox.as_ref() { let _ = mb.send(MailboxMessage::Cancelled { @@ -4217,7 +4240,10 @@ async fn run_subagent( record_agent_progress( runtime, &agent_id, - format!("step {steps}/{max_steps}: requesting model response"), + format!( + "{}: requesting model response", + format_step_counter(steps, max_steps) + ), ); while let Ok(input) = input_rx.try_recv() { @@ -4274,7 +4300,7 @@ async fn run_subagent( record_agent_progress( runtime, &agent_id, - format!("step {steps}/{max_steps}: cancelled mid-request"), + format!("{}: cancelled mid-request", format_step_counter(steps, max_steps)), ); if let Some(mb) = runtime.mailbox.as_ref() { let _ = mb.send(MailboxMessage::Cancelled { @@ -4337,7 +4363,7 @@ async fn run_subagent( record_agent_progress( runtime, &agent_id, - format!("step {steps}/{max_steps}: interrupted; {reason}"), + format!("{}: interrupted; {reason}", format_step_counter(steps, max_steps)), ); let status = SubAgentStatus::Interrupted(reason.clone()); let duration_ms = @@ -4371,7 +4397,7 @@ async fn run_subagent( record_agent_progress( runtime, &agent_id, - format!("step {steps}/{max_steps}: cancelled while interrupted"), + format!("{}: cancelled while interrupted", format_step_counter(steps, max_steps)), ); if let Some(mb) = runtime.mailbox.as_ref() { let _ = mb.send(MailboxMessage::Cancelled { @@ -4495,7 +4521,7 @@ async fn run_subagent( record_agent_progress( runtime, &agent_id, - format!("step {steps}/{max_steps}: {progress}"), + format!("{}: {progress}", format_step_counter(steps, max_steps)), ); messages.push(Message { role: "user".to_string(), @@ -4531,7 +4557,7 @@ async fn run_subagent( record_agent_progress( runtime, &agent_id, - format!("step {steps}/{max_steps}: complete"), + format!("{}: complete", format_step_counter(steps, max_steps)), ); break; } @@ -4542,7 +4568,8 @@ async fn run_subagent( runtime, &agent_id, format!( - "step {steps}/{max_steps}: executing {} tool call(s)", + "{}: executing {} tool call(s)", + format_step_counter(steps, max_steps), tool_uses.len() ), ); @@ -4551,7 +4578,10 @@ async fn run_subagent( record_agent_progress( runtime, &agent_id, - format!("step {steps}/{max_steps}: running tool '{tool_name}'"), + format!( + "{}: running tool '{tool_name}'", + format_step_counter(steps, max_steps) + ), ); if let Some(mb) = runtime.mailbox.as_ref() { let _ = mb.send(MailboxMessage::ToolCallStarted { @@ -4575,7 +4605,10 @@ async fn run_subagent( record_agent_progress( runtime, &agent_id, - format!("step {steps}/{max_steps}: finished tool '{tool_name}'"), + format!( + "{}: finished tool '{tool_name}'", + format_step_counter(steps, max_steps) + ), ); if let Some(mb) = runtime.mailbox.as_ref() { let _ = mb.send(MailboxMessage::ToolCallCompleted { @@ -5690,7 +5723,26 @@ fn annotate_child_model_error(err: &str, model: &str) -> String { "{err}\n(child model `{model}` may be unavailable under the current access profile — \ retry agent_open with a different `model`, or remove `model` to inherit the parent's)" ), - _ => err.to_string(), + _ => { + // #3020 (#2653): Provider rejections like "Model Not Exist" or + // "does not exist or you do not have access" often classify as + // `Internal` rather than `Authorization`/`State`. Catch these + // patterns in the raw error text and annotate anyway. + let lower = err.to_ascii_lowercase(); + if lower.contains("model not exist") + || lower.contains("model_not_found") + || lower.contains("does not exist") + || lower.contains("no such model") + || lower.contains("invalid model") + { + format!( + "{err}\n(child model `{model}` may be unavailable under the current access profile — \ + retry agent_open with a different `model`, or remove `model` to inherit the parent's)" + ) + } else { + err.to_string() + } + } } } diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs index 55a5c659..4b8e02d2 100644 --- a/crates/tui/src/tools/subagent/tests.rs +++ b/crates/tui/src/tools/subagent/tests.rs @@ -1489,6 +1489,12 @@ async fn spawn_duplicate_session_name_error_names_conflicting_agent() { msg.contains("running"), "includes the conflicting status: {msg}" ); + // #3020: elapsed time lets the parent distinguish a live worker from a + // stale earlier spawn. + assert!( + msg.contains("started ") && msg.contains(" ago"), + "includes elapsed time since spawn: {msg}" + ); } #[tokio::test] @@ -2074,6 +2080,23 @@ fn annotate_child_model_error_adds_actionable_hint() { let unrelated = annotate_child_model_error("connection reset by peer", "kimi-k2"); assert_eq!(unrelated, "connection reset by peer"); + + // #3020: provider rejections that classify as Internal (not + // Authorization/State) still get the hint via raw-text matching. + let not_exist = annotate_child_model_error("Model Not Exist", "kimi-k2"); + assert!( + not_exist.contains("retry agent_open"), + "DeepSeek-style rejection gets the hint: {not_exist}" + ); + + let openai_style = annotate_child_model_error( + "The model `gpt-5.5-nano` does not exist or you do not have access to it.", + "gpt-5.5-nano", + ); + assert!( + openai_style.contains("retry agent_open"), + "OpenAI-style rejection gets the hint: {openai_style}" + ); } #[test] @@ -3300,3 +3323,18 @@ fn normalize_requested_subagent_model_is_provider_aware() { "official DeepSeek API rejects foreign ids" ); } + +// ── #3030: step-counter formatting ────────────────────────────────────────── + +#[test] +fn format_step_counter_hides_unbounded_sentinel() { + // DEFAULT_MAX_STEPS is u32::MAX, meaning "unbounded" — rendering the + // sentinel as a denominator produced "step 16/4294967295". + assert_eq!(format_step_counter(16, u32::MAX), "step 16"); +} + +#[test] +fn format_step_counter_keeps_concrete_budgets() { + assert_eq!(format_step_counter(3, 25), "step 3/25"); + assert_eq!(format_step_counter(0, 1), "step 0/1"); +} diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 37bf84c5..712853a6 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -1157,6 +1157,11 @@ pub struct SidebarHoverRow { pub detail: Option, /// Whether the compact row lost information. pub is_truncated: bool, + /// Slash command to execute when this row is clicked (#3028). + /// `shell_*` job ids route through `/jobs` (e.g. `/jobs cancel + /// shell_abc123`); task-manager ids route through `/task` (e.g. + /// `/task show task_abc123`). + pub click_action: Option, } /// Per-section metadata for sidebar hover detection. @@ -1445,6 +1450,16 @@ pub struct App { pub pending_subagent_dispatch: Option, /// Animation anchor for status-strip active sub-agent spinner. pub agent_activity_started_at: Option, + /// Monotonic counter for stable agent labels (#3030). + /// Incremented each time a sub-agent is spawned; used to generate + /// "Agent 1", "Agent 2", etc. + pub agent_counter: u64, + /// Maps raw agent_id to a stable user-facing label (#3030). + /// Populated when `AgentSpawned` fires; read by sidebar rendering. + pub agent_label_map: HashMap, + /// Last time a sub-agent progress event triggered a redraw. + /// Used to throttle redraws under high sub-agent concurrency (#3033). + pub last_agent_progress_redraw: Option, pub ui_theme: UiTheme, /// Active named theme. Drives the cell-level color remap in /// `tui::color_compat::ColorCompatBackend` so community presets @@ -1628,6 +1643,9 @@ pub struct App { pub runtime_turn_id: Option, /// Current runtime turn status (if known). pub runtime_turn_status: Option, + /// Monotonic turn counter for stable user-facing labels (#3030). + /// Incremented each time a new turn starts; displayed as "Turn N". + pub turn_counter: u64, /// When the UI accepted a user message but has not observed `TurnStarted` yet. pub dispatch_started_at: Option, @@ -2029,8 +2047,10 @@ impl App { let allow_shell = allow_shell || initial_mode == AppMode::Yolo; let shell_manager = new_shared_shell_manager(workspace.clone()); - // Initialize hooks executor from config - let hooks_config = config.hooks_config(); + // Initialize hooks executor from config, merged with project-local + // `.codewhale/hooks.toml` (#3026). + let hooks_config = + crate::hooks::HooksConfig::load_with_project(config.hooks_config(), &workspace); let hooks = HookExecutor::new(hooks_config, workspace.clone()); // Initialize plan state @@ -2174,6 +2194,9 @@ impl App { last_fanout_card_index: None, pending_subagent_dispatch: None, agent_activity_started_at: None, + agent_counter: 0, + agent_label_map: HashMap::new(), + last_agent_progress_redraw: None, ui_theme, theme_id, onboarding, @@ -2262,6 +2285,7 @@ impl App { last_balance_fetch: None, runtime_turn_id: None, runtime_turn_status: None, + turn_counter: 0, dispatch_started_at: None, workspace_context: None, workspace_context_cell: std::sync::Arc::new(std::sync::Mutex::new(None)), @@ -2727,6 +2751,28 @@ impl App { self.collapsed_cell_map.clear(); } + /// #3030: return the stable user-facing label for an agent id + /// ("Agent 3"), assigning the next sequential label on first sight. + pub(crate) fn ensure_agent_label(&mut self, agent_id: &str) -> String { + if let Some(label) = self.agent_label_map.get(agent_id) { + return label.clone(); + } + self.agent_counter = self.agent_counter.saturating_add(1); + let label = format!("Agent {}", self.agent_counter); + self.agent_label_map + .insert(agent_id.to_string(), label.clone()); + label + } + + /// #3030: read-only label lookup with raw-id fallback for agents the + /// label map has never seen. + pub(crate) fn agent_display_label(&self, agent_id: &str) -> String { + self.agent_label_map + .get(agent_id) + .cloned() + .unwrap_or_else(|| agent_id.to_string()) + } + pub fn mark_history_updated(&mut self) { self.history_version = self.history_version.wrapping_add(1); // Resync per-cell revisions to history.len(). This is the diff --git a/crates/tui/src/tui/approval.rs b/crates/tui/src/tui/approval.rs index 8c2665e3..c5167028 100644 --- a/crates/tui/src/tui/approval.rs +++ b/crates/tui/src/tui/approval.rs @@ -1026,6 +1026,7 @@ pub enum ElevationOption { impl ElevationOption { /// Get the display label for this option. + #[cfg(test)] pub fn label(&self) -> &'static str { match self { ElevationOption::WithNetwork => "Allow outbound network", @@ -1036,6 +1037,7 @@ impl ElevationOption { } /// Get a short description. + #[cfg(test)] pub fn description(&self) -> &'static str { match self { ElevationOption::WithNetwork => { @@ -1132,13 +1134,15 @@ impl ElevationRequest { pub struct ElevationView { request: ElevationRequest, selected: usize, + locale: Locale, } impl ElevationView { - pub fn new(request: ElevationRequest) -> Self { + pub fn new(request: ElevationRequest, locale: Locale) -> Self { Self { request, selected: 0, + locale, } } @@ -1213,7 +1217,7 @@ impl ModalView for ElevationView { } fn render(&self, area: ratatui::layout::Rect, buf: &mut ratatui::buffer::Buffer) { - let elevation_widget = ElevationWidget::new(&self.request, self.selected); + let elevation_widget = ElevationWidget::new(&self.request, self.selected, self.locale); elevation_widget.render(area, buf); } } @@ -1991,7 +1995,7 @@ mod tests { fn test_elevation_view_initial_state() { let request = ElevationRequest::for_shell("test-id", "cargo build", "network blocked", true, false); - let view = ElevationView::new(request); + let view = ElevationView::new(request, Locale::En); assert_eq!(view.selected, 0); } @@ -1999,7 +2003,7 @@ mod tests { fn test_elevation_view_keybindings() { let request = ElevationRequest::for_shell("test-id", "cargo test", "write blocked", false, true); - let mut view = ElevationView::new(request); + let mut view = ElevationView::new(request, Locale::En); let action = view.handle_key(create_key_event(KeyCode::Char('n'))); assert!(matches!( @@ -2012,7 +2016,7 @@ mod tests { let request = ElevationRequest::for_shell("test-id", "cargo build", "write blocked", false, true); - let mut view = ElevationView::new(request); + let mut view = ElevationView::new(request, Locale::En); let action = view.handle_key(create_key_event(KeyCode::Char('w'))); assert!(matches!( action, @@ -2024,7 +2028,7 @@ mod tests { let request = ElevationRequest::for_shell("test-id", "cargo build", "blocked", false, false); - let mut view = ElevationView::new(request); + let mut view = ElevationView::new(request, Locale::En); let action = view.handle_key(create_key_event(KeyCode::Char('f'))); assert!(matches!( action, @@ -2036,7 +2040,7 @@ mod tests { let request = ElevationRequest::for_shell("test-id", "cargo build", "blocked", false, false); - let mut view = ElevationView::new(request); + let mut view = ElevationView::new(request, Locale::En); let action = view.handle_key(create_key_event(KeyCode::Esc)); assert!(matches!( action, @@ -2048,7 +2052,7 @@ mod tests { let request = ElevationRequest::for_shell("test-id", "cargo build", "blocked", false, false); - let mut view = ElevationView::new(request); + let mut view = ElevationView::new(request, Locale::En); let action = view.handle_key(create_key_event(KeyCode::Char('a'))); assert!(matches!( action, @@ -2062,7 +2066,7 @@ mod tests { #[test] fn test_elevation_view_navigation() { let request = ElevationRequest::for_shell("test-id", "cargo build", "blocked", true, false); - let mut view = ElevationView::new(request); + let mut view = ElevationView::new(request, Locale::En); assert_eq!(view.selected, 0); @@ -2082,7 +2086,7 @@ mod tests { #[test] fn test_elevation_view_enter_uses_selected_option() { let request = ElevationRequest::for_shell("test-id", "cargo build", "blocked", true, false); - let mut view = ElevationView::new(request); + let mut view = ElevationView::new(request, Locale::En); view.handle_key(create_key_event(KeyCode::Down)); assert_eq!(view.selected, 1); @@ -2097,6 +2101,136 @@ mod tests { )); } + fn render_elevation_lines(view: &ElevationView, w: u16, h: u16) -> Vec { + use ratatui::buffer::Buffer; + use ratatui::layout::Rect; + let mut buf = Buffer::empty(Rect::new(0, 0, w, h)); + view.render(Rect::new(0, 0, w, h), &mut buf); + (0..h) + .map(|row| { + (0..w) + .map(|col| buf[(col, row)].symbol().to_string()) + .collect::() + }) + .collect() + } + + fn compact_elevation_text(lines: &[String]) -> String { + lines.join("\n").replace(' ', "") + } + + fn elevation_shell_request() -> ElevationRequest { + ElevationRequest::for_shell("test-id", "cargo build", "network blocked", true, false) + } + + #[test] + fn test_elevation_render_en_has_expected_strings() { + let view = ElevationView::new(elevation_shell_request(), Locale::En); + let lines = render_elevation_lines(&view, 70, 22); + let joined = compact_elevation_text(&lines); + assert!( + joined.contains("SandboxDenied"), + "missing en title:\n{joined}" + ); + assert!(joined.contains("Tool:"), "missing en tool label:\n{joined}"); + assert!(joined.contains("Cmd:"), "missing en cmd label:\n{joined}"); + assert!( + joined.contains("Reason:"), + "missing en reason label:\n{joined}" + ); + } + + #[test] + fn test_elevation_render_zh_hans_localizes_copy() { + let view = ElevationView::new(elevation_shell_request(), Locale::ZhHans); + let lines = render_elevation_lines(&view, 70, 22); + let joined = compact_elevation_text(&lines); + assert!(joined.contains("沙箱拒绝"), "missing zh title:\n{joined}"); + assert!( + joined.contains("工具:"), + "missing zh tool label:\n{joined}" + ); + assert!(joined.contains("命令:"), "missing zh cmd label:\n{joined}"); + assert!( + joined.contains("原因:"), + "missing zh reason label:\n{joined}" + ); + assert!( + joined.contains("批准后的影响"), + "missing zh impact header:\n{joined}" + ); + let en_artifacts = [ + "SandboxDenied", + "Tool:", + "Cmd:", + "Reason:", + "Impactifapproved", + "Choosehowtoproceed", + "Allowoutboundnetwork", + "Allowextrawriteaccess", + "Fullaccess", + "Abort", + ]; + for artifact in &en_artifacts { + assert!( + !joined.contains(artifact), + "English leak '{artifact}' in zh rendering:\n{joined}" + ); + } + } + + #[test] + fn test_elevation_render_ja_has_translated_copy() { + let view = ElevationView::new(elevation_shell_request(), Locale::Ja); + let lines = render_elevation_lines(&view, 70, 22); + let joined = compact_elevation_text(&lines); + assert!( + joined.contains("サンドボックス拒否"), + "missing ja title:\n{joined}" + ); + assert!( + joined.contains("ツール:"), + "missing ja tool label:\n{joined}" + ); + assert!( + joined.contains("コマンド:"), + "missing ja cmd label:\n{joined}" + ); + assert!( + joined.contains("理由:"), + "missing ja reason label:\n{joined}" + ); + for eng in &["SandboxDenied", "Tool:", "Cmd:", "Reason:"] as &[&str] { + assert!( + !joined.contains(eng), + "English leak '{eng}' in ja:\n{joined}" + ); + } + } + + #[test] + fn test_elevation_render_zh_hant_has_translated_copy() { + let view = ElevationView::new(elevation_shell_request(), Locale::ZhHant); + let lines = render_elevation_lines(&view, 70, 22); + let joined = compact_elevation_text(&lines); + assert!( + joined.contains("沙箱拒絕"), + "missing zh-Hant title:\n{joined}" + ); + assert!( + joined.contains("工具:"), + "missing zh-Hant tool label:\n{joined}" + ); + assert!( + joined.contains("命令:"), + "missing zh-Hant cmd label:\n{joined}" + ); + assert!( + joined.contains("原因:"), + "missing zh-Hant reason label:\n{joined}" + ); + } + // ======================================================================== // ElevationOption Tests // ======================================================================== diff --git a/crates/tui/src/tui/color_compat.rs b/crates/tui/src/tui/color_compat.rs index 0a8107ec..0082830f 100644 --- a/crates/tui/src/tui/color_compat.rs +++ b/crates/tui/src/tui/color_compat.rs @@ -129,8 +129,52 @@ impl Backend for ColorCompatBackend { if let Some(render_debug) = &mut self.render_debug { render_debug.record(viewport, &adapted); } - self.inner - .draw(adapted.iter().map(|(x, y, cell)| (*x, *y, cell))) + // #3029: Emit OSC 8 hyperlinks out-of-band through the backend's + // Write impl. ratatui's buffer pipeline strips ESC bytes, so the + // open/close sequences must be interleaved with the cell stream + // here. OSC 8 is stateful and last-writer-wins: every cell painted + // between an open and the next close links to that open's target, + // so each region's cells must be bracketed by their OWN open/close + // pair — never batched. + let mut frame_links = crate::tui::osc8::take_frame_links(); + if frame_links.is_empty() || !crate::tui::osc8::enabled() { + self.inner + .draw(adapted.iter().map(|(x, y, cell)| (*x, *y, cell)))?; + return Ok(()); + } + // Deterministic region lookup when regions are adjacent/overlapping: + // the first (top-left-most) region wins. + frame_links.sort_unstable_by_key(|link| (link.row, link.col_start)); + let region_for = |x: u16, y: u16| -> Option { + frame_links + .iter() + .position(|link| y == link.row && x >= link.col_start && x <= link.col_end) + }; + + // Walk the diff in its original order and split it into runs at + // region boundaries, so the visible byte stream stays identical to + // a no-link render apart from the inserted OSC 8 sequences. + let mut idx = 0; + while idx < adapted.len() { + let current_region = region_for(adapted[idx].0, adapted[idx].1); + let run_start = idx; + while idx < adapted.len() + && region_for(adapted[idx].0, adapted[idx].1) == current_region + { + idx += 1; + } + let run = &adapted[run_start..idx]; + if let Some(region_idx) = current_region { + crate::tui::osc8::write_osc8_open(self, &frame_links[region_idx].target)?; + self.inner + .draw(run.iter().map(|(x, y, cell)| (*x, *y, cell)))?; + crate::tui::osc8::write_osc8_close(self)?; + } else { + self.inner + .draw(run.iter().map(|(x, y, cell)| (*x, *y, cell)))?; + } + } + Ok(()) } fn append_lines(&mut self, n: u16) -> io::Result<()> { @@ -540,4 +584,128 @@ mod tests { backend.force_size(Size::new(80, 25)); assert_eq!(backend.size().unwrap(), Size::new(80, 25)); } + + // ── #3029: OSC 8 emission through the backend byte stream ────────────── + + fn row_cells(symbols: &str) -> Vec<(u16, u16, Cell)> { + symbols + .chars() + .enumerate() + .map(|(i, ch)| { + let mut cell = Cell::default(); + cell.set_symbol(&ch.to_string()); + (u16::try_from(i).unwrap(), 0u16, cell) + }) + .collect() + } + + #[test] + fn osc8_open_close_bracket_only_their_region_cells() { + use crate::tui::osc8::LinkRegion; + + // Baseline: identical cells, no link regions. + let baseline_writer = SharedWriter::default(); + let baseline_capture = baseline_writer.0.clone(); + let mut baseline = + ColorCompatBackend::new(baseline_writer, ColorDepth::TrueColor, PaletteMode::Dark); + let cells = row_cells("ABCDE"); + baseline + .draw(cells.iter().map(|(x, y, cell)| (*x, *y, cell))) + .unwrap(); + let baseline_out = String::from_utf8_lossy(&baseline_capture.borrow()).to_string(); + + // Linked render: columns 2..=3 ("CD") carry one link region. + crate::tui::osc8::set_frame_links(vec![LinkRegion { + row: 0, + col_start: 2, + col_end: 3, + target: "https://example.test/1".to_string(), + }]); + let writer = SharedWriter::default(); + let capture = writer.0.clone(); + let mut backend = ColorCompatBackend::new(writer, ColorDepth::TrueColor, PaletteMode::Dark); + let cells = row_cells("ABCDE"); + backend + .draw(cells.iter().map(|(x, y, cell)| (*x, *y, cell))) + .unwrap(); + let out = String::from_utf8_lossy(&capture.borrow()).to_string(); + + let open = "\x1b]8;;https://example.test/1\x1b\\"; + let close = "\x1b]8;;\x1b\\"; + assert_eq!(out.matches(open).count(), 1, "exactly one open: {out:?}"); + assert_eq!(out.matches(close).count(), 1, "exactly one close: {out:?}"); + + // The open must precede the first linked glyph and the close must sit + // between the last linked glyph and the first glyph after the region. + let open_at = out.find(open).expect("open present"); + let close_at = out.find(close).expect("close present"); + let c_at = out.find('C').expect("glyph C"); + let d_at = out.find('D').expect("glyph D"); + let e_at = out.find('E').expect("glyph E"); + assert!(open_at < c_at, "open before linked cells: {out:?}"); + assert!(d_at < close_at, "close after linked cells: {out:?}"); + assert!( + close_at < e_at, + "cells after the region must not inherit the link: {out:?}" + ); + + // Visible glyph stream is unchanged by link insertion. + let mut baseline_visible = String::new(); + crate::tui::osc8::strip_ansi_into(&baseline_out, &mut baseline_visible); + let mut linked_visible = String::new(); + crate::tui::osc8::strip_ansi_into(&out, &mut linked_visible); + assert_eq!( + baseline_visible, linked_visible, + "link emission must not move or alter visible cells" + ); + } + + #[test] + fn osc8_two_regions_link_to_their_own_targets() { + use crate::tui::osc8::LinkRegion; + + crate::tui::osc8::set_frame_links(vec![ + LinkRegion { + row: 0, + col_start: 0, + col_end: 1, + target: "https://example.test/first".to_string(), + }, + LinkRegion { + row: 0, + col_start: 3, + col_end: 4, + target: "https://example.test/second".to_string(), + }, + ]); + let writer = SharedWriter::default(); + let capture = writer.0.clone(); + let mut backend = ColorCompatBackend::new(writer, ColorDepth::TrueColor, PaletteMode::Dark); + let cells = row_cells("ABZCD"); + backend + .draw(cells.iter().map(|(x, y, cell)| (*x, *y, cell))) + .unwrap(); + let out = String::from_utf8_lossy(&capture.borrow()).to_string(); + + let first = "\x1b]8;;https://example.test/first\x1b\\"; + let second = "\x1b]8;;https://example.test/second\x1b\\"; + let close = "\x1b]8;;\x1b\\"; + assert_eq!(out.matches(first).count(), 1, "{out:?}"); + assert_eq!(out.matches(second).count(), 1, "{out:?}"); + assert_eq!(out.matches(close).count(), 2, "{out:?}"); + + // Pre-#3029-audit bug: both opens were emitted before any cell, so + // the whole frame linked to the LAST region's target. Each region's + // open must close before the next region's open begins. + let first_at = out.find(first).expect("first open"); + let first_close_at = out[first_at..].find(close).expect("first close") + first_at; + let second_at = out.find(second).expect("second open"); + assert!( + first_close_at < second_at, + "region one must close before region two opens: {out:?}" + ); + // The unlinked middle glyph sits between the two link spans. + let z_at = out.find('Z').expect("unlinked glyph"); + assert!(first_close_at < z_at && z_at < second_at, "{out:?}"); + } } diff --git a/crates/tui/src/tui/history.rs b/crates/tui/src/tui/history.rs index 9ca7fb54..08e802f9 100644 --- a/crates/tui/src/tui/history.rs +++ b/crates/tui/src/tui/history.rs @@ -957,11 +957,14 @@ impl ExecCell { )); } else if self.status == ToolStatus::Running && self.source == ExecSource::Assistant { lines.extend(wrap_plain_line( - " Ctrl+B opens shell controls.", + " Ctrl+B backgrounds this command.", Style::default().fg(palette::TEXT_MUTED), width, )); - } else if self.status != ToolStatus::Running { + } else if self.status != ToolStatus::Running && mode == RenderMode::Transcript { + // #3031: Suppress "(no output)" in compact/Live mode; + // the success header is enough signal. Transcript still + // records it for exports/clipboard/pager. lines.push(Line::from(Span::styled( " (no output)", Style::default().fg(palette::TEXT_MUTED).italic(), @@ -970,13 +973,17 @@ impl ExecCell { } if let Some(duration_ms) = self.duration_ms { - let seconds = f64::from(u32::try_from(duration_ms).unwrap_or(u32::MAX)) / 1000.0; - lines.extend(render_compact_kv( - "time", - &format!("{seconds:.2}s"), - Style::default().fg(palette::TEXT_DIM), - width, - )); + // #3031: Suppress sub-second timing in compact mode. + // Transcript mode always shows exact timing. + if mode == RenderMode::Transcript || duration_ms >= 1000 { + let seconds = f64::from(u32::try_from(duration_ms).unwrap_or(u32::MAX)) / 1000.0; + lines.extend(render_compact_kv( + "time", + &format!("{seconds:.2}s"), + Style::default().fg(palette::TEXT_DIM), + width, + )); + } } wrap_card_rail(lines) @@ -2840,10 +2847,15 @@ fn render_preserved_output_mode( ) -> Vec> { let mut lines = Vec::new(); if output.trim().is_empty() { - lines.push(Line::from(Span::styled( - " (no output)", - Style::default().fg(palette::TEXT_MUTED).italic(), - ))); + // #3031: In compact/Live mode, suppress "(no output)" — the tool + // header already carries the success/failure status. Transcript + // mode still records it for exports/clipboard/pager. + if mode == RenderMode::Transcript { + lines.push(Line::from(Span::styled( + " (no output)", + Style::default().fg(palette::TEXT_MUTED).italic(), + ))); + } return lines; } @@ -5075,7 +5087,7 @@ mod tests { assert!(text.contains("running line 1")); assert!(text.contains("running line 2")); - assert!(!text.contains("Ctrl+B opens shell controls")); + assert!(!text.contains("Ctrl+B backgrounds this command")); } #[test] diff --git a/crates/tui/src/tui/mouse_ui.rs b/crates/tui/src/tui/mouse_ui.rs index 47a3e542..0cda7ba9 100644 --- a/crates/tui/src/tui/mouse_ui.rs +++ b/crates/tui/src/tui/mouse_ui.rs @@ -384,6 +384,16 @@ pub(crate) fn handle_mouse_event(app: &mut App, mouse: MouseEvent) -> Vec Vec Option { + for section in &app.sidebar_hover.sections { + if mouse.column >= section.content_area.x + && mouse.column + < section + .content_area + .x + .saturating_add(section.content_area.width) + && mouse.row >= section.content_area.y + && mouse.row + < section + .content_area + .y + .saturating_add(section.content_area.height) + && let Some(row) = section.rows.iter().find(|row| row.row_y == mouse.row) + { + return row.click_action.clone(); + } + } + None +} + pub(crate) fn mouse_hits_transcript_scrollbar(app: &App, mouse: MouseEvent) -> bool { let Some(area) = app.viewport.last_transcript_area else { return false; @@ -989,3 +1023,116 @@ pub(crate) fn selection_to_text(app: &App) -> Option { } Some(selected) } + +#[cfg(test)] +mod tests { + use super::sidebar_click_action; + use crate::config::Config; + use crate::tui::app::{App, SidebarHoverRow, SidebarHoverSection, TuiOptions}; + use crossterm::event::{KeyModifiers, MouseButton, MouseEvent, MouseEventKind}; + use ratatui::layout::Rect; + use std::path::PathBuf; + + fn create_test_app() -> App { + let options = TuiOptions { + model: "deepseek-v4-pro".to_string(), + workspace: PathBuf::from("."), + config_path: None, + config_profile: None, + allow_shell: false, + use_alt_screen: true, + use_mouse_capture: false, + use_bracketed_paste: true, + max_subagents: 1, + skills_dir: PathBuf::from("."), + memory_path: PathBuf::from("memory.md"), + notes_path: PathBuf::from("notes.txt"), + mcp_config_path: PathBuf::from("mcp.json"), + use_memory: false, + start_in_agent_mode: false, + skip_onboarding: true, + yolo: false, + resume_session_id: None, + initial_input: None, + }; + App::new(options, &Config::default()) + } + + fn hover_row(row_y: u16, action: Option<&str>) -> SidebarHoverRow { + SidebarHoverRow { + row_y, + display_text: "row".to_string(), + full_text: "row".to_string(), + detail: None, + is_truncated: false, + click_action: action.map(str::to_string), + } + } + + fn left_click(column: u16, row: u16) -> MouseEvent { + MouseEvent { + kind: MouseEventKind::Down(MouseButton::Left), + column, + row, + modifiers: KeyModifiers::NONE, + } + } + + #[test] + fn sidebar_click_resolves_row_actions_inside_section() { + let mut app = create_test_app(); + app.sidebar_hover.sections.push(SidebarHoverSection { + content_area: Rect::new(60, 4, 20, 6), + lines: vec![ + "header".to_string(), + "job row".to_string(), + "job detail".to_string(), + "agent row".to_string(), + ], + rows: vec![ + hover_row(4, None), + hover_row(5, Some("/jobs show shell_x")), + hover_row(6, Some("/jobs cancel shell_x")), + hover_row(7, Some("/subagents")), + ], + }); + + assert_eq!( + sidebar_click_action(&app, left_click(65, 5)).as_deref(), + Some("/jobs show shell_x"), + "job label row resolves to its show action" + ); + assert_eq!( + sidebar_click_action(&app, left_click(79, 6)).as_deref(), + Some("/jobs cancel shell_x"), + "job detail row resolves to its cancel action" + ); + assert_eq!( + sidebar_click_action(&app, left_click(60, 7)).as_deref(), + Some("/subagents"), + "agent row opens the agents view" + ); + assert_eq!( + sidebar_click_action(&app, left_click(65, 4)), + None, + "header row has no action" + ); + } + + #[test] + fn sidebar_click_outside_section_resolves_to_none() { + let mut app = create_test_app(); + app.sidebar_hover.sections.push(SidebarHoverSection { + content_area: Rect::new(60, 4, 20, 6), + lines: vec!["job row".to_string()], + rows: vec![hover_row(4, Some("/jobs show shell_x"))], + }); + + // Left of the sidebar (transcript area). + assert_eq!(sidebar_click_action(&app, left_click(10, 4)), None); + // Below the section's content area. + assert_eq!(sidebar_click_action(&app, left_click(65, 30)), None); + // Inside the section but on an empty row without metadata. + assert_eq!(sidebar_click_action(&app, left_click(65, 8)), None); + } +} diff --git a/crates/tui/src/tui/osc8.rs b/crates/tui/src/tui/osc8.rs index 156733be..b13e9834 100644 --- a/crates/tui/src/tui/osc8.rs +++ b/crates/tui/src/tui/osc8.rs @@ -21,6 +21,28 @@ use std::sync::atomic::{AtomicBool, Ordering}; const OSC8_PREFIX: &str = "\x1b]8;;"; const OSC8_TERMINATOR: &str = "\x1b\\"; +const OSC8_CLOSE: &str = "\x1b]8;;\x1b\\"; + +/// A contiguous run of cells on one terminal row that share a hyperlink target. +#[derive(Debug, Clone)] +pub struct LinkRegion { + pub row: u16, + pub col_start: u16, + pub col_end: u16, + pub target: String, +} + +/// Write an OSC 8 hyperlink open sequence for `target` to `w`. +pub fn write_osc8_open(w: &mut impl std::io::Write, target: &str) -> std::io::Result<()> { + w.write_all(OSC8_PREFIX.as_bytes())?; + w.write_all(target.as_bytes())?; + w.write_all(OSC8_TERMINATOR.as_bytes()) +} + +/// Write an OSC 8 hyperlink close sequence to `w`. +pub fn write_osc8_close(w: &mut impl std::io::Write) -> std::io::Result<()> { + w.write_all(OSC8_CLOSE.as_bytes()) +} /// Process-wide enable flag. `true` by default. Set once at app init from /// `[ui] osc8_links` (when present) and read by the renderer. @@ -38,6 +60,30 @@ pub fn enabled() -> bool { ENABLED.load(Ordering::Relaxed) } +// --- Thread-local link region accumulator (#3029) --- + +use std::cell::RefCell; + +thread_local! { + /// Link regions collected during the current render frame. + /// Populated by the render closure after scanning the ratatui buffer; + /// consumed and cleared by `ColorCompatBackend::draw()`. + pub static FRAME_LINKS: RefCell> = const { RefCell::new(Vec::new()) }; +} + +/// Replace the thread-local frame link buffer with `links`. +#[allow(dead_code)] // called from render closure (future integration) +pub fn set_frame_links(links: Vec) { + FRAME_LINKS.with(|cell| { + *cell.borrow_mut() = links; + }); +} + +/// Take the thread-local frame links, leaving an empty vec behind. +pub fn take_frame_links() -> Vec { + FRAME_LINKS.with(|cell| std::mem::take(&mut *cell.borrow_mut())) +} + /// Wrap `label` so it links to `target` in OSC 8-aware terminals. The returned /// string contains the full `\x1b]8;;TARGET\x1b\LABEL\x1b]8;;\x1b\` payload. /// diff --git a/crates/tui/src/tui/sidebar.rs b/crates/tui/src/tui/sidebar.rs index 65e10b9b..8d435f85 100644 --- a/crates/tui/src/tui/sidebar.rs +++ b/crates/tui/src/tui/sidebar.rs @@ -739,7 +739,29 @@ fn render_sidebar_work(f: &mut Frame, area: Rect, app: &mut App) { ); let full_texts = work_panel_hover_texts(&summary, content_width.max(1), usable_rows); - render_sidebar_section(f, area, "Work", lines, full_texts, app); + render_sidebar_section(f, area, "Work", lines, full_texts, Vec::new(), app); +} + +/// Click actions for one background job row pair (#3028). +/// +/// Returns `(show, detail)` where `show` opens the job and `detail` cancels +/// it while it is still running (finished jobs make the detail row a second +/// show target instead — cancel would only error). `shell_*` ids belong to +/// the shell job manager and route through `/jobs`; everything else routes +/// through `/task`. +fn background_task_click_actions(task: &TaskPanelEntry) -> (String, String) { + let namespace = if task.id.starts_with("shell_") { + "jobs" + } else { + "task" + }; + let show = format!("/{namespace} show {}", task.id); + let detail = if matches!(task.status.as_str(), "running" | "queued") { + format!("/{namespace} cancel {}", task.id) + } else { + show.clone() + }; + (show, detail) } fn render_sidebar_tasks(f: &mut Frame, area: Rect, app: &mut App) { @@ -749,10 +771,10 @@ fn render_sidebar_tasks(f: &mut Frame, area: Rect, app: &mut App) { let content_width = area.width.saturating_sub(4) as usize; let usable_rows = area.height.saturating_sub(3) as usize; - let lines = task_panel_lines(app, content_width.max(1), usable_rows.max(1)); + let (lines, row_actions) = task_panel_rows(app, content_width.max(1), usable_rows.max(1)); let full_texts = task_panel_hover_texts(app, usable_rows.max(1)); - render_sidebar_section(f, area, "Tasks", lines, full_texts, app); + render_sidebar_section(f, area, "Tasks", lines, full_texts, row_actions, app); } #[derive(Debug, Clone)] @@ -763,25 +785,39 @@ struct SidebarToolRow { duration_ms: Option, } +#[cfg(test)] fn task_panel_lines(app: &App, content_width: usize, max_rows: usize) -> Vec> { + task_panel_rows(app, content_width, max_rows).0 +} + +/// Build the Tasks panel lines together with a parallel per-line click-action +/// vector (#3028). Producing both in a single pass keeps the action indices +/// aligned with the rendered lines no matter how the layout evolves. +fn task_panel_rows( + app: &App, + content_width: usize, + max_rows: usize, +) -> (Vec>, Vec>) { let theme = &app.ui_theme; let mut lines: Vec> = Vec::with_capacity(max_rows.max(4)); + let mut actions: Vec> = Vec::with_capacity(max_rows.max(4)); - if let Some(turn_id) = app.runtime_turn_id.as_ref() { + if app.runtime_turn_id.is_some() { let status = app .runtime_turn_status .as_deref() .unwrap_or("unknown") .to_string(); - // Show enough of the turn id prefix to identify it for - // task_read / task_cancel. A UUID needs ~13 chars before the - // first hyphen; 16 chars gives a safe prefix for disambiguation. - let turn_prefix = truncate_line_to_width(turn_id, 16); + // #3030: Use a stable turn number ("Turn 1") instead of the raw + // UUID prefix. The full UUID is preserved in the hover text + // (task_panel_hover_texts) for inspection. + let turn_label = if app.turn_counter > 0 { + format!("Turn {} ({status})", app.turn_counter) + } else { + format!("Current turn ({status})") + }; lines.push(Line::from(Span::styled( - truncate_line_to_width( - &format!("turn {turn_prefix} ({status})",), - content_width.max(1), - ), + truncate_line_to_width(&turn_label, content_width.max(1)), Style::default().fg(theme.accent_primary), ))); } @@ -793,6 +829,9 @@ fn task_panel_lines(app: &App, content_width: usize, max_rows: usize) -> Vec Vec Vec Vec Vec Vec Vec { @@ -1765,7 +1812,7 @@ fn render_sidebar_subagents(f: &mut Frame, area: Rect, app: &mut App) { role_counts, }; let rows = sidebar_agent_rows(app); - let lines = subagent_panel_lines( + let (lines, row_actions) = subagent_panel_rows( &summary, &rows, content_width, @@ -1774,7 +1821,7 @@ fn render_sidebar_subagents(f: &mut Frame, area: Rect, app: &mut App) { ); let full_texts = subagent_panel_hover_texts(&summary, &rows, usable_rows.max(1)); - render_sidebar_section(f, area, "Agents", lines, full_texts, app); + render_sidebar_section(f, area, "Agents", lines, full_texts, row_actions, app); } /// Minimal projection of the data the sub-agent sidebar needs. Lifted out @@ -1834,9 +1881,17 @@ fn sidebar_agent_rows(app: &App) -> Vec { .map(summarize_tool_output) .filter(|summary| !summary.trim().is_empty()) }); + // #3030: Prefer the user-assigned nickname > stable label + // ("Agent 1") > raw name. Every spawned agent gets a label-map + // entry, so the generated label must not shadow nicknames. + let display_name = agent + .nickname + .clone() + .or_else(|| app.agent_label_map.get(&agent.agent_id).cloned()) + .unwrap_or_else(|| agent.name.clone()); SidebarAgentRow { id: agent.agent_id.clone(), - name: agent.nickname.clone().unwrap_or_else(|| agent.name.clone()), + name: display_name, role: agent.agent_type.as_str().to_string(), status: subagent_status_text(&agent.status).to_string(), git_branch: agent.git_branch.clone(), @@ -1856,17 +1911,25 @@ fn sidebar_agent_rows(app: &App) -> Vec { app.agent_progress .iter() .filter(|(id, _)| !cached_ids.contains(id.as_str())) - .map(|(id, progress)| SidebarAgentRow { - id: id.clone(), - name: id.clone(), - role: "agent".to_string(), - status: "running".to_string(), - git_branch: None, - progress: Some(progress.clone()), - steps_taken: 0, - duration_ms: app.agent_activity_started_at.map(|started| { - u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX) - }), + .map(|(id, progress)| { + // #3030: Prefer stable label for progress-only agents too. + let display_name = app + .agent_label_map + .get(id.as_str()) + .cloned() + .unwrap_or_else(|| id.clone()); + SidebarAgentRow { + id: id.clone(), + name: display_name, + role: "agent".to_string(), + status: "running".to_string(), + git_branch: None, + progress: Some(progress.clone()), + steps_taken: 0, + duration_ms: app.agent_activity_started_at.map(|started| { + u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX) + }), + } }), ); @@ -1885,6 +1948,7 @@ fn subagent_status_text(status: &SubAgentStatus) -> &'static str { /// Build sub-agent sidebar lines from summary + per-agent rows. Public /// for the snapshot tests in this module. +#[cfg(test)] pub fn subagent_panel_lines( summary: &SidebarSubagentSummary, rows: &[SidebarAgentRow], @@ -1892,7 +1956,21 @@ pub fn subagent_panel_lines( max_rows: usize, theme: &palette::UiTheme, ) -> Vec> { + subagent_panel_rows(summary, rows, content_width, max_rows, theme).0 +} + +/// Build the Agents panel lines together with a parallel per-line +/// click-action vector (#3028). Agent label rows open the agents view via +/// `/subagents`; header, role-mix, detail, and RLM lines are not clickable. +fn subagent_panel_rows( + summary: &SidebarSubagentSummary, + rows: &[SidebarAgentRow], + content_width: usize, + max_rows: usize, + theme: &palette::UiTheme, +) -> (Vec>, Vec>) { let mut lines: Vec> = Vec::with_capacity(max_rows.max(4)); + let mut actions: Vec> = Vec::with_capacity(max_rows.max(4)); let fanout_total = summary.fanout_total.unwrap_or(0); if summary.cached_total == 0 @@ -1904,7 +1982,8 @@ pub fn subagent_panel_lines( "No agents", Style::default().fg(theme.text_muted), ))); - return lines; + actions.push(None); + return (lines, actions); } let (live_running, total) = if let Some(total) = summary.fanout_total { @@ -1931,6 +2010,7 @@ pub fn subagent_panel_lines( )] }; lines.push(Line::from(header)); + actions.push(None); if !summary.role_counts.is_empty() { let mix: Vec = summary @@ -1943,6 +2023,7 @@ pub fn subagent_panel_lines( truncate_line_to_width(&role_line, content_width.max(1)), Style::default().fg(theme.text_dim), ))); + actions.push(None); } for row in rows { @@ -1955,6 +2036,7 @@ pub fn subagent_panel_lines( truncate_line_to_width(&label, content_width.max(1)), Style::default().fg(color), ))); + actions.push(Some("/subagents".to_string())); // Auto-collapse finished sub-agents: hide detail lines for completed // agents so the sidebar stays compact when work is done. @@ -1965,8 +2047,9 @@ pub fn subagent_panel_lines( if lines.len() >= max_rows { break; } + // #3030: keep raw agent ids out of the compact detail line — the + // full id remains available in the hover text. let mut detail_parts = Vec::new(); - detail_parts.push(truncate_line_to_width(&row.id, 10)); if row.steps_taken > 0 { detail_parts.push(format!("{} step(s)", row.steps_taken)); } @@ -1981,6 +2064,9 @@ pub fn subagent_panel_lines( if let Some(duration) = row.duration_ms { detail_parts.push(format_duration_ms(duration)); } + if detail_parts.is_empty() { + detail_parts.push(row.status.clone()); + } lines.push(Line::from(Span::styled( format!( " {}", @@ -1991,6 +2077,7 @@ pub fn subagent_panel_lines( ), Style::default().fg(theme.text_dim), ))); + actions.push(None); } if summary.foreground_rlm_running { @@ -2001,9 +2088,11 @@ pub fn subagent_panel_lines( Style::default().fg(theme.text_dim), ), ])); + actions.push(None); } - lines + debug_assert_eq!(lines.len(), actions.len()); + (lines, actions) } fn subagent_panel_hover_texts( @@ -2225,7 +2314,7 @@ fn render_context_panel(f: &mut Frame, area: Rect, app: &mut App) { ))); } - render_sidebar_section(f, area, "Session", lines, Vec::new(), app); + render_sidebar_section(f, area, "Session", lines, Vec::new(), Vec::new(), app); } fn spans_to_text(spans: &[Span<'_>]) -> String { @@ -2242,6 +2331,7 @@ fn render_sidebar_section( title: &str, lines: Vec>, full_texts: Vec, + row_actions: Vec>, app: &mut App, ) { if area.width < 4 || area.height < 3 { @@ -2277,7 +2367,7 @@ fn render_sidebar_section( .unwrap_or_else(|| display.clone()) }) .collect(); - let rows = sidebar_hover_rows(content_area, &display_texts, &hover_texts); + let rows = sidebar_hover_rows(content_area, &display_texts, &hover_texts, &row_actions); app.sidebar_hover.sections.push(SidebarHoverSection { content_area, lines: hover_texts, @@ -2325,6 +2415,7 @@ fn sidebar_hover_rows( content_area: Rect, display_texts: &[String], hover_texts: &[String], + row_actions: &[Option], ) -> Vec { display_texts .iter() @@ -2334,6 +2425,7 @@ fn sidebar_hover_rows( let row_y = content_area.y.saturating_add(idx as u16); let display_width = unicode_width::UnicodeWidthStr::width(display_text.as_str()); let full_width = unicode_width::UnicodeWidthStr::width(full_text.as_str()); + let click_action = row_actions.get(idx).and_then(|a| a.clone()); SidebarHoverRow { row_y, display_text: display_text.clone(), @@ -2342,6 +2434,7 @@ fn sidebar_hover_rows( is_truncated: display_width > content_area.width as usize || full_width > content_area.width as usize || display_text != full_text, + click_action, } }) .collect() @@ -2355,7 +2448,8 @@ mod tests { SidebarSubagentSummary, SidebarToolRow, SidebarWorkChecklistItem, SidebarWorkStrategyStep, SidebarWorkSummary, ToolRowOrder, auto_sidebar_panels, editorial_tool_rows, normalize_activity_text, sidebar_hover_rows, sidebar_work_summary, - subagent_panel_hover_texts, subagent_panel_lines, task_panel_lines, work_panel_empty_hint, + subagent_panel_hover_texts, subagent_panel_lines, subagent_panel_rows, + task_panel_hover_texts, task_panel_lines, task_panel_rows, work_panel_empty_hint, work_panel_hover_texts, work_panel_lines, }; use crate::config::Config; @@ -2969,6 +3063,249 @@ mod tests { ); } + #[test] + fn task_panel_actions_make_single_background_job_clickable() { + let mut app = create_test_app(); + app.task_panel.push(TaskPanelEntry { + id: "shell_only".to_string(), + status: "running".to_string(), + prompt_summary: "shell: cargo build".to_string(), + duration_ms: Some(1_000), + }); + + let (lines, actions) = task_panel_rows(&app, 80, 12); + let text = lines_to_text(&lines); + assert_eq!(lines.len(), actions.len()); + + let label_idx = text + .iter() + .position(|line| line.contains("cargo build")) + .expect("background job label row"); + assert_eq!( + actions[label_idx].as_deref(), + Some("/jobs show shell_only"), + "single-job label row must be clickable: {actions:?}" + ); + assert_eq!( + actions[label_idx + 1].as_deref(), + Some("/jobs cancel shell_only"), + "single-job detail row must cancel that job: {actions:?}" + ); + } + + #[test] + fn task_panel_actions_route_each_job_to_its_own_id() { + let mut app = create_test_app(); + app.task_panel.push(TaskPanelEntry { + id: "shell_aaa".to_string(), + status: "running".to_string(), + prompt_summary: "shell: cargo test --workspace".to_string(), + duration_ms: Some(2_000), + }); + app.task_panel.push(TaskPanelEntry { + id: "task_bbb".to_string(), + status: "running".to_string(), + prompt_summary: "summarize the release notes".to_string(), + duration_ms: Some(3_000), + }); + + let (lines, actions) = task_panel_rows(&app, 96, 16); + let text = lines_to_text(&lines); + assert_eq!(lines.len(), actions.len()); + + let header_idx = text + .iter() + .position(|line| line.starts_with("Background commands")) + .expect("background header row"); + assert!(actions[header_idx].is_none(), "header is not clickable"); + + let shell_idx = text + .iter() + .position(|line| line.contains("cargo test --workspace")) + .expect("shell job label row"); + assert_eq!( + actions[shell_idx].as_deref(), + Some("/jobs show shell_aaa"), + "shell jobs route through /jobs: {actions:?}" + ); + assert_eq!( + actions[shell_idx + 1].as_deref(), + Some("/jobs cancel shell_aaa"), + "shell job detail row cancels the SAME job: {actions:?}" + ); + + let task_idx = text + .iter() + .position(|line| line.contains("task_bbb")) + .expect("task job label row"); + assert_eq!( + actions[task_idx].as_deref(), + Some("/task show task_bbb"), + "task-manager jobs route through /task: {actions:?}" + ); + assert_eq!( + actions[task_idx + 1].as_deref(), + Some("/task cancel task_bbb"), + "task job detail row cancels the SAME job: {actions:?}" + ); + + let hint_idx = text + .iter() + .position(|line| line.contains("Ctrl+K")) + .expect("cancel-all hint row"); + assert_eq!(actions[hint_idx].as_deref(), Some("/jobs cancel-all")); + } + + #[test] + fn task_panel_finished_job_detail_row_shows_instead_of_cancels() { + let mut app = create_test_app(); + app.task_panel.push(TaskPanelEntry { + id: "shell_done".to_string(), + status: "completed".to_string(), + prompt_summary: "shell: cargo fmt".to_string(), + duration_ms: Some(500), + }); + + let (lines, actions) = task_panel_rows(&app, 80, 12); + let text = lines_to_text(&lines); + + let label_idx = text + .iter() + .position(|line| line.contains("cargo fmt")) + .expect("completed job label row"); + assert_eq!(actions[label_idx].as_deref(), Some("/jobs show shell_done")); + assert_eq!( + actions[label_idx + 1].as_deref(), + Some("/jobs show shell_done"), + "finished jobs must not expose a cancel click target: {actions:?}" + ); + } + + #[test] + fn task_panel_actions_align_with_lines_when_live_tools_present() { + let mut app = create_test_app(); + app.runtime_turn_id = Some("0196f0a3-aaaa-bbbb-cccc-ddddeeee0000".to_string()); + let mut active = ActiveCell::new(); + active.push_tool( + "shell-1", + HistoryCell::Tool(ToolCell::Exec(ExecCell { + command: "sleep 600".to_string(), + status: ToolStatus::Running, + output: None, + live_output: None, + shell_task_id: None, + started_at: Some(Instant::now()), + duration_ms: None, + source: ExecSource::Assistant, + interaction: None, + output_summary: None, + })), + ); + app.active_cell = Some(active); + app.task_panel.push(TaskPanelEntry { + id: "task_q".to_string(), + status: "running".to_string(), + prompt_summary: "investigate flaky test".to_string(), + duration_ms: Some(9_000), + }); + + let (lines, actions) = task_panel_rows(&app, 96, 16); + let text = lines_to_text(&lines); + assert_eq!( + lines.len(), + actions.len(), + "actions must stay index-aligned with lines: {text:?}" + ); + + // Turn label and live-tool rows are not clickable. + assert!(actions[0].is_none(), "turn label row has no action"); + let live_idx = text + .iter() + .position(|line| line == "Live tools") + .expect("live tools header"); + assert!(actions[live_idx].is_none()); + + let task_idx = text + .iter() + .position(|line| line.contains("task_q")) + .expect("background job label row"); + assert_eq!(actions[task_idx].as_deref(), Some("/task show task_q")); + } + + #[test] + fn subagent_panel_actions_mark_agent_rows_with_role_mix_header() { + let mut role_counts = std::collections::BTreeMap::new(); + role_counts.insert("worker".to_string(), 1); + let summary = SidebarSubagentSummary { + cached_total: 1, + cached_running: 1, + role_counts, + ..SidebarSubagentSummary::default() + }; + let rows = vec![SidebarAgentRow { + id: "agent_0123456789".to_string(), + name: "investigator".to_string(), + role: "worker".to_string(), + status: "running".to_string(), + git_branch: None, + progress: Some("scanning".to_string()), + steps_taken: 2, + duration_ms: Some(1_000), + }]; + + let (lines, actions) = subagent_panel_rows(&summary, &rows, 48, 8, &palette::UI_THEME); + let text = lines_to_text(&lines); + assert_eq!(lines.len(), actions.len()); + + assert!(actions[0].is_none(), "count header has no action"); + assert!(actions[1].is_none(), "role-mix header has no action"); + let agent_idx = text + .iter() + .position(|line| line.contains("investigator")) + .expect("agent label row"); + assert_eq!(actions[agent_idx].as_deref(), Some("/subagents")); + assert!( + actions[agent_idx + 1].is_none(), + "agent detail row has no action" + ); + } + + #[test] + fn subagent_panel_actions_skip_role_mix_slot_for_progress_only_agents() { + // Progress-only agents have no cached role counts, so there is no + // role-mix line — the first agent row sits directly under the count + // header and must still resolve to /subagents (#3028 audit fix). + let summary = SidebarSubagentSummary { + progress_only_count: 1, + ..SidebarSubagentSummary::default() + }; + let rows = vec![SidebarAgentRow { + id: "agent_fedcba987654".to_string(), + name: "scout".to_string(), + role: "explorer".to_string(), + status: "running".to_string(), + git_branch: None, + progress: Some("reading".to_string()), + steps_taken: 1, + duration_ms: None, + }]; + + let (lines, actions) = subagent_panel_rows(&summary, &rows, 48, 8, &palette::UI_THEME); + let text = lines_to_text(&lines); + assert_eq!(lines.len(), actions.len()); + + assert!(actions[0].is_none(), "count header has no action"); + let agent_idx = text + .iter() + .position(|line| line.contains("scout")) + .expect("agent label row"); + assert_eq!( + agent_idx, 1, + "no role-mix line should be emitted without role counts: {text:?}" + ); + assert_eq!(actions[agent_idx].as_deref(), Some("/subagents")); + } + #[test] fn tasks_panel_collapses_repeated_low_value_recent_tools_after_failures() { let mut app = create_test_app(); @@ -3468,7 +3805,7 @@ mod tests { use ratatui::layout::Rect; let display = vec!["[~] agent imple…".to_string()]; let full = vec!["[~] agent implementation-worker-for-sidebar-detail-popover".to_string()]; - let rows = sidebar_hover_rows(Rect::new(62, 5, 16, 4), &display, &full); + let rows = sidebar_hover_rows(Rect::new(62, 5, 16, 4), &display, &full, &[]); let expected = SidebarHoverRow { row_y: 5, @@ -3476,6 +3813,7 @@ mod tests { full_text: full[0].clone(), detail: None, is_truncated: true, + click_action: None, }; assert_eq!(rows, vec![expected]); } @@ -3514,4 +3852,104 @@ mod tests { "hover text should include the full progress before popover wrapping: {hover:?}" ); } + + // ── #3030: stable labels instead of raw internal ids ─────────────────── + + #[test] + fn tasks_panel_shows_stable_turn_label_not_uuid() { + let mut app = create_test_app(); + app.runtime_turn_id = Some("0196f0a3-1111-2222-3333-444455556666".to_string()); + app.runtime_turn_status = Some("in_progress".to_string()); + app.turn_counter = 3; + + let text = lines_to_text(&task_panel_lines(&app, 64, 8)); + assert!( + text[0].contains("Turn 3 (in_progress)"), + "compact row must show the stable turn label: {text:?}" + ); + assert!( + !text[0].contains("0196f0a3"), + "raw turn UUID must stay out of the compact row: {text:?}" + ); + + let hover = task_panel_hover_texts(&app, 8); + assert!( + hover[0].contains("0196f0a3-1111-2222-3333-444455556666"), + "full turn UUID must remain available in hover text: {hover:?}" + ); + } + + #[test] + fn tasks_panel_turn_label_falls_back_before_first_counted_turn() { + let mut app = create_test_app(); + app.runtime_turn_id = Some("0196f0a3-1111-2222-3333-444455556666".to_string()); + app.runtime_turn_status = Some("in_progress".to_string()); + app.turn_counter = 0; + + let text = lines_to_text(&task_panel_lines(&app, 64, 8)); + assert!( + text[0].contains("Current turn (in_progress)"), + "zero counter falls back to a generic label: {text:?}" + ); + } + + #[test] + fn ensure_agent_label_assigns_stable_sequential_labels() { + let mut app = create_test_app(); + assert_eq!(app.ensure_agent_label("agent_aaa111"), "Agent 1"); + assert_eq!(app.ensure_agent_label("agent_bbb222"), "Agent 2"); + // Re-seeing a known agent keeps its original label. + assert_eq!(app.ensure_agent_label("agent_aaa111"), "Agent 1"); + assert_eq!(app.agent_counter, 2); + // Read-only lookup falls back to the raw id for unknown agents. + assert_eq!(app.agent_display_label("agent_bbb222"), "Agent 2"); + assert_eq!(app.agent_display_label("agent_zzz999"), "agent_zzz999"); + } + + fn cached_agent( + agent_id: &str, + nickname: Option<&str>, + ) -> crate::tools::subagent::SubAgentResult { + crate::tools::subagent::SubAgentResult { + name: "implementation-worker".to_string(), + agent_id: agent_id.to_string(), + context_mode: "fresh".to_string(), + fork_context: false, + workspace: None, + git_branch: None, + agent_type: crate::tools::subagent::SubAgentType::General, + assignment: crate::tools::subagent::SubAgentAssignment { + objective: "task".to_string(), + role: Some("worker".to_string()), + }, + model: String::new(), + nickname: nickname.map(str::to_string), + status: crate::tools::subagent::SubAgentStatus::Running, + result: None, + steps_taken: 1, + checkpoint: None, + duration_ms: 100, + from_prior_session: false, + } + } + + #[test] + fn sidebar_agent_rows_prefer_nickname_over_generated_label() { + let mut app = create_test_app(); + let agent_id = "agent_cafe0123"; + app.ensure_agent_label(agent_id); + app.subagent_cache + .push(cached_agent(agent_id, Some("doc-fixer"))); + + let rows = super::sidebar_agent_rows(&app); + assert_eq!( + rows[0].name, "doc-fixer", + "user nickname must beat the generated Agent-N label" + ); + + // Without a nickname the generated label is used. + app.subagent_cache[0].nickname = None; + let rows = super::sidebar_agent_rows(&app); + assert_eq!(rows[0].name, "Agent 1"); + } } diff --git a/crates/tui/src/tui/tool_routing.rs b/crates/tui/src/tui/tool_routing.rs index 2981e59b..e7f8ee81 100644 --- a/crates/tui/src/tui/tool_routing.rs +++ b/crates/tui/src/tui/tool_routing.rs @@ -449,6 +449,19 @@ fn record_spillover_artifact_if_any( )); } +/// #3031: shell/tasks tools embed the literal `"(no output)"` into successful +/// `ToolResult` content (the model-facing transcript needs a non-empty tool +/// result). Treat it as no output on the TUI side so the compact-mode +/// suppression gate in `history.rs` actually fires; the raw content remains +/// available through the tool-detail store. +fn visible_tool_output(content: &str) -> Option { + if content.trim() == "(no output)" { + None + } else { + Some(content.to_string()) + } +} + pub(super) fn handle_tool_call_complete( app: &mut App, id: &str, @@ -549,9 +562,11 @@ pub(super) fn handle_tool_call_complete( .and_then(|m| m.get("duration_ms")) .and_then(serde_json::Value::as_u64); if status != ToolStatus::Running && exec.interaction.is_none() { - exec.output = Some(tool_result.content.clone()); - exec.output_summary = - Some(super::history::summarize_tool_output(&tool_result.content)); + exec.output = visible_tool_output(&tool_result.content); + exec.output_summary = exec + .output + .as_deref() + .map(super::history::summarize_tool_output); exec.live_output = None; } else if status == ToolStatus::Running && exec.interaction.is_none() @@ -642,8 +657,9 @@ pub(super) fn handle_tool_call_complete( generic.status = status; match result.as_ref() { Ok(tool_result) => { - generic.output = Some(tool_result.content.clone()); - generic.output_summary = Some(summarize_tool_output(&tool_result.content)); + generic.output = visible_tool_output(&tool_result.content); + generic.output_summary = + generic.output.as_deref().map(summarize_tool_output); generic.is_diff = output_looks_like_diff(&tool_result.content); } Err(err) => { @@ -1273,4 +1289,64 @@ mod tests { assert_eq!(snapshot.items[0].step, "render all fields"); assert_eq!(snapshot.items[0].status, StepStatus::Pending); } + + // ── #3031: "(no output)" placeholder must not defeat compact rendering ─ + + #[test] + fn visible_tool_output_maps_no_output_placeholder_to_none() { + assert_eq!(visible_tool_output("(no output)"), None); + assert_eq!(visible_tool_output(" (no output)\n"), None); + } + + #[test] + fn visible_tool_output_preserves_real_content() { + assert_eq!( + visible_tool_output("compiled 3 crates").as_deref(), + Some("compiled 3 crates") + ); + // Output that merely CONTAINS the placeholder is real output. + assert_eq!( + visible_tool_output("step 1: (no output) — continuing").as_deref(), + Some("step 1: (no output) — continuing") + ); + assert_eq!(visible_tool_output("").as_deref(), Some("")); + } + + #[test] + fn exec_cell_without_output_suppresses_placeholder_in_live_mode() { + use crate::tui::history::{ExecCell, ExecSource, ToolCell, ToolStatus}; + + let cell = ToolCell::Exec(ExecCell { + command: "true".to_string(), + status: ToolStatus::Success, + output: None, + live_output: None, + shell_task_id: None, + started_at: None, + duration_ms: Some(120), + source: ExecSource::Assistant, + interaction: None, + output_summary: None, + }); + + let live: String = cell + .lines(80) + .iter() + .flat_map(|line| line.spans.iter().map(|s| s.content.to_string())) + .collect(); + assert!( + !live.contains("(no output)"), + "Live mode must suppress the placeholder: {live:?}" + ); + + let transcript: String = cell + .transcript_lines(80) + .iter() + .flat_map(|line| line.spans.iter().map(|s| s.content.to_string())) + .collect(); + assert!( + transcript.contains("(no output)"), + "Transcript mode still records the placeholder: {transcript:?}" + ); + } } diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 3481d43e..c7614f8a 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -134,7 +134,7 @@ use super::slash_menu::{ apply_slash_menu_selection, partial_inline_skill_mention_at_cursor, try_autocomplete_slash_command, visible_slash_menu_entries, }; -use super::views::{ConfigView, HelpView, ModalKind, ShellControlView, ViewEvent}; +use super::views::{ConfigView, HelpView, ModalKind, ViewEvent}; use super::widgets::pending_input_preview::{ContextPreviewItem, PendingInputPreview}; use super::widgets::{ChatWidget, ComposerWidget, HeaderData, HeaderWidget, Renderable}; @@ -894,6 +894,7 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig { ), max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, allowed_tools: app.active_allowed_tools.clone(), + disallowed_tools: None, hook_executor: app.runtime_services.hook_executor.clone(), network_policy: config.network.clone().map(|toml_cfg| { crate::network_policy::NetworkPolicyDecider::with_default_audit(toml_cfg.into_runtime()) @@ -1430,6 +1431,11 @@ async fn run_event_loop( break; } }; + // #3033: remember whether an EARLIER event in this drain batch + // already requested a redraw. The AgentProgress throttle below + // may opt the current event out of repainting, but it must not + // cancel redraws owed to other events in the same batch. + let redraw_requested_before_event = received_engine_event; received_engine_event = true; if app.suppress_stream_events_until_turn_complete { if matches!(event, EngineEvent::TurnStarted { .. }) { @@ -1791,6 +1797,7 @@ async fn run_event_loop( } app.runtime_turn_id = Some(turn_id); app.runtime_turn_status = Some("in_progress".to_string()); + app.turn_counter = app.turn_counter.saturating_add(1); app.reasoning_buffer.clear(); app.reasoning_header = None; app.last_reasoning = None; @@ -2301,8 +2308,10 @@ async fn run_event_loop( if app.agent_activity_started_at.is_none() { app.agent_activity_started_at = Some(Instant::now()); } - app.status_message = - Some(format!("Sub-agent {id} starting: {prompt_summary}")); + // #3030: Assign a stable user-facing label for this + // agent and keep the raw id out of the status bar. + let label = app.ensure_agent_label(&id); + app.status_message = Some(format!("{label} starting: {prompt_summary}")); let _ = engine_handle.send(Op::ListSubAgents).await; } EngineEvent::AgentProgress { id, status } => { @@ -2317,7 +2326,27 @@ async fn run_event_loop( if app.agent_activity_started_at.is_none() { app.agent_activity_started_at = Some(Instant::now()); } - app.status_message = Some(format!("Sub-agent {id}: {display}")); + // #3030: progress can arrive before AgentSpawned is + // observed — assign the stable label on first sight. + let label = app.ensure_agent_label(&id); + app.status_message = Some(format!("{label}: {display}")); + // #3033: Throttle redraws from rapid AgentProgress events. + // When 4+ sub-agents are running concurrently, each firing + // progress events, the per-event `needs_redraw = true` saturates + // the render loop and starves terminal input. Limit + // progress-driven repaints to at most one per 100ms; the + // status-animation timer (80ms cadence) provides a guaranteed + // floor for sidebar updates. Data is still recorded immediately; + // the sidebar picks it up on the next permitted redraw. + if !agent_progress_redraw_permitted( + &mut app.last_agent_progress_redraw, + Instant::now(), + ) { + // Restore the pre-event accumulator value: a + // throttled progress event contributes no redraw of + // its own, but earlier events' redraws survive. + received_engine_event = redraw_requested_before_event; + } } EngineEvent::AgentComplete { id, result } => { execute_subagent_observer_hook( @@ -2339,8 +2368,10 @@ async fn run_event_loop( && matches!(agent.status, SubAgentStatus::Running) }); app.agent_progress.remove(&id); + // #3030: stable label with raw-id fallback. + let label = app.agent_display_label(&id); app.status_message = Some(format!( - "Sub-agent {id} completed: {}", + "{label} completed: {}", summarize_tool_output(&result) )); let should_recapture_terminal = @@ -2554,7 +2585,8 @@ async fn run_event_loop( blocked_network, blocked_write, ); - app.view_stack.push(ElevationView::new(request)); + app.view_stack + .push(ElevationView::new(request, app.ui_locale)); if let Some((method, _, _)) = crate::tui::notifications::settings(config) { @@ -3340,7 +3372,12 @@ async fn run_event_loop( && key.modifiers.contains(KeyModifiers::CONTROL) && app.view_stack.is_empty() { - open_shell_control(app); + // #3032: Ctrl+B directly backgrounds the active foreground + // shell command instead of opening a two-step shell-control + // menu. When nothing is backgroundable, the status message + // tells the user what's going on. + request_foreground_shell_background(app); + app.needs_redraw = true; continue; } @@ -4650,6 +4687,21 @@ fn reconcile_turn_liveness(app: &mut App, now: Instant, has_running_agents: bool false } +/// #3033: gate progress-driven repaints to at most one per 100ms. +/// +/// Returns whether the current `AgentProgress` event may request a redraw, +/// updating the last-redraw timestamp when it may. Data updates are never +/// throttled — only the repaint request is. +fn agent_progress_redraw_permitted(last_redraw: &mut Option, now: Instant) -> bool { + match *last_redraw { + Some(last) if now.duration_since(last) < Duration::from_millis(100) => false, + _ => { + *last_redraw = Some(now); + true + } + } +} + fn recover_engine_event_disconnect(app: &mut App) -> bool { let had_live_work = app.is_loading || app.is_compacting @@ -6469,7 +6521,10 @@ fn spawn_external_url_command(mut command: Command) -> Result<()> { fn apply_workspace_runtime_state(app: &mut App, config: &Config, workspace: PathBuf) { app.workspace = workspace.clone(); - app.hooks = HookExecutor::new(config.hooks_config(), workspace.clone()); + app.hooks = HookExecutor::new( + crate::hooks::HooksConfig::load_with_project(config.hooks_config(), &workspace), + workspace.clone(), + ); app.skills_dir = crate::tui::app::resolve_skills_dir(&workspace, &config.skills_dir(), config); app.refresh_skill_cache(); app.workspace_context = None; @@ -7919,15 +7974,6 @@ async fn handle_view_events( ViewEvent::ContextMenuSelected { action } => { handle_context_menu_action(app, action); } - ViewEvent::ShellControlBackground => { - request_foreground_shell_background(app); - } - ViewEvent::ShellControlCancel => { - app.backtrack.reset(); - engine_handle.cancel(); - mark_active_turn_cancelled_locally(app); - app.status_message = Some("Request cancelled".to_string()); - } } } @@ -8774,19 +8820,29 @@ fn render_toast_stack_overlay( } } -pub(crate) fn open_shell_control(app: &mut App) { - if !app.is_loading || !active_foreground_shell_running(app) { - app.status_message = Some("No foreground shell command to control".to_string()); +pub(crate) fn request_foreground_shell_background(app: &mut App) { + if !app.is_loading { + app.status_message = Some("No foreground shell command to background".to_string()); return; } - - app.view_stack.push(ShellControlView::new()); - app.status_message = Some("Shell control opened".to_string()); -} - -pub(crate) fn request_foreground_shell_background(app: &mut App) { - if !app.is_loading || !active_foreground_shell_running(app) { - app.status_message = Some("No foreground shell command to background".to_string()); + if !active_foreground_shell_running(app) { + // #3032 AC3: name the reason backgrounding is unavailable — + // interactive execs and non-shell blocking tools are visibly running + // but cannot be detached, and a generic shrug reads like a bug. + let reason = if terminal_pause_has_live_owner(app) { + "the running command is interactive" + } else if app + .active_cell + .as_ref() + .is_some_and(|active| !active.is_empty()) + { + "the running tool is not a foreground shell command" + } else { + "no foreground shell command is running" + }; + app.status_message = Some(format!( + "Cannot background: {reason}. Press Ctrl+C to cancel the turn, or wait for completion." + )); return; } diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 832a8f9c..2172e432 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -9303,3 +9303,64 @@ mod work_sidebar_projection_tests { assert_ne!(entry.status, "running"); } } + +// ── #3033: AgentProgress redraw throttle ─────────────────────────────────── + +#[test] +fn agent_progress_redraw_throttle_permits_first_and_spaced_events() { + let mut last_redraw = None; + let t0 = Instant::now(); + + assert!( + agent_progress_redraw_permitted(&mut last_redraw, t0), + "first progress event always repaints" + ); + assert!( + !agent_progress_redraw_permitted(&mut last_redraw, t0 + Duration::from_millis(50)), + "events inside the 100ms window are throttled" + ); + assert!( + !agent_progress_redraw_permitted(&mut last_redraw, t0 + Duration::from_millis(99)), + "throttled events must not advance the window" + ); + assert!( + agent_progress_redraw_permitted(&mut last_redraw, t0 + Duration::from_millis(150)), + "events past the window repaint again" + ); +} + +#[test] +fn throttled_progress_event_does_not_cancel_other_events_redraw() { + // Repro for the #3033 audit finding: `received_engine_event` is a shared + // accumulator for the whole drain batch. A throttled AgentProgress event + // must restore the PRE-EVENT value instead of clearing the flag, so + // redraws owed to other events (AgentSpawned, AgentList, cross-agent + // AgentComplete...) survive. + let t0 = Instant::now(); + let mut last_redraw = Some(t0); + + // Batch: AgentSpawned (requests redraw), then a throttled AgentProgress. + let mut received_engine_event = true; // AgentSpawned drained + let redraw_requested_before_event = received_engine_event; + received_engine_event = true; // AgentProgress drained + if !agent_progress_redraw_permitted(&mut last_redraw, t0 + Duration::from_millis(10)) { + received_engine_event = redraw_requested_before_event; + } + assert!( + received_engine_event, + "redraw owed to AgentSpawned must survive a throttled progress event" + ); + + // Same batch shape but with NO earlier redraw-worthy event: the lone + // throttled progress event contributes nothing. + let mut received_engine_event = false; + let redraw_requested_before_event = received_engine_event; + received_engine_event = true; // AgentProgress drained + if !agent_progress_redraw_permitted(&mut last_redraw, t0 + Duration::from_millis(20)) { + received_engine_event = redraw_requested_before_event; + } + assert!( + !received_engine_event, + "a lone throttled progress event must not trigger a repaint" + ); +} diff --git a/crates/tui/src/tui/views/mod.rs b/crates/tui/src/tui/views/mod.rs index d69ccf3f..ea8fafd5 100644 --- a/crates/tui/src/tui/views/mod.rs +++ b/crates/tui/src/tui/views/mod.rs @@ -38,7 +38,6 @@ pub enum ModalKind { FeedbackPicker, ThemePicker, ContextMenu, - ShellControl, } #[derive(Debug, Clone)] @@ -195,8 +194,6 @@ pub enum ViewEvent { ContextMenuSelected { action: ContextMenuAction, }, - ShellControlBackground, - ShellControlCancel, /// Emitted by the pager (`c` / `y`) to copy its body to the system /// clipboard. The host handler writes via `app.clipboard` and surfaces a /// status message — modal views cannot reach `app` directly. `label` is @@ -363,142 +360,6 @@ impl fmt::Debug for ViewStack { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum ShellControlChoice { - Background, - Cancel, -} - -impl ShellControlChoice { - fn event(self) -> ViewEvent { - match self { - ShellControlChoice::Background => ViewEvent::ShellControlBackground, - ShellControlChoice::Cancel => ViewEvent::ShellControlCancel, - } - } -} - -pub struct ShellControlView { - selected: ShellControlChoice, -} - -impl ShellControlView { - pub fn new() -> Self { - Self { - selected: ShellControlChoice::Background, - } - } - - fn toggle(&mut self) { - self.selected = match self.selected { - ShellControlChoice::Background => ShellControlChoice::Cancel, - ShellControlChoice::Cancel => ShellControlChoice::Background, - }; - } -} - -impl ModalView for ShellControlView { - fn kind(&self) -> ModalKind { - ModalKind::ShellControl - } - - fn as_any_mut(&mut self) -> &mut dyn std::any::Any { - self - } - - fn handle_key(&mut self, key: KeyEvent) -> ViewAction { - match key.code { - KeyCode::Esc | KeyCode::Char('q') | KeyCode::Char('Q') => ViewAction::Close, - KeyCode::Up | KeyCode::Down | KeyCode::Left | KeyCode::Right | KeyCode::Tab => { - self.toggle(); - ViewAction::None - } - KeyCode::Char('b') | KeyCode::Char('B') => { - ViewAction::EmitAndClose(ViewEvent::ShellControlBackground) - } - KeyCode::Char('c') | KeyCode::Char('C') => { - ViewAction::EmitAndClose(ViewEvent::ShellControlCancel) - } - KeyCode::Enter => ViewAction::EmitAndClose(self.selected.event()), - _ => ViewAction::None, - } - } - - fn render(&self, area: Rect, buf: &mut Buffer) { - use ratatui::{ - style::Style, - text::{Line, Span}, - widgets::{Block, Borders, Clear, Padding, Paragraph, Widget}, - }; - - let popup_width = 62.min(area.width.saturating_sub(4)); - let popup_height = 11.min(area.height.saturating_sub(2)); - - let popup_area = Rect { - x: (area.width - popup_width) / 2, - y: (area.height - popup_height) / 2, - width: popup_width, - height: popup_height, - }; - - Clear.render(popup_area, buf); - - let option_line = |choice: ShellControlChoice, key: &'static str, label: &'static str| { - let selected = self.selected == choice; - let style = if selected { - Style::default() - .fg(palette::SELECTION_TEXT) - .bg(palette::SELECTION_BG) - } else { - Style::default().fg(palette::TEXT_PRIMARY) - }; - Line::from(vec![ - Span::styled(if selected { "> " } else { " " }, style), - Span::styled(format!("{key:<3}"), style.bold()), - Span::styled(label, style), - ]) - }; - - let lines = vec![ - Line::from(Span::styled( - "Foreground shell command is still running.", - Style::default().fg(palette::TEXT_PRIMARY), - )), - Line::from(""), - option_line( - ShellControlChoice::Background, - "B", - "Background - detach and keep the command running", - ), - option_line( - ShellControlChoice::Cancel, - "C", - "Cancel - stop the command and interrupt this turn", - ), - ]; - - let view = Paragraph::new(lines) - .block( - Block::default() - .title(Line::from(vec![Span::styled( - " Shell command ", - Style::default().fg(palette::DEEPSEEK_BLUE).bold(), - )])) - .title_bottom(Line::from(Span::styled( - " Enter select | Esc close ", - Style::default().fg(palette::TEXT_MUTED), - ))) - .borders(Borders::ALL) - .border_style(Style::default().fg(palette::BORDER_COLOR)) - .style(Style::default().bg(palette::DEEPSEEK_INK)) - .padding(Padding::uniform(1)), - ) - .style(Style::default().fg(palette::TEXT_PRIMARY)); - - view.render(popup_area, buf); - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum ConfigScope { Session, @@ -2174,8 +2035,8 @@ fn truncate_view_text(text: &str, max_chars: usize) -> String { #[cfg(test)] mod tests { use super::{ - ConfigListItem, ConfigSection, ConfigView, ModalKind, ModalView, ShellControlView, - ViewAction, ViewEvent, ViewStack, subagent_view_agents, truncate_view_text, + ConfigListItem, ConfigSection, ConfigView, HelpView, ModalKind, ModalView, ViewAction, + ViewEvent, ViewStack, subagent_view_agents, truncate_view_text, }; use crate::config::Config; use crate::localization::Locale; @@ -2811,30 +2672,6 @@ base_url = "https://api.xiaomimimo.com/v1" assert_eq!(view.status.as_deref(), Some("Edit cancelled")); } - #[test] - fn shell_control_view_defaults_to_background() { - let mut view = ShellControlView::new(); - - let action = view.handle_key(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); - - assert!(matches!( - action, - ViewAction::EmitAndClose(ViewEvent::ShellControlBackground) - )); - } - - #[test] - fn shell_control_view_can_select_cancel() { - let mut view = ShellControlView::new(); - - let action = view.handle_key(KeyEvent::new(KeyCode::Char('c'), KeyModifiers::NONE)); - - assert!(matches!( - action, - ViewAction::EmitAndClose(ViewEvent::ShellControlCancel) - )); - } - /// A modal that doesn't override `handle_paste` must report /// "not consumed" so the host can fall through to the composer. /// Regression: views/mod.rs previously inverted the boolean, swallowing @@ -2842,9 +2679,9 @@ base_url = "https://api.xiaomimimo.com/v1" #[test] fn default_modal_does_not_consume_paste() { let mut stack = ViewStack::new(); - stack.push(ShellControlView::new()); + stack.push(HelpView::new_for_locale(crate::localization::Locale::En)); assert!(!stack.handle_paste("hello")); - assert_eq!(stack.top_kind(), Some(ModalKind::ShellControl)); + assert_eq!(stack.top_kind(), Some(ModalKind::Help)); } fn buffer_text(buf: &Buffer, area: Rect) -> String { diff --git a/crates/tui/src/tui/widgets/mod.rs b/crates/tui/src/tui/widgets/mod.rs index 3ae1475a..e80eeccc 100644 --- a/crates/tui/src/tui/widgets/mod.rs +++ b/crates/tui/src/tui/widgets/mod.rs @@ -1615,16 +1615,24 @@ fn option_abort(locale: Locale) -> &'static str { pub struct ElevationWidget<'a> { request: &'a ElevationRequest, selected: usize, + locale: Locale, } impl<'a> ElevationWidget<'a> { - pub fn new(request: &'a ElevationRequest, selected: usize) -> Self { - Self { request, selected } + pub fn new(request: &'a ElevationRequest, selected: usize, locale: Locale) -> Self { + Self { + request, + selected, + locale, + } } } impl Renderable for ElevationWidget<'_> { fn render(&self, area: Rect, buf: &mut Buffer) { + use crate::localization::MessageId; + use crate::localization::tr; + let popup_width = 70.min(area.width.saturating_sub(4)); let popup_height = 22.min(area.height.saturating_sub(4)); let popup_area = Rect { @@ -1639,14 +1647,14 @@ impl Renderable for ElevationWidget<'_> { let mut lines = vec![ Line::from(""), Line::from(vec![Span::styled( - " ⚠ Sandbox Denied ", + tr(self.locale, MessageId::ElevationTitleSandboxDenied), Style::default() .fg(palette::STATUS_ERROR) .add_modifier(Modifier::BOLD), )]), Line::from(""), Line::from(vec![ - Span::raw(" Tool: "), + Span::raw(tr(self.locale, MessageId::ElevationFieldTool)), Span::styled( &self.request.tool_name, Style::default() @@ -1656,18 +1664,17 @@ impl Renderable for ElevationWidget<'_> { ]), ]; - // Show command if it's a shell command if let Some(ref command) = self.request.command { let cmd_display = crate::utils::truncate_with_ellipsis(command, 45, "..."); lines.push(Line::from(vec![ - Span::raw(" Cmd: "), + Span::raw(tr(self.locale, MessageId::ElevationFieldCmd)), Span::styled(cmd_display, Style::default().fg(palette::TEXT_MUTED)), ])); } lines.push(Line::from("")); lines.push(Line::from(vec![ - Span::raw(" Reason: "), + Span::raw(tr(self.locale, MessageId::ElevationFieldReason)), Span::styled( &self.request.denial_reason, Style::default().fg(palette::STATUS_WARNING), @@ -1676,7 +1683,7 @@ impl Renderable for ElevationWidget<'_> { lines.push(Line::from("")); lines.push(Line::from(Span::styled( - " Impact if approved:", + tr(self.locale, MessageId::ElevationImpactHeader), Style::default().fg(palette::TEXT_MUTED), ))); if self @@ -1686,7 +1693,7 @@ impl Renderable for ElevationWidget<'_> { .any(|option| matches!(option, ElevationOption::WithNetwork)) { lines.push(Line::from(Span::styled( - " - network retry enables outbound downloads and HTTP requests", + tr(self.locale, MessageId::ElevationImpactNetwork), Style::default().fg(palette::TEXT_PRIMARY), ))); } @@ -1697,22 +1704,21 @@ impl Renderable for ElevationWidget<'_> { .any(|option| matches!(option, ElevationOption::WithWriteAccess(_))) { lines.push(Line::from(Span::styled( - " - write retry expands writable filesystem scope for this tool call", + tr(self.locale, MessageId::ElevationImpactWrite), Style::default().fg(palette::TEXT_PRIMARY), ))); } lines.push(Line::from(Span::styled( - " - full access removes sandbox restrictions entirely for this retry", + tr(self.locale, MessageId::ElevationImpactFullAccess), Style::default().fg(palette::TEXT_PRIMARY), ))); lines.push(Line::from("")); lines.push(Line::from(Span::styled( - " Choose how to proceed:", + tr(self.locale, MessageId::ElevationPromptProceed), Style::default().fg(palette::TEXT_MUTED), ))); lines.push(Line::from("")); - // Render options for (i, option) in self.request.options.iter().enumerate() { let is_selected = i == self.selected; let style = if is_selected { @@ -1723,11 +1729,27 @@ impl Renderable for ElevationWidget<'_> { Style::default() }; - let key = match option { - ElevationOption::WithNetwork => "n", - ElevationOption::WithWriteAccess(_) => "w", - ElevationOption::FullAccess => "f", - ElevationOption::Abort => "a", + let (key, label_id, desc_id) = match option { + ElevationOption::WithNetwork => ( + "n", + MessageId::ElevationOptionNetwork, + MessageId::ElevationOptionNetworkDesc, + ), + ElevationOption::WithWriteAccess(_) => ( + "w", + MessageId::ElevationOptionWrite, + MessageId::ElevationOptionWriteDesc, + ), + ElevationOption::FullAccess => ( + "f", + MessageId::ElevationOptionFullAccess, + MessageId::ElevationOptionFullAccessDesc, + ), + ElevationOption::Abort => ( + "a", + MessageId::ElevationOptionAbort, + MessageId::ElevationOptionAbortDesc, + ), }; let label_color = match option { @@ -1742,18 +1764,18 @@ impl Renderable for ElevationWidget<'_> { format!("[{key}] "), Style::default().fg(palette::STATUS_SUCCESS), ), - Span::styled(option.label(), style.fg(label_color)), + Span::styled(tr(self.locale, label_id), style.fg(label_color)), ])); lines.push(Line::from(vec![ Span::raw(" "), Span::styled( - option.description(), + tr(self.locale, desc_id), Style::default().fg(palette::TEXT_MUTED), ), ])); } - let title = " Sandbox Elevation Required "; + let title = tr(self.locale, MessageId::ElevationTitleRequired); let block = Block::default() .title(title) .borders(Borders::ALL) diff --git a/docs/AGENT_RUNNER.md b/docs/AGENT_RUNNER.md new file mode 100644 index 00000000..8e26dd40 --- /dev/null +++ b/docs/AGENT_RUNNER.md @@ -0,0 +1,135 @@ +# Agent Runner Protocol + +How a headless agent (DeepSeek V4 on a DigitalOcean droplet, or any codewhale exec caller) picks up, implements, verifies, and delivers a milestone issue — fully autonomously. + +## Prerequisites + +- `gh` CLI authenticated with a fine-grained PAT scoped to `Hmbown/CodeWhale` (Contents RW, Issues RW, PRs RW, Metadata R) +- `codewhale` binary on `$PATH` (v0.8.57+) +- `DEEPSEEK_API_KEY` (or equivalent provider key) exported in the agent user's shell +- A `git worktree` per issue (never commit directly to `main`) + +--- + +## The loop + +### 1. Pick + +```bash +gh issue list \ + --repo Hmbown/CodeWhale \ + --milestone v0.8.58 \ + --label agent-ready \ + --state open \ + --json number,title,url +``` + +Choose an issue. Prefer `release-blocker` → `bug` → `enhancement` order. +Do not pick an issue already labeled `agent-in-progress`. + +### 2. Claim + +```bash +gh issue edit --add-label agent-in-progress --remove-label agent-ready +``` + +This prevents other agents from picking the same issue. + +### 3. Isolate + +```bash +cd /opt/whalebro/codewhale +git fetch origin +git worktree add ../worktrees/issue- -b agent/- origin/main +cd ../worktrees/issue- +``` + +Every issue gets its own branch and worktree. The branch name convention is `agent/-`. + +### 4. Execute + +```bash +gh issue view --json body -q .body | \ + codewhale exec --auto --output-format stream-json "$(cat)" +``` + +The agent reads the issue body and implements the fix. Use a tmux session per issue so the run survives SSH disconnects: + +```bash +tmux new-session -d -s "issue-" \ + "gh issue view --json body -q .body | \ + codewhale exec --auto --output-format stream-json \"\$(cat)\" 2>&1 | tee /tmp/issue-.log" +``` + +For resuming an interrupted run (`--continue` picks up the most recent +session for this workspace; `--resume latest` only exists in the interactive +TUI): + +```bash +codewhale exec --auto --output-format stream-json --continue "..." +``` + +### 5. Verify + +Run the exact commands from the issue's **Verification** section. If they pass, proceed. If they fail, loop back to step 4 with the error output as context, or label `needs-human`. + +### 6. Deliver + +```bash +gh pr create \ + --repo Hmbown/CodeWhale \ + --base main \ + --title "" \ + --body "Closes #" \ + --label v0.8.58 +``` + +All delivery is via PR — never push to `main` directly. Human review is required before merge. + +### 7. On blockage + +```bash +gh issue edit --add-label needs-human --remove-label agent-in-progress +gh issue comment --body "Blocked: . Human decision needed." +``` + +Common blockers: missing credentials, ambiguous scope, test environment unavailable, network outage. + +--- + +## Label semantics + +| Label | Meaning | Auto-applied? | +|---|---|---| +| `agent-ready` | Body has all six template sections; a remote agent may claim it | Yes (template) | +| `agent-in-progress` | Claimed by an agent run; do not double-pick | Manual (step 2) | +| `needs-human` | Agent blocked; requires human decision or credentials | Manual (step 7) | +| `autonomous-ready` | Legacy nightly-loop label; distinct from `agent-ready` | No | + +The `autonomous-ready` label is for the legacy nightly loop (external automation). +New work uses `agent-ready`. + +--- + +## Safety rules + +1. **PR-only delivery.** Never commit to `main`. Every change is a branch + PR. +2. **No force-push.** `git push --force` is forbidden. +3. **Secrets never in argv, history, or logs.** API keys, PATs, and credentials live in `/etc/codewhale/*.env` and are sourced into the agent user's shell. The runtime API listens on `127.0.0.1:7878` only. Telegram bridge chats are allowlisted. +4. **Human reviews every PR.** The droplet loop delivers PRs; a human on the laptop reviews and merges. +5. **One issue per worktree.** No cross-contamination between concurrent agent runs. + +--- + +## Issue body format + +Every `agent-ready` issue must have these six sections (enforced by `.github/ISSUE_TEMPLATE/agent-task.yml`): + +1. **Goal / Why** — what problem, why now +2. **Scope / Plan** — numbered steps with file paths +3. **Key files** — paths to read first +4. **Acceptance criteria** — behavior-level checkboxes +5. **Verification** — exact shell commands +6. **Out of scope** — explicit non-goals + +The body must be self-sufficient: a fresh clone agent with no conversation context must be able to execute it. diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index f0e0338f..33efc879 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -612,7 +612,94 @@ receives the text produced by the previous hook. Hooks marked `background = true` are observer-only and cannot transform or block the message. Existing environment variables remain available. `shell_env` hooks keep their existing `KEY=VALUE` stdout contract; -the JSON stdout contract applies only to `message_submit`. +JSON stdout contracts exist for `message_submit` (above) and +`tool_call_before` (below). + +### `tool_call_before` decision hooks + +`tool_call_before` hooks run before each tool call executes. In +addition to the legacy hard deny (exit code `2`, which always wins +regardless of stdout), a foreground hook may print a JSON decision on +stdout with exit code `0`: + +```json +{ + "decision": "allow" | "deny" | "ask", + "reason": "human-readable explanation (used for deny)", + "updatedInput": { "command": "ls -la" }, + "additionalContext": "text appended to the tool result for the model" +} +``` + +All fields are optional. Empty stdout, non-JSON stdout, and JSON +without a `decision` field behave exactly as before (allow). An +unrecognized `decision` string logs a warning and is treated as allow. + +- `deny` blocks the tool; the model receives a permission-denied tool + result containing `reason`. +- `ask` forces the interactive approval prompt even for tools that + would otherwise auto-run. +- `updatedInput` must be a JSON object; it replaces the tool input + before execution. When several hooks supply it, the last hook wins. +- `additionalContext` is appended to the tool result sent back to the + model as `[hook context] ...`. Multiple hooks' contexts are + concatenated. + +When multiple hooks match, precedence is deny > ask > allow. Hooks +marked `background = true` cannot steer tool calls — they exit +immediately without a captured result. + +Example deny hook: + +```toml +[[hooks.hooks]] +event = "tool_call_before" +command = '''echo '{"decision":"deny","reason":"blocked by project policy"}' ''' +condition = { type = "tool_name", name = "exec_shell" } +``` + +Example ask hook (force approval for every MCP tool): + +```toml +[[hooks.hooks]] +event = "tool_call_before" +command = '''echo '{"decision":"ask"}' ''' +condition = { type = "tool_name", name = "mcp__*" } +``` + +Example input rewrite: + +```toml +[[hooks.hooks]] +event = "tool_call_before" +command = "~/.codewhale/hooks/clamp-shell-timeout.sh" +condition = { type = "tool_name", name = "exec_shell" } +``` + +where the script reads the hook context, then prints +`{"updatedInput": {...}}` with the adjusted arguments. + +`tool_name` conditions support `*` globs: `mcp__*` matches every MCP +tool (e.g. `mcp__github__create_issue`) but not built-ins like +`read_file`; exact names keep matching exactly. Other regex +metacharacters in the pattern are matched literally. + +### Project-local hooks + +Repositories can ship policy in `/.codewhale/hooks.toml`, +using the same shape as the `[hooks]` table (top-level fields plus +`[[hooks]]` entries). Project hooks are appended after global hooks +from `config.toml`, so they run last and, for `updatedInput`, win +ties. A malformed project file logs a warning and startup falls back +to global hooks only. + +```toml +# .codewhale/hooks.toml +[[hooks]] +event = "tool_call_before" +command = '''echo '{"decision":"deny","reason":"no shell in this repo"}' ''' +condition = { type = "tool_name", name = "exec_shell" } +``` ### Turn-end observer hooks diff --git a/docs/KEYBINDINGS.md b/docs/KEYBINDINGS.md index 6782e4ee..e3710bc7 100644 --- a/docs/KEYBINDINGS.md +++ b/docs/KEYBINDINGS.md @@ -11,6 +11,7 @@ Bindings are not (yet) user-configurable — tracked for a future release (#436, | `F1` or `Ctrl-/` | Toggle the help overlay | | `Ctrl-K` | Open the command palette (slash-command finder) | | `Ctrl-C` | Cancel current turn / dismiss modal / arm-then-confirm quit | +| `Ctrl-B` | Background the running foreground shell command (turn continues; the command becomes a `/jobs` background job) | | `Ctrl-D` | Quit (only when the composer is empty) | | `Tab` | Cycle TUI mode: Plan → Agent → YOLO → Plan | | `Shift-Tab` | Cycle reasoning effort: off → high → max → off | diff --git a/docs/OPERATIONS_RUNBOOK.md b/docs/OPERATIONS_RUNBOOK.md index d53a8a45..8e98ac8a 100644 --- a/docs/OPERATIONS_RUNBOOK.md +++ b/docs/OPERATIONS_RUNBOOK.md @@ -28,7 +28,7 @@ Checks: 3. Confirm no local sandbox/permission deadlock in tool output Actions: -1. If a foreground shell command is running, press `Ctrl+B` and choose whether to background it or cancel the current turn. +1. If a foreground shell command is running, press `Ctrl+B` to move it to the background (the turn keeps running and the command becomes a background job under `/jobs`); use `Ctrl+C` instead if you want to cancel the turn. 2. If the command was started in the background, ask the assistant to cancel it with `exec_shell_cancel` and the returned task id. 3. Use `Esc` or `Ctrl+C` to interrupt the current turn when you want to stop the request itself. 4. Retry prompt; if still failing, restart TUI. diff --git a/scripts/remote-smoke/README.md b/scripts/remote-smoke/README.md new file mode 100644 index 00000000..e5f02f27 --- /dev/null +++ b/scripts/remote-smoke/README.md @@ -0,0 +1,153 @@ +# Remote-workbench smoke lab (EXPERIMENTAL) + +Status: experimental smoke-lab scripts for the US-first remote-workbench lane +(issue #1990). Not part of the supported install paths until the smoke passes +and this graduates into a documented setup. + +This concretizes `docs/REMOTE_VM_US.md`: a cheap US VPS running the CodeWhale +runtime on `127.0.0.1` plus the Telegram long-polling bridge, reusing the +provider-agnostic Ubuntu scripts under `scripts/tencent-lighthouse/` (audited: +nothing in them is Tencent-specific). + +## Layout + +- `setup-vm.sh` — provider-agnostic. Run on any fresh Ubuntu 24.04 VM: + bootstrap + prebuilt v0.8.57 release binaries (sha256-verified, no Rust + build) + `gh` CLI + 4G swapfile + Telegram bridge services + secrets + + validator + doctor. +- `digitalocean/provision.sh`, `digitalocean/teardown.sh` — active lane. + Chosen over AWS Lightsail for auth simplicity: one API token vs IAM + credential setup (#1990 allows "a clearly documented better alternative"). +- `aws-lightsail/provision.sh`, `aws-lightsail/teardown.sh` — kept as the + AWS alternative; same flow, needs `aws configure` first. +- `agent-session.sh` — sourceable helper for interactive/tmux agent sessions + as the `codewhale` user. Sources `/etc/codewhale/runtime.env` so the + provider key is available outside of systemd. + +Both provisioners print the API-reported monthly price and require a typed +`yes` before creating anything billable, and both teardowns end with a +leftover-billable-resources check. + +## Who this lane is for (China note) + +Telegram is blocked in mainland China and DigitalOcean has no China +datacenters (cross-border routes are slow; DO IP ranges are frequently +GFW-affected). Mainland-based users should use the existing Tencent +Lighthouse HK + Feishu/Lark lane (`docs/TENCENT_CLOUD_REMOTE_FIRST.md`) +instead — that is exactly why it exists. This lane is for users outside +mainland China. + +## Security model + +- Runtime API binds `127.0.0.1:7878` only; the only inbound port anywhere is + SSH (cloud firewall + ufw, both default to caller-IP /32 where supported). +- Telegram uses outbound long polling — no webhook, no public ingress. +- Telegram chats are allowlisted (`TELEGRAM_CHAT_ALLOWLIST`); unlisted chats + are refused. `TELEGRAM_ALLOW_UNLISTED=true` only for first pairing. +- Secrets travel as a chmod-600 file over scp, land in `/etc/codewhale/*.env` + (0640 root:codewhale), and the transfer file is shredded. Never in argv, + shell history, or logs. + +## Run order — DigitalOcean (from the laptop) + +```bash +# 0. once: create an API token (Web UI -> API -> Generate New Token, write +# scope), then in a real terminal: doctl auth init (paste token) + +# 1. provision (asks before billing starts) +bash scripts/remote-smoke/digitalocean/provision.sh +# defaults: sfo3, s-1vcpu-2gb (~$12/mo), ubuntu-24-04-x64, ~/.ssh/id_ed25519.pub + +# 2. secrets file (never commit; values from BotFather / provider console) +umask 077 && cat > /tmp/cw-secrets.env <<'EOF' +TELEGRAM_BOT_TOKEN=... +CODEWHALE_PROVIDER=deepseek +PROVIDER_KEY_NAME=DEEPSEEK_API_KEY +PROVIDER_KEY_VALUE=... +TELEGRAM_CHAT_ALLOWLIST=... # optional; empty enables first-pairing mode +EOF + +# 3. push secrets + installer, run it (DO Ubuntu images log in as root) +scp /tmp/cw-secrets.env scripts/remote-smoke/setup-vm.sh root@:/tmp/ +rm /tmp/cw-secrets.env +ssh root@ 'SECRETS_FILE=/tmp/cw-secrets.env bash /tmp/setup-vm.sh' + +# 4. phone smoke per docs/REMOTE_VM_US.md "First Smoke Test" + +# 5. teardown when done (stops billing) +bash scripts/remote-smoke/digitalocean/teardown.sh +``` + +For AWS Lightsail substitute step 0 with `aws configure`, step 1/5 with the +`aws-lightsail/` scripts, and ssh as `ubuntu@` with `sudo` in step 3. + +## Cost + +Billed hourly until destroyed. DO `s-1vcpu-2gb` ≈ $12/mo (~$0.018/h); +1 vCPU / 2 GB is enough because the VM downloads release binaries instead of +compiling Rust. A same-day smoke costs well under $1. Bigger options for a +longer-lived host: `s-2vcpu-2gb` (~$18/mo), `s-2vcpu-4gb` (~$24/mo, the +docs/REMOTE_VM_US.md default spec). + +## Known sharp edges (from the 2026-06-09 audit) + +- The Rust binary reads only `DEEPSEEK_RUNTIME_TOKEN`/`--auth-token` and + `--port`; the `CODEWHALE_RUNTIME_*` names in `/etc/codewhale/runtime.env` + work because the systemd unit expands them into flags. Don't start + `codewhale serve` by hand and expect the env file to apply. +- `codewhale-runtime.service` hard-fails activation if + `/home/codewhale/.codewhale` or `/home/codewhale/.deepseek` don't exist + (`ReadWritePaths`); `setup-vm.sh` pre-creates them. +- Both binaries are required (`codewhale` delegates to `codewhale-tui`). +- Exactly one bridge process per bot token — a second poller causes endless + Telegram 409s. Stop any local bridge before starting the VM one. +- `/interrupt` is queued behind an active streaming turn (known limitation, + documented in `docs/REMOTE_SETUP_DESIGN.md` hardening table). + +## Autonomous agent loop (#3022) + +Once the droplet is provisioned and `gh` is authenticated with a +fine-grained PAT (scoped to Hmbown/CodeWhale: Contents RW, Issues RW, +PRs RW, Metadata R), an agent can work the full pick→PR loop headless. + +One-time git wiring after `gh auth login` so pushes use the PAT and +commits have a stable identity: + +```bash +gh auth setup-git +git config --global user.name "whalebro-agent" +git config --global user.email "whalebro-agent@users.noreply.github.com" +``` + +```bash +# 1. Pick an agent-ready issue +gh issue list --repo Hmbown/CodeWhale --milestone v0.8.58 \ + --label agent-ready --state open --json number,title,url + +# 2. Claim it +gh issue edit --add-label agent-in-progress --remove-label agent-ready + +# 3. Isolate in a worktree +git -C /opt/whalebro/codewhale fetch origin +git -C /opt/whalebro/codewhale worktree add \ + /opt/whalebro/worktrees/issue- -b agent/- origin/main +cd /opt/whalebro/worktrees/issue- + +# 4. Execute (run inside a tmux session for SSH-disconnect safety) +. /opt/whalebro/codewhale/scripts/remote-smoke/agent-session.sh +gh issue view --json body -q .body | \ + codewhale exec --auto --output-format stream-json "$(cat)" + +# 5. Verify (run the issue's Verification block verbatim) +# 6. Deliver +gh pr create --repo Hmbown/CodeWhale --base main \ + --title "" --body "Closes #<N>" --label v0.8.58 + +# 7. On blockage: swap label to needs-human + comment +gh issue edit <N> --add-label needs-human --remove-label agent-in-progress +``` + +See `docs/AGENT_RUNNER.md` (added by #3043; until that lands, the design +background lives in `docs/rfcs/REMOTE_SETUP_DESIGN.md`) for the full +protocol including safety rules (PR-only delivery, no force-push, secrets +never in argv/history/logs, one worktree per issue). diff --git a/scripts/remote-smoke/agent-session.sh b/scripts/remote-smoke/agent-session.sh new file mode 100755 index 00000000..5cf7a304 --- /dev/null +++ b/scripts/remote-smoke/agent-session.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Source into an interactive agent shell (tmux, ssh) to export the provider +# key and set defaults that systemd normally handles via EnvironmentFile=. +# +# Usage (as the codewhale user): +# . /opt/whalebro/codewhale/scripts/remote-smoke/agent-session.sh +# codewhale models # should list deepseek-v4-pro +# gh auth status # should show the fine-grained PAT +# +# The runtime.env file is 0640 root:codewhale, readable by the codewhale user. +set -a +# shellcheck disable=SC1091 +. /etc/codewhale/runtime.env +set +a +export CODEWHALE_MODEL="${CODEWHALE_MODEL:-deepseek-v4-pro}" diff --git a/scripts/remote-smoke/aws-lightsail/provision.sh b/scripts/remote-smoke/aws-lightsail/provision.sh new file mode 100755 index 00000000..536e216d --- /dev/null +++ b/scripts/remote-smoke/aws-lightsail/provision.sh @@ -0,0 +1,104 @@ +#!/usr/bin/env bash +# EXPERIMENTAL — AWS Lightsail smoke-lab provisioning for the CodeWhale +# remote workbench (issue #1990). Creates ONE Ubuntu 24.04 Lightsail +# instance with SSH-only firewall. Prints every step and the monthly price +# from the Lightsail API, then requires an explicit "yes" before creating +# anything that costs money. +# +# Usage: +# AWS_REGION=us-east-1 bash scripts/aws-lightsail/provision.sh +# +# Tunables (env): +# INSTANCE_NAME default codewhale-smoke +# BUNDLE_ID default medium_3_0 (2 vCPU / 4 GB — docs/REMOTE_VM_US.md default) +# BLUEPRINT_ID default ubuntu_24_04 +# SSH_PUBKEY default ~/.ssh/id_ed25519.pub (imported as key pair) +# RESTRICT_SSH_TO_MY_IP default true (firewall cidr = caller IP /32) +set -euo pipefail + +INSTANCE_NAME="${INSTANCE_NAME:-codewhale-smoke}" +BUNDLE_ID="${BUNDLE_ID:-medium_3_0}" +BLUEPRINT_ID="${BLUEPRINT_ID:-ubuntu_24_04}" +SSH_PUBKEY="${SSH_PUBKEY:-$HOME/.ssh/id_ed25519.pub}" +KEY_PAIR_NAME="${KEY_PAIR_NAME:-${INSTANCE_NAME}-key}" +RESTRICT_SSH_TO_MY_IP="${RESTRICT_SSH_TO_MY_IP:-true}" + +command -v aws >/dev/null || { echo "aws CLI is required" >&2; exit 1; } +aws sts get-caller-identity >/dev/null || { echo "aws is not authenticated; run 'aws configure' or 'aws sso login'" >&2; exit 1; } + +REGION="${AWS_REGION:-$(aws configure get region || true)}" +[[ -n "${REGION}" ]] || { echo "Set AWS_REGION (e.g. us-east-1)" >&2; exit 1; } + +echo "== Preflight ==" +aws lightsail get-blueprints --region "$REGION" \ + --query "blueprints[?blueprintId=='${BLUEPRINT_ID}'].[blueprintId,name]" --output text \ + | grep -q . || { echo "Blueprint ${BLUEPRINT_ID} not found in ${REGION}" >&2; exit 1; } + +PRICE=$(aws lightsail get-bundles --region "$REGION" \ + --query "bundles[?bundleId=='${BUNDLE_ID}'].price | [0]" --output text) +SPECS=$(aws lightsail get-bundles --region "$REGION" \ + --query "bundles[?bundleId=='${BUNDLE_ID}'].[cpuCount,ramSizeInGb,diskSizeInGb] | [0]" --output text) +[[ "$PRICE" != "None" && -n "$PRICE" ]] || { echo "Bundle ${BUNDLE_ID} not found in ${REGION}" >&2; exit 1; } + +[[ -f "$SSH_PUBKEY" ]] || { echo "SSH public key not found: $SSH_PUBKEY" >&2; exit 1; } + +echo "Region: $REGION" +echo "Instance: $INSTANCE_NAME" +echo "Blueprint: $BLUEPRINT_ID" +echo "Bundle: $BUNDLE_ID (vCPU/RAM-GB/Disk-GB: $SPECS)" +echo "Monthly price: \$$PRICE USD (billed hourly until deleted)" +echo "SSH key: $SSH_PUBKEY -> key pair '$KEY_PAIR_NAME'" +echo +read -r -p "Create this instance and start billing? Type 'yes' to proceed: " CONFIRM +[[ "$CONFIRM" == "yes" ]] || { echo "Aborted; nothing created."; exit 1; } + +echo "== Import SSH key pair ==" +if ! aws lightsail get-key-pair --region "$REGION" --key-pair-name "$KEY_PAIR_NAME" >/dev/null 2>&1; then + aws lightsail import-key-pair --region "$REGION" \ + --key-pair-name "$KEY_PAIR_NAME" \ + --public-key-base64 "$(base64 < "$SSH_PUBKEY")" >/dev/null + echo "imported $KEY_PAIR_NAME" +else + echo "key pair $KEY_PAIR_NAME already exists; reusing" +fi + +echo "== Create instance ==" +AZ=$(aws lightsail get-regions --include-availability-zones --region "$REGION" \ + --query "regions[?name=='${REGION}'].availabilityZones[0].zoneName | [0]" --output text) +aws lightsail create-instances --region "$REGION" \ + --instance-names "$INSTANCE_NAME" \ + --availability-zone "$AZ" \ + --blueprint-id "$BLUEPRINT_ID" \ + --bundle-id "$BUNDLE_ID" \ + --key-pair-name "$KEY_PAIR_NAME" >/dev/null +echo "created $INSTANCE_NAME in $AZ; waiting for running state..." + +for _ in $(seq 1 60); do + STATE=$(aws lightsail get-instance-state --region "$REGION" --instance-name "$INSTANCE_NAME" \ + --query 'state.name' --output text 2>/dev/null || echo pending) + [[ "$STATE" == "running" ]] && break + sleep 5 +done +[[ "${STATE:-}" == "running" ]] || { echo "instance did not reach running state" >&2; exit 1; } + +echo "== Firewall: SSH only ==" +CIDR="0.0.0.0/0" +if [[ "$RESTRICT_SSH_TO_MY_IP" == "true" ]]; then + MYIP=$(curl -fsS https://checkip.amazonaws.com | tr -d '\n') + CIDR="${MYIP}/32" +fi +aws lightsail put-instance-public-ports --region "$REGION" \ + --instance-name "$INSTANCE_NAME" \ + --port-infos "fromPort=22,toPort=22,protocol=tcp,cidrs=${CIDR}" >/dev/null +echo "open ports replaced with: 22/tcp from ${CIDR} (everything else closed)" + +IP=$(aws lightsail get-instance --region "$REGION" --instance-name "$INSTANCE_NAME" \ + --query 'instance.publicIpAddress' --output text) +echo +echo "== Done ==" +echo "Instance: $INSTANCE_NAME ($REGION, $STATE)" +echo "Public IP: $IP" +echo "SSH: ssh -i ${SSH_PUBKEY%.pub} ubuntu@${IP}" +echo +echo "Teardown when finished (stops billing):" +echo " AWS_REGION=$REGION bash scripts/aws-lightsail/teardown.sh" diff --git a/scripts/remote-smoke/aws-lightsail/teardown.sh b/scripts/remote-smoke/aws-lightsail/teardown.sh new file mode 100755 index 00000000..dc1242d5 --- /dev/null +++ b/scripts/remote-smoke/aws-lightsail/teardown.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# EXPERIMENTAL — tears down the CodeWhale Lightsail smoke lab and verifies +# nothing billable is left behind (instance, key pair, static IPs, disks, +# snapshots). Safe to re-run; prints what it finds before deleting. +set -euo pipefail + +INSTANCE_NAME="${INSTANCE_NAME:-codewhale-smoke}" +KEY_PAIR_NAME="${KEY_PAIR_NAME:-${INSTANCE_NAME}-key}" +REGION="${AWS_REGION:-$(aws configure get region || true)}" +[[ -n "${REGION}" ]] || { echo "Set AWS_REGION" >&2; exit 1; } + +echo "== Current Lightsail resources in ${REGION} ==" +aws lightsail get-instances --region "$REGION" \ + --query 'instances[].[name,state.name,bundleId]' --output table || true + +if aws lightsail get-instance --region "$REGION" --instance-name "$INSTANCE_NAME" >/dev/null 2>&1; then + read -r -p "Delete instance '${INSTANCE_NAME}'? Type 'yes': " CONFIRM + [[ "$CONFIRM" == "yes" ]] || { echo "Aborted."; exit 1; } + aws lightsail delete-instance --region "$REGION" --instance-name "$INSTANCE_NAME" >/dev/null + echo "deleted instance ${INSTANCE_NAME}" +else + echo "instance ${INSTANCE_NAME} not found (already deleted?)" +fi + +if aws lightsail get-key-pair --region "$REGION" --key-pair-name "$KEY_PAIR_NAME" >/dev/null 2>&1; then + aws lightsail delete-key-pair --region "$REGION" --key-pair-name "$KEY_PAIR_NAME" >/dev/null + echo "deleted key pair ${KEY_PAIR_NAME}" +fi + +echo "== Leftover billable resources check ==" +echo "-- static IPs (billed when unattached):" +aws lightsail get-static-ips --region "$REGION" --query 'staticIps[].[name,isAttached]' --output table +echo "-- extra disks:" +aws lightsail get-disks --region "$REGION" --query 'disks[].[name,state]' --output table +echo "-- instance snapshots:" +aws lightsail get-instance-snapshots --region "$REGION" --query 'instanceSnapshots[].[name,state]' --output table +echo "-- remaining instances:" +aws lightsail get-instances --region "$REGION" --query 'instances[].[name,state.name]' --output table +echo +echo "If all tables above are empty, Lightsail billing for this lab is fully stopped." diff --git a/scripts/remote-smoke/digitalocean/provision.sh b/scripts/remote-smoke/digitalocean/provision.sh new file mode 100755 index 00000000..89076e27 --- /dev/null +++ b/scripts/remote-smoke/digitalocean/provision.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +# EXPERIMENTAL — DigitalOcean smoke-lab provisioning for the CodeWhale +# remote workbench (issue #1990 "clearly documented better alternative" +# clause). Creates ONE Ubuntu 24.04 droplet plus a cloud firewall that +# allows inbound SSH only. Prints the monthly price from the DO API and +# requires a typed "yes" before creating anything billable. +# +# Auth: doctl must be authenticated. Either +# doctl auth init # paste token interactively, or +# export DIGITALOCEAN_ACCESS_TOKEN=... # doctl reads this env var +# +# Usage: +# bash scripts/remote-smoke/digitalocean/provision.sh +# +# Tunables (env): +# DROPLET_NAME default codewhale-smoke +# DO_REGION default sfo3 (San Francisco) +# DROPLET_SIZE default s-1vcpu-2gb (~$12/mo; prebuilt binaries mean no +# Rust build, so 1 vCPU / 2 GB is enough for the smoke. +# Use s-2vcpu-2gb/s-2vcpu-4gb for a longer-lived host.) +# DROPLET_IMAGE default ubuntu-24-04-x64 +# SSH_PUBKEY default ~/.ssh/id_ed25519.pub (imported if not present) +# RESTRICT_SSH_TO_MY_IP default true (firewall source = caller IP /32) +set -euo pipefail + +DROPLET_NAME="${DROPLET_NAME:-codewhale-smoke}" +DO_REGION="${DO_REGION:-sfo3}" +DROPLET_SIZE="${DROPLET_SIZE:-s-1vcpu-2gb}" +DROPLET_IMAGE="${DROPLET_IMAGE:-ubuntu-24-04-x64}" +SSH_PUBKEY="${SSH_PUBKEY:-$HOME/.ssh/id_ed25519.pub}" +SSH_KEY_NAME="${SSH_KEY_NAME:-${DROPLET_NAME}-key}" +FIREWALL_NAME="${FIREWALL_NAME:-${DROPLET_NAME}-ssh-only}" +RESTRICT_SSH_TO_MY_IP="${RESTRICT_SSH_TO_MY_IP:-true}" + +command -v doctl >/dev/null || { echo "doctl is required (brew install doctl)" >&2; exit 1; } +doctl account get >/dev/null || { echo "doctl is not authenticated; run 'doctl auth init' or set DIGITALOCEAN_ACCESS_TOKEN" >&2; exit 1; } +[[ -f "$SSH_PUBKEY" ]] || { echo "SSH public key not found: $SSH_PUBKEY" >&2; exit 1; } + +echo "== Preflight ==" +[[ "$(doctl compute region list --format Slug,Available --no-header | awk -v r="$DO_REGION" '$1 == r {print $2}')" == "true" ]] \ + || { echo "Region ${DO_REGION} not available" >&2; exit 1; } + +read -r PRICE VCPUS MEM DISK < <(doctl compute size list \ + --format Slug,PriceMonthly,VCPUs,Memory,Disk --no-header \ + | awk -v s="$DROPLET_SIZE" '$1 == s {print $2, $3, $4, $5}') +[[ -n "${PRICE:-}" ]] || { echo "Size ${DROPLET_SIZE} not found" >&2; exit 1; } + +echo "Region: $DO_REGION" +echo "Droplet: $DROPLET_NAME" +echo "Image: $DROPLET_IMAGE" +echo "Size: $DROPLET_SIZE (${VCPUS} vCPU / ${MEM} MB RAM / ${DISK} GB disk)" +echo "Monthly price: \$$PRICE USD (billed hourly until destroyed)" +echo "SSH key: $SSH_PUBKEY -> '$SSH_KEY_NAME'" +echo "Firewall: $FIREWALL_NAME (inbound 22/tcp only)" +echo +read -r -p "Create this droplet and start billing? Type 'yes' to proceed: " CONFIRM +[[ "$CONFIRM" == "yes" ]] || { echo "Aborted; nothing created."; exit 1; } + +echo "== Import SSH key ==" +KEY_ID=$(doctl compute ssh-key list --format ID,Name --no-header | awk -v n="$SSH_KEY_NAME" '$2 == n {print $1; exit}') +if [[ -z "$KEY_ID" ]]; then + KEY_ID=$(doctl compute ssh-key import "$SSH_KEY_NAME" --public-key-file "$SSH_PUBKEY" --format ID --no-header) + echo "imported $SSH_KEY_NAME (id $KEY_ID)" +else + echo "key $SSH_KEY_NAME already exists (id $KEY_ID); reusing" +fi + +echo "== Create droplet ==" +doctl compute droplet create "$DROPLET_NAME" \ + --region "$DO_REGION" \ + --image "$DROPLET_IMAGE" \ + --size "$DROPLET_SIZE" \ + --ssh-keys "$KEY_ID" \ + --tag-name codewhale-smoke \ + --wait >/dev/null +DROPLET_ID=$(doctl compute droplet list --format ID,Name --no-header | awk -v n="$DROPLET_NAME" '$2 == n {print $1; exit}') +IP=$(doctl compute droplet get "$DROPLET_ID" --format PublicIPv4 --no-header) +echo "created $DROPLET_NAME (id $DROPLET_ID, $IP)" + +echo "== Cloud firewall: SSH only ==" +SRC="0.0.0.0/0,address:::/0" +if [[ "$RESTRICT_SSH_TO_MY_IP" == "true" ]]; then + MYIP=$(curl -fsS https://api.ipify.org) + SRC="${MYIP}/32" +fi +if ! doctl compute firewall list --format Name --no-header | grep -qx "$FIREWALL_NAME"; then + doctl compute firewall create \ + --name "$FIREWALL_NAME" \ + --inbound-rules "protocol:tcp,ports:22,address:${SRC}" \ + --outbound-rules "protocol:tcp,ports:all,address:0.0.0.0/0,address:::/0 protocol:udp,ports:all,address:0.0.0.0/0,address:::/0 protocol:icmp,address:0.0.0.0/0,address:::/0" \ + --droplet-ids "$DROPLET_ID" >/dev/null + echo "firewall $FIREWALL_NAME created: inbound 22/tcp from ${SRC}, all else blocked" +else + FW_ID=$(doctl compute firewall list --format ID,Name --no-header | awk -v n="$FIREWALL_NAME" '$2 == n {print $1; exit}') + doctl compute firewall add-droplets "$FW_ID" --droplet-ids "$DROPLET_ID" + echo "existing firewall $FIREWALL_NAME attached" +fi + +echo +echo "== Done ==" +echo "Droplet: $DROPLET_NAME ($DO_REGION)" +echo "Public IP: $IP" +echo "SSH: ssh -i ${SSH_PUBKEY%.pub} root@${IP}" +echo " (DO Ubuntu images log in as root, not ubuntu)" +echo +echo "Teardown when finished (stops billing):" +echo " bash scripts/remote-smoke/digitalocean/teardown.sh" diff --git a/scripts/remote-smoke/digitalocean/teardown.sh b/scripts/remote-smoke/digitalocean/teardown.sh new file mode 100755 index 00000000..0d791acc --- /dev/null +++ b/scripts/remote-smoke/digitalocean/teardown.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# EXPERIMENTAL — tears down the CodeWhale DigitalOcean smoke lab and lists +# anything billable that remains (droplets, volumes, snapshots, reserved +# IPs). Safe to re-run; prints what it finds before deleting. +set -euo pipefail + +DROPLET_NAME="${DROPLET_NAME:-codewhale-smoke}" +SSH_KEY_NAME="${SSH_KEY_NAME:-${DROPLET_NAME}-key}" +FIREWALL_NAME="${FIREWALL_NAME:-${DROPLET_NAME}-ssh-only}" + +command -v doctl >/dev/null || { echo "doctl is required" >&2; exit 1; } +doctl account get >/dev/null || { echo "doctl is not authenticated" >&2; exit 1; } + +echo "== Current droplets ==" +doctl compute droplet list --format ID,Name,Status,Region,SizeSlug + +DROPLET_ID=$(doctl compute droplet list --format ID,Name --no-header | awk -v n="$DROPLET_NAME" '$2 == n {print $1; exit}') +if [[ -n "$DROPLET_ID" ]]; then + read -r -p "Destroy droplet '${DROPLET_NAME}' (id ${DROPLET_ID})? Type 'yes': " CONFIRM + [[ "$CONFIRM" == "yes" ]] || { echo "Aborted."; exit 1; } + doctl compute droplet delete "$DROPLET_ID" --force + echo "destroyed droplet ${DROPLET_NAME}" +else + echo "droplet ${DROPLET_NAME} not found (already destroyed?)" +fi + +FW_ID=$(doctl compute firewall list --format ID,Name --no-header | awk -v n="$FIREWALL_NAME" '$2 == n {print $1; exit}') +if [[ -n "$FW_ID" ]]; then + doctl compute firewall delete "$FW_ID" --force + echo "deleted firewall ${FIREWALL_NAME}" +fi + +KEY_ID=$(doctl compute ssh-key list --format ID,Name --no-header | awk -v n="$SSH_KEY_NAME" '$2 == n {print $1; exit}') +if [[ -n "$KEY_ID" ]]; then + doctl compute ssh-key delete "$KEY_ID" --force + echo "deleted ssh key ${SSH_KEY_NAME}" +fi + +echo "== Leftover billable resources check ==" +echo "-- droplets:" +doctl compute droplet list --format ID,Name,Status +echo "-- volumes:" +doctl compute volume list --format ID,Name,Size +echo "-- snapshots:" +doctl compute snapshot list --format ID,Name,ResourceType +echo "-- reserved IPs (billed when unassigned):" +doctl compute reserved-ip list --format IP,DropletID +echo +echo "If all lists above are empty, DigitalOcean billing for this lab is fully stopped." diff --git a/scripts/remote-smoke/setup-vm.sh b/scripts/remote-smoke/setup-vm.sh new file mode 100755 index 00000000..33312166 --- /dev/null +++ b/scripts/remote-smoke/setup-vm.sh @@ -0,0 +1,162 @@ +#!/usr/bin/env bash +# EXPERIMENTAL — one-shot CodeWhale + Telegram bridge setup for a fresh +# AWS Lightsail Ubuntu 24.04 VM (issue #1990 smoke lane). +# +# Run ON THE VM as root: +# sudo SECRETS_FILE=/tmp/cw-secrets.env bash setup-vm.sh +# +# SECRETS_FILE is a chmod-600 env file you scp'd up, containing: +# TELEGRAM_BOT_TOKEN=... # from @BotFather +# CODEWHALE_PROVIDER=deepseek # or arcee / xiaomi-mimo / ... +# PROVIDER_KEY_NAME=DEEPSEEK_API_KEY +# PROVIDER_KEY_VALUE=... +# TELEGRAM_CHAT_ALLOWLIST=123456789 # optional; empty = first-pairing mode +# The file is shredded after the values land in /etc/codewhale/*.env. +# +# Reuses the repo's existing provider-agnostic scripts: +# scripts/tencent-lighthouse/bootstrap-ubuntu.sh +# scripts/tencent-lighthouse/install-services.sh (CODEWHALE_BRIDGE=telegram) +# scripts/tencent-lighthouse/doctor.sh +# Uses prebuilt release binaries instead of a Rust build. +set -euo pipefail + +RELEASE_TAG="${RELEASE_TAG:-v0.8.57}" +REPO_URL="${REPO_URL:-https://github.com/Hmbown/CodeWhale.git}" +REPO_BRANCH="${REPO_BRANCH:-main}" +SECRETS_FILE="${SECRETS_FILE:-/tmp/cw-secrets.env}" + +[[ "$EUID" -eq 0 ]] || { echo "run as root (sudo)" >&2; exit 1; } +[[ -f "$SECRETS_FILE" ]] || { echo "SECRETS_FILE not found: $SECRETS_FILE" >&2; exit 1; } + +# shellcheck disable=SC1090 +. "$SECRETS_FILE" +: "${TELEGRAM_BOT_TOKEN:?missing in SECRETS_FILE}" +: "${CODEWHALE_PROVIDER:?missing in SECRETS_FILE}" +: "${PROVIDER_KEY_NAME:?missing in SECRETS_FILE}" +: "${PROVIDER_KEY_VALUE:?missing in SECRETS_FILE}" +TELEGRAM_CHAT_ALLOWLIST="${TELEGRAM_CHAT_ALLOWLIST:-}" + +echo "== [1/8] clone repo (${REPO_BRANCH}) ==" +apt-get update -q +apt-get install -y -q git curl ca-certificates +if [[ ! -d /tmp/codewhale/.git ]]; then + git clone --depth 1 --branch "$REPO_BRANCH" "$REPO_URL" /tmp/codewhale +fi + +echo "== [2/8] bootstrap (user, dirs, packages, ufw, env skeletons) ==" +CODEWHALE_REPO_URL="$REPO_URL" CODEWHALE_REPO_BRANCH="$REPO_BRANCH" \ + bash /tmp/codewhale/scripts/tencent-lighthouse/bootstrap-ubuntu.sh + +echo "== [3/8] install prebuilt ${RELEASE_TAG} binaries (no Rust build) ==" +# The systemd unit hardcodes /home/codewhale/.cargo/bin/codewhale, so we put +# the release binaries exactly there. +BIN_DIR=/home/codewhale/.cargo/bin +install -d -o codewhale -g codewhale "$BIN_DIR" +BASE="https://github.com/Hmbown/CodeWhale/releases/download/${RELEASE_TAG}" +TMP=$(mktemp -d) +curl -fsSL -o "$TMP/codewhale" "$BASE/codewhale-linux-x64" +curl -fsSL -o "$TMP/codewhale-tui" "$BASE/codewhale-tui-linux-x64" +curl -fsSL -o "$TMP/sha256.txt" "$BASE/codewhale-artifacts-sha256.txt" +( cd "$TMP" + grep -E ' (codewhale|codewhale-tui)-linux-x64$' sha256.txt \ + | sed 's/codewhale-linux-x64/codewhale/; s/codewhale-tui-linux-x64/codewhale-tui/' \ + | sha256sum -c - ) +install -m 0755 -o codewhale -g codewhale "$TMP/codewhale" "$BIN_DIR/codewhale" +install -m 0755 -o codewhale -g codewhale "$TMP/codewhale-tui" "$BIN_DIR/codewhale-tui" +rm -rf "$TMP" +sudo -u codewhale "$BIN_DIR/codewhale" --version +sudo -u codewhale "$BIN_DIR/codewhale-tui" --version + +echo "== [4/8] install services (telegram bridge) ==" +CODEWHALE_BRIDGE=telegram bash /tmp/codewhale/scripts/tencent-lighthouse/install-services.sh + +echo "== [5/8] write secrets into /etc/codewhale/*.env ==" +RUNTIME_ENV=/etc/codewhale/runtime.env +BRIDGE_ENV=/etc/codewhale/telegram-bridge.env +RUNTIME_TOKEN="dst_$(openssl rand -hex 24)" + +set_kv() { # file key value (replace or append; never echoes the value) + local file="$1" key="$2" value="$3" + if grep -qE "^${key}=" "$file"; then + # use | delimiter; tokens never contain | + sed -i "s|^${key}=.*|${key}=${value}|" "$file" + else + printf '%s=%s\n' "$key" "$value" >> "$file" + fi +} +set_kv "$RUNTIME_ENV" CODEWHALE_RUNTIME_TOKEN "$RUNTIME_TOKEN" +set_kv "$RUNTIME_ENV" CODEWHALE_PROVIDER "$CODEWHALE_PROVIDER" +set_kv "$RUNTIME_ENV" "$PROVIDER_KEY_NAME" "$PROVIDER_KEY_VALUE" +set_kv "$BRIDGE_ENV" CODEWHALE_RUNTIME_TOKEN "$RUNTIME_TOKEN" +set_kv "$BRIDGE_ENV" TELEGRAM_BOT_TOKEN "$TELEGRAM_BOT_TOKEN" +if [[ -n "$TELEGRAM_CHAT_ALLOWLIST" ]]; then + set_kv "$BRIDGE_ENV" TELEGRAM_CHAT_ALLOWLIST "$TELEGRAM_CHAT_ALLOWLIST" + set_kv "$BRIDGE_ENV" TELEGRAM_ALLOW_UNLISTED false +else + echo "[warn] no TELEGRAM_CHAT_ALLOWLIST given: enabling first-pairing mode" + echo "[warn] (TELEGRAM_ALLOW_UNLISTED=true). DM the bot /status, copy the" + echo "[warn] chat_id into TELEGRAM_CHAT_ALLOWLIST, set ALLOW_UNLISTED=false," + echo "[warn] then: systemctl restart codewhale-telegram-bridge" + set_kv "$BRIDGE_ENV" TELEGRAM_ALLOW_UNLISTED true +fi +chmod 0640 "$RUNTIME_ENV" "$BRIDGE_ENV" +chown root:codewhale "$RUNTIME_ENV" "$BRIDGE_ENV" +shred -u "$SECRETS_FILE" +echo "secrets written; $SECRETS_FILE shredded" + +echo "== [5b/8] install gh CLI (for autonomous agent PR workflow) ==" +if ! command -v gh &>/dev/null; then + apt-get install -y -q software-properties-common + # cli.github.com recommends the official APT repo for Ubuntu + (type -p wget &>/dev/null || apt-get install -y -q wget) + mkdir -p -m 755 /etc/apt/keyrings + wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg \ + | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg >/dev/null + chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg + echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \ + | tee /etc/apt/sources.list.d/github-cli.list >/dev/null + apt-get update -q + apt-get install -y -q gh +fi + +echo "== [5c/8] create 4G swapfile (idempotent) ==" +if [[ ! -f /swapfile ]]; then + fallocate -l 4G /swapfile + chmod 600 /swapfile + mkswap /swapfile + swapon /swapfile + echo '/swapfile none swap sw 0 0' >> /etc/fstab + echo "swapfile created and activated" +else + echo "swapfile already exists, skipping" +fi + +echo "== [6/8] pre-create runtime ReadWritePaths (unit fails without them) ==" +install -d -o codewhale -g codewhale -m 0700 \ + /home/codewhale/.codewhale /home/codewhale/.deepseek + +echo "== [7/8] validate config ==" +sudo -u codewhale node /opt/codewhale/telegram-bridge/scripts/validate-config.mjs \ + --env "$BRIDGE_ENV" --runtime-env "$RUNTIME_ENV" \ + --workspace-root /opt/whalebro --check-filesystem + +echo "== [8/8] start + doctor ==" +systemctl start codewhale-runtime +for _ in $(seq 1 20); do + curl -fsS --max-time 2 http://127.0.0.1:7878/health >/dev/null 2>&1 && break + sleep 1 +done +curl -fsS http://127.0.0.1:7878/health; echo +systemctl start codewhale-telegram-bridge +sleep 3 +CODEWHALE_BRIDGE=telegram bash /tmp/codewhale/scripts/tencent-lighthouse/doctor.sh + +echo +echo "== Setup complete. Phone smoke checklist (docs/REMOTE_VM_US.md): ==" +echo " 1. DM the bot: /status" +echo " 2. /menu (tappable controls)" +echo " 3. prompt: summarize git status in /opt/whalebro/codewhale" +echo " 4. /threads then a Resume button" +echo " 5. trigger a shell approval; test Allow/Deny buttons and /allow|/deny" +echo " 6. /interrupt during an active turn" +echo " 7. sudo reboot; confirm both services return: systemctl status codewhale-runtime codewhale-telegram-bridge"