diff --git a/CHANGELOG.md b/CHANGELOG.md index 139c39e5..764ed487 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -80,6 +80,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 checkpoint instead of ending as a null failed result, and `agent_eval` can explicitly continue a live checkpointed interrupted child while normal completed/failed/cancelled follow-up behavior stays unchanged (#2029). +- Durable task recovery no longer requeues tasks that were `running` when the + previous CodeWhale process exited. On restart those records are marked failed + with a recovery note, and any running tool-call summaries are marked failed + too, so stale shell/task state cannot silently become live work again (#1786). - Auto-generated project instructions now reuse the bounded Project Context Pack data instead of running an unbounded summary/tree scan when no `.codewhale/instructions.md` file exists. The fallback keeps later @@ -104,6 +108,7 @@ dense tool-call transcript collapse/sidebar detail direction (#2738, #2734, **@h3c-hexin** for the tool-agent model inheritance and configured `skills_dir` fixes (#2736, #2737). 
Thanks also to **@qiyuanlicn** for the checkpoint/resume report that shaped the sub-agent recovery slice (#2029), +to **@bevis-wong** for the long-running shell/task liveness report (#1786), and to **@NASLXTO** and **@wuxixing** for the large-workspace startup reports (#697, #1827), and to **@linzhiqin2003** and **@merchloubna70-dot** for earlier context-cap and diff --git a/crates/tui/src/task_manager.rs b/crates/tui/src/task_manager.rs index 4920917f..95db71f8 100644 --- a/crates/tui/src/task_manager.rs +++ b/crates/tui/src/task_manager.rs @@ -1512,14 +1512,34 @@ fn load_state( ); } if task.status == TaskStatus::Running { - task.status = TaskStatus::Queued; - task.started_at = None; - task.ended_at = None; - task.duration_ms = None; + let now = Utc::now(); + let duration_ms = task.started_at.and_then(|started| { + u64::try_from(now.signed_duration_since(started).num_milliseconds()).ok() + }); + task.status = TaskStatus::Failed; + task.ended_at = Some(now); + task.duration_ms = duration_ms; + task.error = Some( + "Interrupted by process restart; prior process is not attached".to_string(), + ); + for tool in &mut task.tool_calls { + if tool.status == TaskToolStatus::Running { + tool.status = TaskToolStatus::Failed; + tool.ended_at = Some(now); + tool.duration_ms = duration_ms.or_else(|| { + u64::try_from( + now.signed_duration_since(tool.started_at) + .num_milliseconds(), + ) + .ok() + }); + } + } task.timeline.push(TaskTimelineEntry { - timestamp: Utc::now(), + timestamp: now, kind: "recovered".to_string(), - summary: "Recovered from restart and re-queued".to_string(), + summary: "Interrupted by process restart; prior process is not attached" + .to_string(), detail_path: None, }); } @@ -1790,6 +1810,98 @@ mod tests { Ok(()) } + #[test] + fn running_tasks_are_not_requeued_after_restart() -> Result<()> { + let root = std::env::temp_dir().join(format!("deepseek-task-test-{}", Uuid::new_v4())); + let tasks_dir = root.join("tasks"); + fs::create_dir_all(&tasks_dir)?; + 
let queue_path = root.join("queue.json"); + let task_id = "task_stale_running".to_string(); + let started_at = Utc::now() - chrono::Duration::seconds(30); + let task = TaskRecord { + schema_version: CURRENT_TASK_SCHEMA_VERSION, + id: task_id.clone(), + prompt: "long-running shell work".to_string(), + model: "deepseek-v4-flash".to_string(), + workspace: PathBuf::from("."), + mode: "agent".to_string(), + allow_shell: true, + trust_mode: false, + auto_approve: false, + status: TaskStatus::Running, + created_at: started_at, + started_at: Some(started_at), + ended_at: None, + duration_ms: None, + result_summary: None, + result_detail_path: None, + error: None, + thread_id: Some("thr_stale".to_string()), + turn_id: Some("turn_stale".to_string()), + runtime_event_count: 0, + checklist: TaskChecklistState::default(), + gates: Vec::new(), + attempts: Vec::new(), + artifacts: Vec::new(), + github_events: Vec::new(), + tool_calls: vec![TaskToolCallSummary { + id: "tool_shell".to_string(), + name: "task_shell_start".to_string(), + status: TaskToolStatus::Running, + started_at, + ended_at: None, + duration_ms: None, + input_summary: Some("shell: sleep 999".to_string()), + output_summary: None, + detail_path: None, + patch_ref: None, + }], + timeline: vec![TaskTimelineEntry { + timestamp: started_at, + kind: "running".to_string(), + summary: "Task started".to_string(), + detail_path: None, + }], + }; + fs::write( + tasks_dir.join(format!("{task_id}.json")), + serde_json::to_string_pretty(&task)?, + )?; + fs::write( + &queue_path, + serde_json::to_string_pretty(&QueueFile { + queue: vec![task_id.clone()], + })?, + )?; + + let (tasks, queue) = load_state(&tasks_dir, &queue_path)?; + let recovered = tasks.get(&task_id).expect("task loaded"); + + assert!(queue.is_empty(), "stale running task must not be requeued"); + assert_eq!(recovered.status, TaskStatus::Failed); + assert!( + recovered + .error + .as_deref() + .is_some_and(|err| err.contains("prior process is not attached")), + 
"recovered task should explain stale process ownership: {recovered:?}" + ); + assert!(recovered.ended_at.is_some()); + assert!(recovered.duration_ms.is_some()); + assert_eq!(recovered.tool_calls[0].status, TaskToolStatus::Failed); + assert!(recovered.tool_calls[0].ended_at.is_some()); + assert!( + recovered + .timeline + .iter() + .any(|entry| entry.kind == "recovered" + && entry.summary.contains("prior process is not attached")), + "recovery timeline should explain why the task is terminal: {:?}", + recovered.timeline + ); + Ok(()) + } + #[tokio::test] async fn default_workspace_updates_for_future_tasks() -> Result<()> { let root = std::env::temp_dir().join(format!("deepseek-task-test-{}", Uuid::new_v4())); diff --git a/docs/V0_9_0_EXECUTION_MAP.md b/docs/V0_9_0_EXECUTION_MAP.md index 678e52cc..3faae403 100644 --- a/docs/V0_9_0_EXECUTION_MAP.md +++ b/docs/V0_9_0_EXECUTION_MAP.md @@ -53,6 +53,7 @@ harvest/stewardship commits: | #2734 sidebar detail popovers | Locally harvested as the mouse-hover slice for #2694. | Work/Tasks/Agents hover metadata now stores row hitboxes, compact display text, and full source text so truncated checklist items, task/turn ids, and sub-agent ids/progress expand into a bordered wrapping popover. The harvest fixes reviewer risks from the PR by treating row metadata as authoritative, sizing by display width instead of bytes, and keeping source text untruncated. `cargo test -p codewhale-tui --bin codewhale-tui --locked sidebar_hover -- --nocapture`, `... work_hover_text_preserves_full_checklist_item ...`, and `... subagent_hover_text_preserves_full_agent_id_and_progress ...` passed. Credit @idling11; keep #2694 open for keyboard access, richer Work/Tasks/Agents metadata, redaction expansion, and clipping/snapshot coverage. | | #2532 pending-input delivery-mode labels | Locally re-harvested for #2054. 
| Pending-input preview rows now label steer-pending, rejected-steer, and queued-follow-up delivery modes, and wrapped continuation rows align under the label. `cargo test -p codewhale-tui --bin codewhale-tui --locked pending_input_preview -- --nocapture` passed. Credit @cyq1017; #2054 remains open for cancel/edit-mode affordance clarity. | | #2029 sub-agent checkpoint continuation | Locally implemented as the live-timeout recovery slice. | Sub-agents now persist `SubAgentCheckpoint` metadata through state, results, projections, and transcript handles. The runner checkpoints local messages before API calls and after model/tool cycles; per-step API timeout marks the child interrupted with `continuable=true`; `agent_eval { continue: true }` resumes only live checkpointed interrupted children. Reload preserves checkpoint metadata, but cold-restart continuation is intentionally not claimed because the child task/input channel is not rehydrated yet. `cargo test -p codewhale-tui --bin codewhale-tui --locked subagent -- --nocapture`, `cargo fmt --all -- --check`, `git diff --check`, and `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. Credit @qiyuanlicn for the recovery report; keep #2029 open only if cold-restart continuation or broader checkpoint UX remains required. | +| #1786 stale running task recovery | Locally implemented as the durable restart-safety slice. | `load_state` in `crates/tui/src/task_manager.rs` now marks tasks that were persisted as `running` in a prior process as failed with an explicit restart/interrupted error instead of requeueing them. Running tool-call summaries inside those stale tasks are also marked failed. `cargo test -p codewhale-tui --bin codewhale-tui --locked running_tasks_are_not_requeued_after_restart -- --nocapture` and `cargo test -p codewhale-tui --bin codewhale-tui --locked task_manager -- --nocapture` passed. 
Credit @bevis-wong; keep #1786 open for foreground shell hang root cause and careful LIVE-state watchdog work that does not abort legitimate foreground commands. | | #697/#1827 bounded auto-generated project context | Locally implemented from the stabilization audit. | When no project instructions exist, startup now writes `.codewhale/instructions.md` from the bounded Project Context Pack data instead of an unbounded summary/tree scan. The generated file avoids the dynamic `` marker when that setting is disabled, keeps later top-level folders visible, and omits noisy directory tails. `cargo test -p codewhale-tui --bin codewhale-tui --locked auto_generated_context_is_bounded_for_many_file_workspace -- --nocapture` and `cargo test -p codewhale-tui --bin codewhale-tui --locked project_context_pack -- --nocapture` passed. Credit reporters @NASLXTO and @wuxixing, plus earlier context-cap/startup work from @linzhiqin2003 and @merchloubna70-dot; leave #697/#1827 open pending real massive-repo/manual startup verification. | | #2636 project-context mtime cache | Defer direct merge; harvest only after cache key/signature is widened. | Must include constitution changes, auto-generated context deletion, canonical path equivalence, and overwrite detection before landing. | | #2634 HarmonyOS port | Locally harvested with additional Nix-chain clearance; keep credited and do not close until the integration branch is public. | User-supplied MatePad Edge demo (`https://bilibili.com/video/av116689597368905`) confirms real-device interest. Added env-driven OpenHarmony SDK setup, OHOS platform guards/fallbacks, self-update disablement, and OHOS target gating for Starlark execpolicy parsing plus PTY support so published OHOS builds do not pull `nix` 0.28 through `rustyline` or `portable-pty`. 
`cargo check --workspace --all-features --locked`, focused PTY/clipboard tests, and `cargo tree --locked -p codewhale-tui --target aarch64-unknown-linux-ohos -i nix@0.28.0` passed; full OHOS target check is blocked on this host because `OHOS_NATIVE_SDK`/target CC/sysroot are not configured and `ring` cannot find `assert.h`. | @@ -77,7 +78,7 @@ v0.9 branch so the remaining Windows/manual checks are explicit. | Large-repo context startup (#697/#1827 class) | Partially covered. | Project-context pack ordering/budget/noise tests passed, and the auto-generated fallback now has a synthetic 1000-file startup smoke with `cargo test -p codewhale-tui --bin codewhale-tui --locked auto_generated_context_is_bounded_for_many_file_workspace -- --nocapture`. Still needs a real massive-repo/manual startup benchmark before closing #697 or #1827. | | Sub-agent timeout and trust model (#1806, #719) | Fixed or covered in current branch. | `heartbeat_timeout_secs` clamp/default test passed, and `agent_open_description_explains_fresh_vs_forked_context_and_trust_model` asserts that sub-agent results are self-reports. | | Sub-agent checkpoint/resume (#2029) | Partially covered. | Live per-step API timeout now preserves a continuable checkpoint and `agent_eval { continue: true }` resumes the parked child; `cargo test -p codewhale-tui --bin codewhale-tui --locked subagent -- --nocapture` passed with checkpoint/projection/persistence/continuation coverage. Cold-restart continuation is not implemented because persisted child tasks are not rehydrated; decide whether #2029 can close as live-timeout recovery or should remain open for restart-resume UX. | -| Live shell/session liveness (#1786) | Partially fixed, still release-blocking. | Shell containment and turn-liveness tests exist, but orphaned PID/session-load reaping and long-running shell LIVE-state recovery remain open. Needs stale PID reaping and live-state regression coverage. 
| +| Live shell/session liveness (#1786) | Partially fixed, still release-blocking. | Durable task restart recovery now fails stale persisted `running` tasks instead of requeueing them, covered by `running_tasks_are_not_requeued_after_restart` and broader `task_manager` tests. Foreground shell hang root cause and LIVE-state watchdog recovery remain open; avoid aborting legitimate foreground `exec_shell` commands while adding stale-card recovery. | | Queued/live input feedback (#2054) | Partially covered; UX clarity still blocking. | Queued-message recovery/editing and pending-input delivery-mode labels are covered by `queued` and `pending_input_preview` focused tests. Still needs cancel/edit-mode affordance clarity and a repro for accidentally entering queued-draft edit while a turn is loading. | | Prompt/UI calmness (#1191) | Defer or narrow. | No release-blocking regression evidence yet; keep as polish unless a current user-facing prompt/UI failure is identified. |