diff --git a/crates/tui/src/commands/cycle.rs b/crates/tui/src/commands/cycle.rs new file mode 100644 index 00000000..c965d8eb --- /dev/null +++ b/crates/tui/src/commands/cycle.rs @@ -0,0 +1,222 @@ +//! Cycle commands: `/cycles` (list past cycle boundaries) and +//! `/cycle ` (show one cycle's briefing in detail). + +use std::fmt::Write; + +use crate::tui::app::App; + +use super::CommandResult; + +/// `/cycles` — list past cycle handoffs in compact form. +pub fn list_cycles(app: &App) -> CommandResult { + if app.cycle_briefings.is_empty() { + let msg = format!( + "No cycle boundaries have fired yet (current cycle: 1, threshold: {} tokens for {}).", + app.cycle.threshold_for(&app.model), + app.model + ); + return CommandResult::message(msg); + } + + let mut out = String::new(); + let _ = writeln!( + out, + "Cycle handoffs in this session ({} total). Active cycle: {}.", + app.cycle_briefings.len(), + app.cycle_count.saturating_add(1), + ); + out.push('\n'); + for brief in &app.cycle_briefings { + let preview = first_line(&brief.briefing_text, 80); + let _ = writeln!( + out, + " cycle {n} @ {ts} briefing: {tokens} tokens ─ {preview}", + n = brief.cycle, + ts = brief.timestamp.to_rfc3339(), + tokens = brief.token_estimate, + preview = preview, + ); + } + out.push('\n'); + out.push_str("Use `/cycle ` to show the full briefing for a specific cycle.\n"); + CommandResult::message(out) +} + +/// `/cycle ` — print the full briefing for cycle `n`. +pub fn show_cycle(app: &App, arg: Option<&str>) -> CommandResult { + let Some(raw) = arg.map(str::trim) else { + return CommandResult::error( + "Usage: /cycle — n is the cycle number from /cycles".to_string(), + ); + }; + if raw.is_empty() { + return CommandResult::error("Usage: /cycle ".to_string()); + } + let Ok(n) = raw.parse::() else { + return CommandResult::error(format!( + "Cycle number must be a positive integer (got '{raw}')." + )); + }; + + let Some(brief) = app.cycle_briefings.iter().find(|b| b.cycle == n) else { + let known: Vec = app + .cycle_briefings + .iter() + .map(|b| b.cycle.to_string()) + .collect(); + let known_str = if known.is_empty() { + "(none)".to_string() + } else { + known.join(", ") + }; + return CommandResult::error(format!( + "Cycle {n} not found in this session. Known cycles: {known_str}." + )); + }; + + let mut out = String::new(); + let _ = writeln!( + out, + "── Cycle {n} ({ts}) briefing: {tokens} tokens ──", + n = brief.cycle, + ts = brief.timestamp.to_rfc3339(), + tokens = brief.token_estimate, + ); + out.push('\n'); + out.push_str(brief.briefing_text.trim()); + out.push('\n'); + CommandResult::message(out) +} + +/// `/recall ` — user-initiated BM25 search of cycle archives. +/// +/// Synchronous wrapper around `tools::recall_archive::RecallArchiveTool` so +/// users can probe the archive without invoking the model. Output is the +/// same JSON payload the agent would see; the assistant pretty-prints +/// short results and dumps long ones inline. +pub fn recall_archive(app: &App, arg: Option<&str>) -> CommandResult { + use crate::tools::recall_archive::RecallArchiveTool; + use crate::tools::spec::{ToolContext, ToolSpec}; + + let Some(raw) = arg.map(str::trim) else { + return CommandResult::error("Usage: /recall ".to_string()); + }; + if raw.is_empty() { + return CommandResult::error("Usage: /recall ".to_string()); + } + + let session_id = app + .current_session_id + .clone() + .unwrap_or_else(|| "workspace".to_string()); + + let context = ToolContext::new(app.workspace.clone()).with_state_namespace(session_id); + let tool = RecallArchiveTool; + let input = serde_json::json!({"query": raw}); + + let result = tokio::task::block_in_place(|| { + tokio::runtime::Handle::current().block_on(tool.execute(input, &context)) + }); + + match result { + Ok(res) => CommandResult::message(res.content), + Err(err) => CommandResult::error(format!("recall_archive failed: {err}")), + } +} + +/// Truncate `text` to its first non-empty line, capped at `max_chars`. +fn first_line(text: &str, max_chars: usize) -> String { + let line = text + .lines() + .map(str::trim) + .find(|l| !l.is_empty()) + .unwrap_or(""); + if line.chars().count() <= max_chars { + line.to_string() + } else { + let prefix: String = line.chars().take(max_chars).collect(); + format!("{prefix}…") + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cycle_manager::CycleBriefing; + use crate::tui::app::{App, TuiOptions}; + use chrono::Utc; + use std::path::PathBuf; + + fn test_options() -> TuiOptions { + TuiOptions { + model: "deepseek-v4-pro".to_string(), + workspace: PathBuf::from("."), + allow_shell: false, + use_alt_screen: true, + use_mouse_capture: false, + use_bracketed_paste: true, + max_subagents: 1, + skills_dir: PathBuf::from("."), + memory_path: PathBuf::from("memory.md"), + notes_path: PathBuf::from("notes.txt"), + mcp_config_path: PathBuf::from("mcp.json"), + use_memory: false, + start_in_agent_mode: false, + skip_onboarding: true, + yolo: false, + resume_session_id: None, + } + } + + #[test] + fn list_cycles_reports_no_boundaries_yet() { + let app = App::new(test_options(), &crate::config::Config::default()); + let res = list_cycles(&app); + assert!(res.message.is_some()); + assert!( + res.message + .as_deref() + .unwrap() + .contains("No cycle boundaries") + ); + } + + #[test] + fn show_cycle_rejects_nonexistent_cycle() { + let app = App::new(test_options(), &crate::config::Config::default()); + let res = show_cycle(&app, Some("3")); + let msg = res.message.expect("error message"); + assert!(msg.contains("Cycle 3 not found"), "got: {msg}"); + } + + #[test] + fn list_and_show_cycles_render_briefings() { + let mut app = App::new(test_options(), &crate::config::Config::default()); + app.cycle_briefings.push(CycleBriefing { + cycle: 1, + timestamp: Utc::now(), + briefing_text: "Decision: chose A; constraint: no async.".to_string(), + token_estimate: 12, + }); + app.cycle_count = 1; + + let listed = list_cycles(&app).message.expect("list message"); + assert!(listed.contains("cycle 1")); + assert!(listed.contains("12 tokens")); + + let shown = show_cycle(&app, Some("1")).message.expect("show message"); + assert!(shown.contains("Decision: chose A")); + } + + #[test] + fn show_cycle_validates_argument() { + let app = App::new(test_options(), &crate::config::Config::default()); + let res = show_cycle(&app, None); + let msg = res.message.expect("error message"); + assert!(msg.contains("Usage: /cycle")); + + let res = show_cycle(&app, Some("not-a-number")); + let msg = res.message.expect("error message"); + assert!(msg.contains("must be a positive integer")); + } +} diff --git a/crates/tui/src/commands/mod.rs b/crates/tui/src/commands/mod.rs index f60786b9..fbf7f98c 100644 --- a/crates/tui/src/commands/mod.rs +++ b/crates/tui/src/commands/mod.rs @@ -6,6 +6,7 @@ mod attachment; mod config; mod core; +mod cycle; mod debug; mod init; mod note; @@ -204,9 +205,27 @@ pub const COMMANDS: &[CommandInfo] = &[ CommandInfo { name: "compact", aliases: &[], - description: "Trigger context compaction to free up space", + description: "Trigger context compaction to free up space (legacy; v0.6.6 prefers cycle restart)", usage: "/compact", }, + CommandInfo { + name: "cycles", + aliases: &[], + description: "List checkpoint-restart cycle handoffs in this session", + usage: "/cycles", + }, + CommandInfo { + name: "cycle", + aliases: &[], + description: "Show the carry-forward briefing for a specific cycle", + usage: "/cycle ", + }, + CommandInfo { + name: "recall", + aliases: &[], + description: "Search prior cycle archives (BM25 over message text)", + usage: "/recall ", + }, CommandInfo { name: "export", aliases: &[], @@ -357,6 +376,9 @@ pub fn execute(cmd: &str, app: &mut App) -> CommandResult { "sessions" | "resume" => session::sessions(app), "load" => session::load(app, arg), "compact" => session::compact(app), + "cycles" => cycle::list_cycles(app), + "cycle" => cycle::show_cycle(app, arg), + "recall" => cycle::recall_archive(app, arg), "export" => session::export(app, arg), // Config commands diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index ae68e81b..49d84a4a 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -25,6 +25,10 @@ use crate::compaction::{ CompactionConfig, compact_messages_safe, estimate_tokens, merge_system_prompts, should_compact, }; use crate::config::{Config, DEFAULT_MAX_SUBAGENTS, DEFAULT_TEXT_MODEL}; +use crate::cycle_manager::{ + CycleBriefing, CycleConfig, StructuredState, archive_cycle, build_seed_messages, + estimate_briefing_tokens, produce_briefing, should_advance_cycle, +}; use crate::features::{Feature, Features}; use crate::llm_client::LlmClient; use crate::mcp::McpPool; @@ -83,7 +87,15 @@ pub struct EngineConfig { /// Feature flags controlling tool availability. pub features: Features, /// Auto-compaction settings for long conversations. + /// + /// As of v0.6.6 the high-level summarization compaction (`compact_messages_safe`) + /// is **disabled by default**; the checkpoint-restart cycle architecture + /// (`cycle_manager`) replaces it. The compaction config is still wired through + /// for the per-tool-result truncation path (`compact_tool_result_for_context`) + /// and for users who explicitly opt back in via `[compaction] enabled = true`. pub compaction: CompactionConfig, + /// Checkpoint-restart cycle settings (issue #124). + pub cycle: CycleConfig, /// Capacity-controller settings. pub capacity: CapacityControllerConfig, /// Shared Todo list state. @@ -109,6 +121,7 @@ impl Default for EngineConfig { max_subagents: DEFAULT_MAX_SUBAGENTS, features: Features::with_defaults(), compaction: CompactionConfig::default(), + cycle: CycleConfig::default(), capacity: CapacityControllerConfig::default(), todos: new_shared_todo_list(), plan_state: new_shared_plan_state(), @@ -431,6 +444,7 @@ fn should_default_defer_tool(name: &str, mode: AppMode) -> bool { | "file_search" | "diagnostics" | "rlm" + | "recall_archive" | MULTI_TOOL_PARALLEL_NAME | "update_plan" | "todo_write" @@ -1574,6 +1588,16 @@ impl Engine { error, }) .await; + + // Checkpoint-restart cycle boundary (issue #124). The turn just + // settled cleanly — no in-flight tools, no streaming, no pending + // approval — so this is the safe phase to swap the context if we've + // crossed the per-cycle token threshold. We only fire on a + // Completed turn; Failed/Interrupted turns leave the buffer alone + // so the user can retry without a forced reset. + if matches!(status, TurnOutcomeStatus::Completed) { + self.maybe_advance_cycle(mode).await; + } } async fn handle_manual_compaction(&mut self) { @@ -4491,6 +4515,154 @@ impl Engine { self.merge_compaction_summary(Some(prompt)); } + /// Run the checkpoint-restart cycle boundary if the session has crossed + /// its token threshold (issue #124). No-op in the common case. + /// + /// Caller must invoke this only at a clean turn boundary (no in-flight + /// tool, no open stream, no pending approval modal). The phase guard + /// inside `should_advance_cycle` is a defence-in-depth check; the + /// engine's wider state machine is the primary enforcement layer. + /// + /// Sub-agents are intentionally NOT awaited: each sub-agent has its own + /// context, the parent's reset doesn't invalidate them. Their handles + /// are captured in the structured-state block so the next cycle can see + /// they're still running. + async fn maybe_advance_cycle(&mut self, mode: AppMode) { + if !should_advance_cycle( + self.session.total_usage.input_tokens, + self.session.total_usage.output_tokens, + &self.session.model, + &self.config.cycle, + false, + ) { + return; + } + + let Some(client) = self.deepseek_client.clone() else { + crate::logging::warn( + "Cycle boundary skipped: API client not configured for briefing turn", + ); + return; + }; + + let from = self.session.cycle_count; + let to = from.saturating_add(1); + let archive_started = self.session.current_cycle_started; + let max_briefing_tokens = self.config.cycle.briefing_max_for(&self.session.model); + + let _ = self + .tx_event + .send(Event::status(format!( + "↻ context refreshing (cycle {from} → {to}, generating briefing…)" + ))) + .await; + + // 1. Generate the model-curated briefing. We do this *before* + // archiving so a briefing-call failure leaves the cycle intact — + // the user can keep working at higher token counts until the next + // boundary check, rather than losing their context to a failed + // handoff. + let briefing_text = match produce_briefing( + &client, + &self.session.model, + &self.session.messages, + max_briefing_tokens, + ) + .await + { + Ok(text) => text, + Err(err) => { + crate::logging::warn(format!( + "Cycle briefing turn failed; skipping cycle advance: {err}" + )); + let _ = self + .tx_event + .send(Event::status(format!( + "↻ cycle handoff failed (continuing in cycle {from}): {err}" + ))) + .await; + return; + } + }; + + let briefing_tokens = estimate_briefing_tokens(&briefing_text); + let now = chrono::Utc::now(); + let briefing = CycleBriefing { + cycle: to, + timestamp: now, + briefing_text: briefing_text.clone(), + token_estimate: briefing_tokens, + }; + + // 2. Archive the cycle to disk. If the archive write fails we still + // proceed with the swap — the briefing alone preserves enough + // state to continue, and the user can recover the lost archive + // from their session log if needed. + match archive_cycle( + &self.session.id, + to, + &self.session.messages, + &self.session.model, + archive_started, + ) { + Ok(path) => { + crate::logging::info(format!("Cycle {to} archived to {}", path.display())); + } + Err(err) => { + crate::logging::warn(format!( + "Failed to archive cycle {to}; continuing with swap: {err}" + )); + } + } + + // 3. Capture structured state. Locks are held only for the snapshot. + let state = StructuredState::capture( + mode.label(), + self.config.workspace.clone(), + std::env::current_dir().ok(), + &self.session.working_set, + &self.config.todos, + &self.config.plan_state, + Some(&self.subagent_manager), + ) + .await; + let state_block = state.to_system_block(); + + // 4. Build the seed messages. The next cycle starts with the + // base system prompt (refreshed below) and these seeds. + let seed_messages = build_seed_messages( + state_block.as_deref(), + Some(&briefing), + None, // pending_user_message — pulled from steer/queue elsewhere + ); + + // 5. Atomic swap. + self.session.messages = seed_messages; + self.session.cycle_count = to; + self.session.current_cycle_started = now; + self.session.cycle_briefings.push(briefing.clone()); + // Drop any compaction summary — that path is incompatible with the + // fresh-context model and would Frankenstein-merge with the briefing. + self.session.compaction_summary_prompt = None; + self.refresh_system_prompt(mode); + self.emit_session_updated().await; + + let _ = self + .tx_event + .send(Event::CycleAdvanced { + from, + to, + briefing: briefing.clone(), + }) + .await; + let _ = self + .tx_event + .send(Event::status(format!( + "↻ context refreshed (cycle {from} → {to}, briefing: {briefing_tokens} tokens carried)" + ))) + .await; + } + /// Refresh the system prompt based on current mode and context. fn refresh_system_prompt(&mut self, mode: AppMode) { let working_set_summary = self diff --git a/crates/tui/src/core/events.rs b/crates/tui/src/core/events.rs index a2474868..b5be4034 100644 --- a/crates/tui/src/core/events.rs +++ b/crates/tui/src/core/events.rs @@ -121,6 +121,17 @@ pub enum Event { message: String, }, + /// Checkpoint-restart cycle boundary advanced (issue #124). The previous + /// cycle has already been archived to disk; the engine has swapped its + /// in-memory message buffer for the seed messages of cycle `to`. + /// Carries the full briefing record so the UI can populate + /// `app.cycle_briefings` for `/cycle `. + CycleAdvanced { + from: u32, + to: u32, + briefing: crate::cycle_manager::CycleBriefing, + }, + /// Capacity decision telemetry. #[allow(dead_code)] CapacityDecision { diff --git a/crates/tui/src/core/session.rs b/crates/tui/src/core/session.rs index 37629848..94791c40 100644 --- a/crates/tui/src/core/session.rs +++ b/crates/tui/src/core/session.rs @@ -2,9 +2,11 @@ //! //! Tracks conversation history, token usage, and session metadata. +use crate::cycle_manager::CycleBriefing; use crate::models::{Message, SystemPrompt, Usage}; use crate::project_context::{ProjectContext, load_project_context_with_parents}; use crate::working_set::WorkingSet; +use chrono::{DateTime, Utc}; use std::path::PathBuf; /// Session state for the engine. @@ -55,6 +57,19 @@ pub struct Session { /// Repo-aware working set for context management. pub working_set: WorkingSet, + + /// Number of cycle boundaries crossed in this session (issue #124). The + /// active cycle index is `cycle_count + 1` (cycles are 1-based for users). + pub cycle_count: u32, + + /// UTC start time of the *current* cycle. Updated when the engine resets + /// the conversation buffer. Used by archive headers and the `/cycles` + /// command's display. + pub current_cycle_started: DateTime, + + /// Briefings produced at past cycle boundaries, in chronological order. + /// Bounded growth: one entry per cycle, briefing capped at ~3,000 tokens. + pub cycle_briefings: Vec, } /// Cumulative usage statistics for a session. @@ -117,6 +132,9 @@ impl Session { None }, working_set: WorkingSet::default(), + cycle_count: 0, + current_cycle_started: Utc::now(), + cycle_briefings: Vec::new(), } } diff --git a/crates/tui/src/cycle_manager.rs b/crates/tui/src/cycle_manager.rs new file mode 100644 index 00000000..c26693d6 --- /dev/null +++ b/crates/tui/src/cycle_manager.rs @@ -0,0 +1,1004 @@ +//! Checkpoint-restart cycle management for long-running sessions (issue #124). +//! +//! ## Why +//! +//! DeepSeek V4's empirical retrieval elbow is 128K tokens (paper Figure 9 — +//! 8K/0.90, 64K/0.87, 128K/0.85, 256K/0.76, 512K/0.66, 1M/0.59). Lossy +//! summarization compaction creates a "Frankenstein" context — half verbatim, +//! half paraphrased — that the model cannot tell apart, so it treats the +//! summary as if it were verbatim and confabulates around the gaps. +//! +//! Checkpoint-restart fixes this by giving every cycle a *homogeneous* fresh +//! context: original system prompt, structured state (todos / plan / working +//! set / sub-agent handles), and a model-curated free-form briefing of at +//! most ~3,000 tokens. The previous cycle is archived to disk in JSONL form +//! so a future `recall_archive` tool (issue #127) can search it on demand. +//! +//! ## Layers of carry-forward +//! +//! 1. **Auto-preserved** (deterministic, no agent judgment): the original +//! system prompt, `SharedTodoList`, `SharedPlanState`, working-set paths, +//! open sub-agent snapshots, mode / workspace / cwd, and the user's most +//! recent unsent message. +//! 2. **Free-form briefing** (model-curated, wrapped as ``): +//! decisions made + why, constraints discovered, hypotheses being tested, +//! approaches that failed, open questions. Tool output bytes, file +//! contents, and step-by-step recaps explicitly do NOT belong here — +//! they're either in the archive or recoverable from disk. +//! +//! ## Trigger +//! +//! - Token threshold: **110K** by default (leaves ~8.5K headroom for the +//! briefing turn plus next-turn growth before crossing the 128K elbow). +//! - Phase guard: callers only invoke `should_advance_cycle` at clean turn +//! boundaries (no in-flight tool, no streaming, no approval modal). +//! - Per-model overrides: `[cycle.per_model]` in config.toml lets operators +//! tune the threshold separately for `deepseek-v4-pro` vs. `-flash` if +//! their workloads have different briefing costs. + +use std::collections::HashMap; +use std::fs::{File, OpenOptions}; +use std::io::Write; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result}; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +use crate::client::DeepSeekClient; +use crate::llm_client::LlmClient; +use crate::models::{ContentBlock, Message, MessageRequest, SystemBlock, SystemPrompt}; +use crate::tools::plan::{PlanSnapshot, SharedPlanState}; +use crate::tools::subagent::{SharedSubAgentManager, SubAgentResult, SubAgentStatus}; +use crate::tools::todo::{SharedTodoList, TodoListSnapshot}; +use crate::working_set::WorkingSet; + +/// JSONL header record emitted as the first line of an archived cycle file. +const CYCLE_ARCHIVE_SCHEMA_VERSION: u32 = 1; + +/// Default token threshold at which a cycle boundary fires. Set below the V4 +/// 128K retrieval elbow to leave room for the briefing turn (≤3K tokens) plus +/// the next user turn before the next boundary. +pub const DEFAULT_CYCLE_THRESHOLD_TOKENS: usize = 110_000; + +/// Default cap on the model-curated briefing block. +pub const DEFAULT_BRIEFING_MAX_TOKENS: usize = 3_000; + +/// Conservative chars-per-token used to bound the briefing length to the +/// configured token cap. Matches `compaction::estimate_tokens` (~4 chars/token). +const APPROX_CHARS_PER_TOKEN: usize = 4; + +/// Per-model cycle tuning. Loaded from `[cycle.per_model.]`. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ModelCycleConfig { + /// Token threshold above which a cycle boundary fires. + pub threshold_tokens: usize, + /// Cap on the model-curated `` briefing. + pub briefing_max_tokens: usize, +} + +impl Default for ModelCycleConfig { + fn default() -> Self { + Self { + threshold_tokens: DEFAULT_CYCLE_THRESHOLD_TOKENS, + briefing_max_tokens: DEFAULT_BRIEFING_MAX_TOKENS, + } + } +} + +/// Top-level cycle configuration. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct CycleConfig { + /// Whether checkpoint-restart cycles are enabled. Defaults to true. + pub enabled: bool, + /// Default token threshold; per-model overrides take precedence when present. + pub threshold_tokens: usize, + /// Default briefing cap; per-model overrides take precedence when present. + pub briefing_max_tokens: usize, + /// Per-model overrides keyed by model identifier (e.g. `deepseek-v4-pro`). + pub per_model: HashMap, +} + +impl Default for CycleConfig { + fn default() -> Self { + let mut per_model: HashMap = HashMap::new(); + per_model.insert("deepseek-v4-pro".to_string(), ModelCycleConfig::default()); + per_model.insert("deepseek-v4-flash".to_string(), ModelCycleConfig::default()); + Self { + enabled: true, + threshold_tokens: DEFAULT_CYCLE_THRESHOLD_TOKENS, + briefing_max_tokens: DEFAULT_BRIEFING_MAX_TOKENS, + per_model, + } + } +} + +impl CycleConfig { + /// Resolve the threshold for a given model (per-model override > default). + #[must_use] + pub fn threshold_for(&self, model: &str) -> usize { + self.per_model + .get(model) + .map(|m| m.threshold_tokens) + .unwrap_or(self.threshold_tokens) + } + + /// Resolve the briefing-token cap for a given model. + #[must_use] + pub fn briefing_max_for(&self, model: &str) -> usize { + self.per_model + .get(model) + .map(|m| m.briefing_max_tokens) + .unwrap_or(self.briefing_max_tokens) + } +} + +/// Snapshot of a model-curated briefing produced at cycle handoff. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CycleBriefing { + /// 1-based cycle number this briefing closes (i.e. the cycle being archived). + pub cycle: u32, + /// UTC timestamp when the briefing turn completed. + pub timestamp: DateTime, + /// Extracted contents of the `` block. + pub briefing_text: String, + /// Approximate token count of `briefing_text`. + pub token_estimate: usize, +} + +/// Decide whether a cycle boundary should fire. +/// +/// `usage` is the *cumulative* session input+output tokens (both `u64` to +/// match `SessionUsage`). `in_flight` is true when a tool is mid-execution, +/// stream is open, or an approval modal is pending — in those cases the +/// caller must wait until the next clean boundary. +#[must_use] +pub fn should_advance_cycle( + cumulative_input_tokens: u64, + cumulative_output_tokens: u64, + model: &str, + cfg: &CycleConfig, + in_flight: bool, +) -> bool { + if !cfg.enabled || in_flight { + return false; + } + let total = cumulative_input_tokens.saturating_add(cumulative_output_tokens); + let threshold = cfg.threshold_for(model) as u64; + if threshold == 0 { + return false; + } + total >= threshold +} + +/// Roll-up of state that survives a cycle boundary deterministically. +/// +/// Construction is cheap — borrow the live state, snapshot it once, render it +/// into a system block. The snapshot decouples rendering from any mutex held +/// by the engine. +#[derive(Debug, Clone, Default)] +pub struct StructuredState { + pub mode_label: String, + pub workspace: PathBuf, + pub cwd: Option, + pub working_set_summary: Option, + pub todo_snapshot: Option, + pub plan_snapshot: Option, + pub subagent_snapshots: Vec, +} + +impl StructuredState { + /// Capture the current state. All locks are held only for the duration of + /// the snapshot. + pub async fn capture( + mode_label: impl Into, + workspace: PathBuf, + cwd: Option, + working_set: &WorkingSet, + todos: &SharedTodoList, + plan_state: &SharedPlanState, + subagents: Option<&SharedSubAgentManager>, + ) -> Self { + let working_set_summary = working_set.summary_block(&workspace); + + let todo_snapshot = { + let guard = todos.lock().await; + let snap = guard.snapshot(); + if snap.items.is_empty() { + None + } else { + Some(snap) + } + }; + + let plan_snapshot = { + let guard = plan_state.lock().await; + if guard.is_empty() { + None + } else { + Some(guard.snapshot()) + } + }; + + let subagent_snapshots = if let Some(handle) = subagents { + let guard = handle.lock().await; + guard + .list() + .into_iter() + .filter(|s| matches!(s.status, SubAgentStatus::Running)) + .collect() + } else { + Vec::new() + }; + + Self { + mode_label: mode_label.into(), + workspace, + cwd, + working_set_summary, + todo_snapshot, + plan_snapshot, + subagent_snapshots, + } + } + + /// Render the structured state as a single system block. Returns `None` + /// when there is nothing meaningful to carry forward (rare in practice — + /// at least the workspace and mode are always present). + #[must_use] + pub fn to_system_block(&self) -> Option { + let mut out = String::new(); + out.push_str("## Cycle State (Auto-Preserved)\n\n"); + out.push_str(&format!("- Mode: `{}`\n", self.mode_label)); + out.push_str(&format!("- Workspace: `{}`\n", self.workspace.display())); + if let Some(cwd) = self.cwd.as_ref() { + out.push_str(&format!("- Cwd: `{}`\n", cwd.display())); + } + + if let Some(plan) = self.plan_snapshot.as_ref() { + out.push_str("\n### Plan\n"); + if let Some(explanation) = plan.explanation.as_ref() { + out.push_str(&format!("{explanation}\n\n")); + } + for item in &plan.items { + let marker = match item.status { + crate::tools::plan::StepStatus::Pending => "[ ]", + crate::tools::plan::StepStatus::InProgress => "[~]", + crate::tools::plan::StepStatus::Completed => "[x]", + }; + out.push_str(&format!("- {marker} {}\n", item.step)); + } + } + + if let Some(todos) = self.todo_snapshot.as_ref() { + out.push_str(&format!( + "\n### Todos ({}% complete)\n", + todos.completion_pct + )); + for item in &todos.items { + let marker = match item.status { + crate::tools::todo::TodoStatus::Pending => "[ ]", + crate::tools::todo::TodoStatus::InProgress => "[~]", + crate::tools::todo::TodoStatus::Completed => "[x]", + }; + out.push_str(&format!("- {marker} {}\n", item.content)); + } + } + + if !self.subagent_snapshots.is_empty() { + out.push_str("\n### Open Sub-Agents\n"); + for s in &self.subagent_snapshots { + let role = s.assignment.role.as_deref().unwrap_or("—"); + let goal = if s.assignment.objective.is_empty() { + "(no objective set)" + } else { + s.assignment.objective.as_str() + }; + out.push_str(&format!("- `{}` (role: {}) — {}\n", s.agent_id, role, goal)); + } + } + + if let Some(working_set) = self.working_set_summary.as_deref() { + out.push('\n'); + out.push_str(working_set); + out.push('\n'); + } + + Some(out) + } +} + +/// Build the prompt the model uses to produce its `` briefing. +const CYCLE_HANDOFF_TEMPLATE: &str = include_str!("prompts/cycle_handoff.md"); + +/// Run the briefing turn. The caller drives this just before swapping the +/// session message buffer. The returned text is the contents of the +/// `` block — outer tags stripped, length-bounded to +/// `max_briefing_tokens` worth of characters as a defensive backstop in case +/// the model ignores the cap. +pub async fn produce_briefing( + client: &DeepSeekClient, + model: &str, + conversation: &[Message], + max_briefing_tokens: usize, +) -> Result { + if conversation.is_empty() { + return Ok(String::new()); + } + + // Append a synthetic instruction asking for the carry_forward block. We + // do not mutate the caller's conversation; this is a one-shot turn. + let mut messages: Vec = conversation.to_vec(); + messages.push(Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: format!( + "[CYCLE BOUNDARY] {}\n\nProduce your `` block now. \ + Stay under {} tokens. Output only the block — no other text.", + "The next turn starts in a fresh context.", max_briefing_tokens + ), + cache_control: None, + }], + }); + + let request = MessageRequest { + model: model.to_string(), + messages, + max_tokens: u32::try_from(max_briefing_tokens.saturating_mul(2)) + .unwrap_or(8_192) + .max(1_024), + system: Some(SystemPrompt::Blocks(vec![SystemBlock { + block_type: "text".to_string(), + text: CYCLE_HANDOFF_TEMPLATE.to_string(), + cache_control: None, + }])), + tools: None, + tool_choice: None, + metadata: None, + thinking: None, + reasoning_effort: None, + stream: Some(false), + // Briefings benefit from low temperature — we want consistent state + // capture, not stylistic variation. + temperature: Some(0.2), + top_p: None, + }; + + let response = client + .create_message(request) + .await + .with_context(|| format!("Cycle briefing turn failed for model {model}"))?; + + let raw = response + .content + .iter() + .filter_map(|block| match block { + ContentBlock::Text { text, .. } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n"); + + let extracted = extract_carry_forward(&raw); + let bounded = enforce_briefing_cap(&extracted, max_briefing_tokens); + Ok(bounded) +} + +/// Pull the contents of the first `...` block +/// out of the raw model response. If the tags are missing, return the trimmed +/// raw text — the caller would rather have *some* briefing than nothing. +#[must_use] +pub fn extract_carry_forward(raw: &str) -> String { + let lower = raw.to_ascii_lowercase(); + let open_tag = ""; + let close_tag = ""; + + if let Some(start) = lower.find(open_tag) { + let after = start + open_tag.len(); + let tail = &raw[after..]; + let tail_lower = &lower[after..]; + if let Some(end) = tail_lower.find(close_tag) { + return tail[..end].trim().to_string(); + } + // Open tag without close tag — take everything after, trimmed. + return tail.trim().to_string(); + } + raw.trim().to_string() +} + +/// Defensive bound on briefing length. Calibrated at ~4 chars/token to match +/// the rest of the codebase's token estimator. +fn enforce_briefing_cap(text: &str, max_tokens: usize) -> String { + let max_chars = max_tokens.saturating_mul(APPROX_CHARS_PER_TOKEN); + if max_chars == 0 { + return String::new(); + } + if text.chars().count() <= max_chars { + return text.to_string(); + } + let mut out: String = text.chars().take(max_chars).collect(); + out.push_str("\n\n[...briefing truncated to fit cap...]"); + out +} + +/// Estimate briefing tokens — same method as `compaction::estimate_tokens` +/// for symmetry: ~4 chars per token. +#[must_use] +pub fn estimate_briefing_tokens(text: &str) -> usize { + text.len().div_ceil(APPROX_CHARS_PER_TOKEN) +} + +/// Header record written as the first line of an archived cycle JSONL file. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CycleArchiveHeader { + pub schema_version: u32, + pub cycle: u32, + pub session_id: String, + pub model: String, + pub started: DateTime, + pub ended: DateTime, + pub message_count: usize, +} + +/// Resolve the on-disk archive directory: `~/.deepseek/sessions//cycles`. +fn archive_dir_for(session_id: &str) -> Result { + let home = dirs::home_dir().context("Could not resolve home directory for cycle archive")?; + Ok(home + .join(".deepseek") + .join("sessions") + .join(session_id) + .join("cycles")) +} + +/// Archive a cycle's messages to JSONL on disk and return the path written. +/// +/// The first line is a `CycleArchiveHeader` JSON object; each subsequent +/// line is a single `Message` serialized as JSON. +pub fn archive_cycle( + session_id: &str, + cycle_n: u32, + messages: &[Message], + model: &str, + started: DateTime, +) -> Result { + let dir = archive_dir_for(session_id)?; + std::fs::create_dir_all(&dir).with_context(|| { + format!( + "Failed to create cycle archive directory at {}", + dir.display() + ) + })?; + + let path = dir.join(format!("{cycle_n}.jsonl")); + let header = CycleArchiveHeader { + schema_version: CYCLE_ARCHIVE_SCHEMA_VERSION, + cycle: cycle_n, + session_id: session_id.to_string(), + model: model.to_string(), + started, + ended: Utc::now(), + message_count: messages.len(), + }; + + write_archive_file(&path, &header, messages) + .with_context(|| format!("Failed to write cycle archive at {}", path.display()))?; + + Ok(path) +} + +fn write_archive_file( + path: &Path, + header: &CycleArchiveHeader, + messages: &[Message], +) -> Result<()> { + let tmp_path = path.with_extension("jsonl.tmp"); + { + let file = OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&tmp_path)?; + let mut buf = std::io::BufWriter::new(file); + let header_line = serde_json::to_string(header)?; + buf.write_all(header_line.as_bytes())?; + buf.write_all(b"\n")?; + for message in messages { + let line = serde_json::to_string(message)?; + buf.write_all(line.as_bytes())?; + buf.write_all(b"\n")?; + } + // BufWriter flushes on drop, but we want any error surfaced now — + // not silently into the void. + buf.flush()?; + // File handle drops with `buf`. + } + std::fs::rename(&tmp_path, path)?; + Ok(()) +} + +/// Open an archived cycle JSONL for streaming reads. Returns the parsed +/// header and an iterator over messages. Reserved for the future +/// `recall_archive` tool (#127). +#[allow(dead_code)] +pub fn open_archive(path: &Path) -> Result<(CycleArchiveHeader, ArchiveMessageReader)> { + use std::io::{BufRead, BufReader}; + + let file = File::open(path) + .with_context(|| format!("Failed to open cycle archive at {}", path.display()))?; + let mut reader = BufReader::new(file); + let mut header_line = String::new(); + reader.read_line(&mut header_line)?; + let header: CycleArchiveHeader = + serde_json::from_str(header_line.trim()).with_context(|| { + format!( + "Cycle archive at {} is missing a valid header", + path.display() + ) + })?; + + if header.schema_version > CYCLE_ARCHIVE_SCHEMA_VERSION { + anyhow::bail!( + "Cycle archive schema v{} at {} is newer than supported v{}", + header.schema_version, + path.display(), + CYCLE_ARCHIVE_SCHEMA_VERSION + ); + } + + Ok((header, ArchiveMessageReader { reader })) +} + +/// Iterator yielding `Message`s from an opened archive file. Yields `None` +/// when the file is exhausted. Errors propagate through the `Result`. +#[allow(dead_code)] +#[derive(Debug)] +pub struct ArchiveMessageReader { + reader: std::io::BufReader, +} + +#[allow(dead_code)] +impl Iterator for ArchiveMessageReader { + type Item = Result; + + fn next(&mut self) -> Option { + use std::io::BufRead; + + let mut line = String::new(); + match self.reader.read_line(&mut line) { + Ok(0) => None, + Ok(_) => { + let trimmed = line.trim(); + if trimmed.is_empty() { + return self.next(); + } + Some( + serde_json::from_str::(trimmed) + .map_err(|e| anyhow::anyhow!("Archive line parse failed: {e}")), + ) + } + Err(e) => Some(Err(anyhow::Error::new(e))), + } + } +} + +/// Compose the seed messages for the next cycle. +/// +/// Layout (deterministic order): +/// +/// 1. (system prompt is provided separately, not as a `Message`) +/// 2. Optional structured-state user message (todos / plan / working set / +/// sub-agents) — labeled with `[CYCLE STATE]` so the assistant can tell +/// it apart from a real user turn. +/// 3. The model-curated `` briefing — labeled with `[CYCLE +/// BRIEFING]` so the assistant knows it was self-authored on the previous +/// cycle. +/// 4. Optional pending user message that hadn't been sent yet. +/// +/// The original system prompt is composed by the engine and stays separate +/// from this list — the engine sets `session.system_prompt` directly. +#[must_use] +pub fn build_seed_messages( + structured_state_block: Option<&str>, + briefing: Option<&CycleBriefing>, + pending_user_message: Option<&str>, +) -> Vec { + let mut out: Vec = Vec::new(); + + if let Some(state) = structured_state_block + && !state.trim().is_empty() + { + out.push(Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: format!( + "[CYCLE STATE — auto-preserved across the cycle boundary]\n\n{}", + state.trim() + ), + cache_control: None, + }], + }); + // A user message expects an assistant ack so the next real user + // message lands on a clean alternation. We synthesize a one-line ack. + out.push(Message { + role: "assistant".to_string(), + content: vec![ContentBlock::Text { + text: "Acknowledged. State carried into the new cycle.".to_string(), + cache_control: None, + }], + }); + } + + if let Some(brief) = briefing + && !brief.briefing_text.trim().is_empty() + { + out.push(Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: format!( + "[CYCLE BRIEFING — written by you on cycle {} at {}]\n\n\n{}\n", + brief.cycle, + brief.timestamp.to_rfc3339(), + brief.briefing_text.trim() + ), + cache_control: None, + }], + }); + out.push(Message { + role: "assistant".to_string(), + content: vec![ContentBlock::Text { + text: "Briefing absorbed. Continuing.".to_string(), + cache_control: None, + }], + }); + } + + if let Some(pending) = pending_user_message + && !pending.trim().is_empty() + { + out.push(Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: pending.trim().to_string(), + cache_control: None, + }], + }); + } + + out +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::models::{ContentBlock, Message}; + use std::path::PathBuf; + use tempfile::tempdir; + + fn user_msg(text: &str) -> Message { + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }], + } + } + + fn asst_msg(text: &str) -> Message { + Message { + role: "assistant".to_string(), + content: vec![ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }], + } + } + + #[test] + fn cycle_config_default_includes_v4_overrides() { + let cfg = CycleConfig::default(); + assert!(cfg.enabled); + assert!(cfg.per_model.contains_key("deepseek-v4-pro")); + assert!(cfg.per_model.contains_key("deepseek-v4-flash")); + assert_eq!(cfg.threshold_tokens, DEFAULT_CYCLE_THRESHOLD_TOKENS); + assert_eq!(cfg.briefing_max_tokens, DEFAULT_BRIEFING_MAX_TOKENS); + } + + #[test] + fn threshold_for_falls_back_to_default() { + let cfg = CycleConfig::default(); + assert_eq!( + cfg.threshold_for("deepseek-v4-pro"), + DEFAULT_CYCLE_THRESHOLD_TOKENS + ); + assert_eq!( + cfg.threshold_for("unknown-model"), + DEFAULT_CYCLE_THRESHOLD_TOKENS + ); + } + + #[test] + fn threshold_for_uses_per_model_override() { + let mut cfg = CycleConfig::default(); + cfg.per_model.insert( + "deepseek-v4-pro".to_string(), + ModelCycleConfig { + threshold_tokens: 80_000, + briefing_max_tokens: 2_000, + }, + ); + assert_eq!(cfg.threshold_for("deepseek-v4-pro"), 80_000); + assert_eq!(cfg.briefing_max_for("deepseek-v4-pro"), 2_000); + } + + #[test] + fn should_advance_below_threshold_returns_false() { + let cfg = CycleConfig::default(); + assert!(!should_advance_cycle( + 50_000, + 0, + "deepseek-v4-pro", + &cfg, + false + )); + } + + #[test] + fn should_advance_at_threshold_returns_true() { + let cfg = CycleConfig::default(); + assert!(should_advance_cycle( + DEFAULT_CYCLE_THRESHOLD_TOKENS as u64, + 0, + "deepseek-v4-pro", + &cfg, + false + )); + } + + #[test] + fn should_advance_combines_input_and_output() { + let cfg = CycleConfig::default(); + // 60k + 60k = 120k > 110k threshold + assert!(should_advance_cycle( + 60_000, + 60_000, + "deepseek-v4-pro", + &cfg, + false + )); + } + + #[test] + fn in_flight_phase_guard_blocks_advance() { + let cfg = CycleConfig::default(); + assert!(!should_advance_cycle( + DEFAULT_CYCLE_THRESHOLD_TOKENS as u64 * 2, + 0, + "deepseek-v4-pro", + &cfg, + true, + )); + } + + #[test] + fn disabled_config_blocks_advance() { + let cfg = CycleConfig { + enabled: false, + ..Default::default() + }; + assert!(!should_advance_cycle( + DEFAULT_CYCLE_THRESHOLD_TOKENS as u64 * 2, + 0, + "deepseek-v4-pro", + &cfg, + false, + )); + } + + #[test] + fn extract_carry_forward_pulls_block() { + let raw = "Here is your handoff:\n\nDecision A: chose X because Y.\n\nDone."; + assert_eq!(extract_carry_forward(raw), "Decision A: chose X because Y."); + } + + #[test] + fn extract_carry_forward_handles_missing_close_tag() { + let raw = "\nDecision A: chose X."; + // Missing close tag → returns the tail, trimmed. + assert_eq!(extract_carry_forward(raw), "Decision A: chose X."); + } + + #[test] + fn extract_carry_forward_no_tags_returns_trimmed_body() { + let raw = " Decision A: chose X. "; + assert_eq!(extract_carry_forward(raw), "Decision A: chose X."); + } + + #[test] + fn extract_carry_forward_case_insensitive() { + let raw = "\nState here.\n"; + assert_eq!(extract_carry_forward(raw), "State here."); + } + + #[test] + fn enforce_briefing_cap_truncates_oversized_text() { + let max_tokens = 10; // 10 * 4 = 40 chars + let big = "x".repeat(200); + let bounded = enforce_briefing_cap(&big, max_tokens); + assert!(bounded.starts_with(&"x".repeat(40))); + assert!(bounded.contains("[...briefing truncated")); + } + + #[test] + fn enforce_briefing_cap_passes_short_text_through() { + let txt = "hello world"; + assert_eq!(enforce_briefing_cap(txt, 100), "hello world"); + } + + #[test] + fn build_seed_messages_empty_when_all_inputs_empty() { + let seeds = build_seed_messages(None, None, None); + assert!(seeds.is_empty()); + } + + #[test] + fn build_seed_messages_includes_state_briefing_and_pending() { + let briefing = CycleBriefing { + cycle: 1, + timestamp: Utc::now(), + briefing_text: "Decisions: chose A.".to_string(), + token_estimate: 5, + }; + + let seeds = build_seed_messages( + Some("## Cycle State\n- Mode: agent"), + Some(&briefing), + Some("Continue working on issue #124"), + ); + + // Expected layout: state user + ack assistant + briefing user + ack assistant + pending user. + assert_eq!(seeds.len(), 5); + assert_eq!(seeds[0].role, "user"); + assert_eq!(seeds[1].role, "assistant"); + assert_eq!(seeds[2].role, "user"); + assert_eq!(seeds[3].role, "assistant"); + assert_eq!(seeds[4].role, "user"); + + if let ContentBlock::Text { text, .. } = &seeds[0].content[0] { + assert!(text.contains("[CYCLE STATE")); + assert!(text.contains("agent")); + } else { + panic!("expected text block"); + } + if let ContentBlock::Text { text, .. } = &seeds[2].content[0] { + assert!(text.contains("[CYCLE BRIEFING")); + assert!(text.contains("")); + assert!(text.contains("Decisions: chose A.")); + } else { + panic!("expected text block"); + } + if let ContentBlock::Text { text, .. } = &seeds[4].content[0] { + assert_eq!(text, "Continue working on issue #124"); + } else { + panic!("expected text block"); + } + } + + #[test] + fn build_seed_messages_skips_blank_pending() { + let seeds = build_seed_messages(Some("## State"), None, Some(" ")); + // State block + ack — no pending message. + assert_eq!(seeds.len(), 2); + assert_eq!(seeds[0].role, "user"); + assert_eq!(seeds[1].role, "assistant"); + } + + #[test] + fn structured_state_to_system_block_renders_minimal() { + let state = StructuredState { + mode_label: "agent".to_string(), + workspace: PathBuf::from("/tmp/ws"), + cwd: None, + working_set_summary: None, + todo_snapshot: None, + plan_snapshot: None, + subagent_snapshots: Vec::new(), + }; + let block = state.to_system_block().expect("renders"); + assert!(block.contains("Mode: `agent`")); + assert!(block.contains("Workspace: `/tmp/ws`")); + } + + #[test] + fn archive_cycle_writes_jsonl_with_header_and_messages() { + let dir = tempdir().expect("tempdir"); + let session_id = format!("test-session-{}", uuid::Uuid::new_v4()); + + // Override HOME so archive_dir_for resolves into our tempdir. + // SAFETY: Test isolation only. Restore on drop via the tempdir + // guard. set_var is not concurrency-safe but cargo test runs each + // process serially per test binary unless --test-threads says + // otherwise; this single-threaded write is acceptable for our + // archive smoke test. + let original_home = std::env::var("HOME").ok(); + // SAFETY: see comment above — single-threaded test path. + unsafe { + std::env::set_var("HOME", dir.path()); + } + + let messages = vec![ + user_msg("hello"), + asst_msg("hi"), + user_msg("can you read Cargo.toml?"), + ]; + + let started = Utc::now(); + let path = archive_cycle(&session_id, 1, &messages, "deepseek-v4-pro", started) + .expect("archive_cycle should succeed"); + + assert!(path.exists(), "archive file should exist on disk"); + assert!(path.to_string_lossy().ends_with("/1.jsonl")); + + let contents = std::fs::read_to_string(&path).expect("read archive back"); + let mut lines = contents.lines(); + + let header_line = lines.next().expect("header line present"); + let header: CycleArchiveHeader = serde_json::from_str(header_line).expect("header parses"); + assert_eq!(header.cycle, 1); + assert_eq!(header.session_id, session_id); + assert_eq!(header.model, "deepseek-v4-pro"); + assert_eq!(header.message_count, 3); + assert_eq!(header.schema_version, CYCLE_ARCHIVE_SCHEMA_VERSION); + + for expected in &messages { + let line = lines.next().expect("message line present"); + let parsed: Message = serde_json::from_str(line).expect("message parses"); + assert_eq!(&parsed, expected); + } + assert!(lines.next().is_none(), "no extra trailing lines"); + + // Restore HOME so subsequent tests aren't surprised. + // SAFETY: see set_var comment above. + unsafe { + match original_home { + Some(value) => std::env::set_var("HOME", value), + None => std::env::remove_var("HOME"), + } + } + } + + #[test] + fn open_archive_rejects_newer_schema_version() { + let dir = tempdir().expect("tempdir"); + let path = dir.path().join("999.jsonl"); + let header = CycleArchiveHeader { + schema_version: CYCLE_ARCHIVE_SCHEMA_VERSION + 5, + cycle: 999, + session_id: "future-session".to_string(), + model: "deepseek-v9".to_string(), + started: Utc::now(), + ended: Utc::now(), + message_count: 0, + }; + let mut payload = serde_json::to_string(&header).unwrap(); + payload.push('\n'); + std::fs::write(&path, payload).unwrap(); + + let err = open_archive(&path).expect_err("must reject newer schema version"); + let msg = format!("{err:#}"); + assert!(msg.contains("newer than supported"), "got: {msg}"); + } + + /// Mock `produce_briefing`-style flow purely client-side: we feed a known + /// raw string through `extract_carry_forward` + `enforce_briefing_cap` + /// and assert the same result we'd produce after a real LLM call. + /// Avoids spinning up a live mock server while still proving the + /// extraction contract. + #[test] + fn briefing_extraction_pipeline_preserves_block() { + let raw = "thinking: ok\n\nDecision: pick lib A; constraint: no async.\n\n"; + let extracted = extract_carry_forward(raw); + let bounded = enforce_briefing_cap(&extracted, 50); + assert_eq!(bounded, "Decision: pick lib A; constraint: no async."); + } +} diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 605261c8..4e083795 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -20,6 +20,7 @@ mod commands; mod compaction; mod config; mod core; +mod cycle_manager; mod deepseek_theme; mod error_taxonomy; mod eval; @@ -2866,8 +2867,13 @@ async fn run_exec_agent( use crate::tools::todo::new_shared_todo_list; use crate::tui::app::AppMode; + // Compaction defaults to disabled in v0.6.6: the checkpoint-restart cycle + // architecture (issue #124) handles long-context resets via fresh contexts + // rather than progressive summarization. The compaction config is still + // wired through so users who explicitly opt back in via [compaction] + // enabled = true keep their old behavior. let compaction = CompactionConfig { - enabled: true, + enabled: false, model: model.to_string(), token_threshold: compaction_threshold_for_model(model), message_threshold: compaction_message_threshold_for_model(model), @@ -2885,6 +2891,7 @@ async fn run_exec_agent( max_subagents, features: config.features(), compaction, + cycle: crate::cycle_manager::CycleConfig::default(), capacity: crate::core::capacity::CapacityControllerConfig::from_app_config(config), todos: new_shared_todo_list(), plan_state: new_shared_plan_state(), diff --git a/crates/tui/src/prompts/cycle_handoff.md b/crates/tui/src/prompts/cycle_handoff.md new file mode 100644 index 00000000..c66c8ac3 --- /dev/null +++ b/crates/tui/src/prompts/cycle_handoff.md @@ -0,0 +1,76 @@ +# Cycle Handoff Briefing + +You are about to cross a context cycle boundary. The conversation so far has +crossed the per-cycle token budget, so this entire transcript is going to be +**archived to disk** and the next turn will start with a fresh context: the +original system prompt, structured state (todos, plan, working set, open +sub-agents), the user's pending message, and a free-form briefing that **you +write right now**. + +Your job, in this single message: produce a `` block of at most +**3,000 tokens** that captures the irreducible state the *next cycle's you* will +need to continue without redoing work. + +## What to put in `` + +Write concrete prose, not bullet-point summaries of the transcript. Cover: + +- **Decisions made and why.** The things you've chosen and the reasoning that + led there. Not "we discussed options" — name the choice and the constraint + that made it the right one. +- **Constraints discovered.** Concrete facts about the codebase, environment, + user preferences, or external systems that the next cycle will trip over if + it doesn't know them. (e.g. "the audit log is JSONL not JSON", "the user + insists on no `unwrap()` in non-test code", "macOS sandbox blocks raw + sockets in tools/exec.rs".) +- **Hypotheses being tested.** Open questions you're actively investigating, + what you're trying to falsify, what evidence would change your mind. +- **Approaches that failed.** Dead ends with enough detail that the next + cycle won't repeat them. Name the approach and the specific reason it + didn't work, not just "tried X, didn't work". +- **Open questions for the user.** Things you're blocked on that the next + cycle should ask about if the user doesn't volunteer them. + +## What NOT to put in `` + +- Tool output bytes. (They're already archived to disk.) +- File contents you read. (The next cycle can re-read them — pricier than a + briefing token, but cheaper than a wrong assumption built on a stale + paraphrase.) +- Step-by-step recap of what you did. The next cycle does not need to know + the order of operations; it needs to know the *current state*. +- Pleasantries, throat-clearing, framing language. Every token matters. + +## Format + +Open with `` on its own line. Close with `` on +its own line. No prose outside the tags. No nested tags. No code fences around +the block itself (you can use code fences inside if you need to quote a +specific snippet). + +The `recall_archive` tool is available in the next cycle. It searches the +archived transcripts (BM25 over message text, top-N hits) when your briefing +missed something the next cycle needs. Use it sparingly — frequent recalls +mean your briefing was too sparse, so refine your *next* briefing rather than +leaning on the archive. Don't try to be exhaustive here: be precise about the +load-bearing state and trust the archive for the rest. + +## Example shape (do not copy verbatim — write your own) + +``` + +Working on issue #124 (cycle-restart). Key decisions: (1) trigger at 110K +tokens not 128K — need ~8.5K headroom for the briefing turn itself plus +next-turn growth before the next boundary; (2) archive to JSONL with a +header line so future tools can stream-read without parsing the whole +file. Constraint discovered: DeepSeek V4 thinking-mode requires +reasoning_content replay on assistant messages with tool calls — so seed +messages can't include orphan tool calls from the archived cycle. The +approach of "summarize then keep recent messages" (the old compaction +path) was failing because the model couldn't tell which fragments were +verbatim vs. paraphrased; replacing it entirely. Open question for user: +do they want per-model briefing token caps, or one global cap? + +``` + +Now write your `` for this conversation. diff --git a/crates/tui/src/runtime_threads.rs b/crates/tui/src/runtime_threads.rs index 3b4dfb54..2d8aa206 100644 --- a/crates/tui/src/runtime_threads.rs +++ b/crates/tui/src/runtime_threads.rs @@ -35,7 +35,13 @@ use crate::tui::app::AppMode; const EVENT_CHANNEL_CAPACITY: usize = 1024; const MAX_ACTIVE_THREADS_DEFAULT: usize = 8; const SUMMARY_LIMIT: usize = 280; -const CURRENT_RUNTIME_SCHEMA_VERSION: u32 = 1; +/// Bumped to 2 for v0.6.6 — see issue #124. The persisted thread/turn/item +/// records didn't change shape, but the live engine semantics did: cycle +/// boundaries advance the `Session.cycle_count` and produce archived JSONL +/// files at `~/.deepseek/sessions//cycles/.jsonl`. A v1 reader on a +/// session written by v2 wouldn't know about the cycle archive directory and +/// might misinterpret message counts; bumping is the safe choice. +const CURRENT_RUNTIME_SCHEMA_VERSION: u32 = 2; const RUNTIME_RESTART_REASON: &str = "Interrupted by process restart"; const fn default_runtime_schema_version() -> u32 { @@ -1335,8 +1341,11 @@ impl RuntimeThreadManager { } } + // Compaction defaults to disabled in v0.6.6 — the cycle architecture + // (issue #124) handles long-context resets. Threads keep the + // legacy summarizer wired off unless an operator opts in via config. let compaction = CompactionConfig { - enabled: true, + enabled: false, model: thread.model.clone(), token_threshold: compaction_threshold_for_model(&thread.model), message_threshold: compaction_message_threshold_for_model(&thread.model), @@ -1353,6 +1362,7 @@ impl RuntimeThreadManager { max_subagents: self.config.max_subagents().clamp(1, MAX_SUBAGENTS), features: self.config.features(), compaction, + cycle: crate::cycle_manager::CycleConfig::default(), capacity: crate::core::capacity::CapacityControllerConfig::from_app_config( &self.config, ), @@ -1681,6 +1691,26 @@ impl RuntimeThreadManager { .await?; } } + EngineEvent::CycleAdvanced { from, to, briefing } => { + // Surface the cycle boundary in the runtime event timeline so + // background-task subscribers and replay see it. The actual + // archive write is the engine's responsibility (see + // `cycle_manager::archive_cycle`); this event is informational. + self.emit_event( + &thread_id, + Some(&turn_id), + None, + "cycle.advanced", + json!({ + "from": from, + "to": to, + "briefing_tokens": briefing.token_estimate, + "cycle": briefing.cycle, + "timestamp": briefing.timestamp, + }), + ) + .await?; + } EngineEvent::CoherenceState { state, label, @@ -2493,6 +2523,39 @@ mod tests { } } + #[test] + fn store_load_thread_rejects_newer_schema_version() { + let dir = test_runtime_dir(); + let store = RuntimeThreadStore::open(dir.clone()).expect("open store"); + + // Construct a thread record persisted with a future schema version. + let mut thread = sample_thread("thr_future"); + thread.schema_version = CURRENT_RUNTIME_SCHEMA_VERSION + 1; + + // Bypass save_thread (which would respect our local schema_version) + // by writing the JSON directly so we can simulate a future writer. + let path = store.threads_dir.join(format!("{}.json", thread.id)); + std::fs::create_dir_all(path.parent().unwrap()).expect("mkdirs"); + let payload = serde_json::to_string(&thread).expect("serialize thread"); + std::fs::write(&path, payload).expect("write thread"); + + let err = store + .load_thread(&thread.id) + .expect_err("load_thread must reject newer schema"); + let msg = format!("{err:#}"); + assert!(msg.contains("newer than supported"), "got: {msg}"); + + // Cleanup so we don't leak across tests. + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn current_runtime_schema_version_is_two_on_v066() { + // Locks the bump in (issue #124). Bump deliberately when persisted + // shape changes. + assert_eq!(CURRENT_RUNTIME_SCHEMA_VERSION, 2); + } + #[test] fn enforce_lru_capacity_does_not_loop_when_all_threads_are_active() { let mut active = ActiveThreads::default(); diff --git a/crates/tui/src/tools/mod.rs b/crates/tui/src/tools/mod.rs index 93da2539..e080f0a5 100644 --- a/crates/tui/src/tools/mod.rs +++ b/crates/tui/src/tools/mod.rs @@ -13,6 +13,7 @@ pub mod git_history; pub mod parallel; pub mod plan; pub mod project; +pub mod recall_archive; pub mod registry; pub mod review; pub mod rlm; diff --git a/crates/tui/src/tools/recall_archive.rs b/crates/tui/src/tools/recall_archive.rs new file mode 100644 index 00000000..f413e54e --- /dev/null +++ b/crates/tui/src/tools/recall_archive.rs @@ -0,0 +1,711 @@ +//! `recall_archive` tool — search prior cycle archives (issue #127). +//! +//! Companion to the checkpoint-restart cycle architecture (#124). When the +//! agent's `` briefing missed something, this tool scans the +//! on-disk JSONL archives at `~/.deepseek/sessions//cycles/*.jsonl` and +//! returns the top-N matching messages. +//! +//! ## Scoring +//! +//! v1: a simplified BM25 over tokenized message text. No external embedding +//! model, no cache — every call walks the archives. Acceptable because the +//! per-cycle archive is bounded by the 110K cycle threshold and most sessions +//! cross at most a handful of cycles. v2 (later) can add an +//! `~/.deepseek/embeddings/` cache built on archive write. + +use std::collections::HashMap; +use std::fs::read_dir; +use std::path::PathBuf; + +use async_trait::async_trait; +use serde::Serialize; +use serde_json::{Value, json}; + +use super::spec::{ + ApprovalRequirement, ToolCapability, ToolContext, ToolError, ToolResult, ToolSpec, + optional_u64, required_str, +}; +use crate::cycle_manager::open_archive; +use crate::models::{ContentBlock, Message}; + +const DEFAULT_MAX_RESULTS: usize = 3; +const HARD_MAX_RESULTS: usize = 10; +const CONTEXT_WINDOW_CHARS: usize = 240; + +/// BM25 hyper-parameters. Standard defaults from the literature. +const K1: f64 = 1.5; +const B: f64 = 0.75; + +pub struct RecallArchiveTool; + +#[derive(Debug, Clone, Serialize)] +struct RecallHit { + cycle: u32, + /// 0-based message index within the cycle. + message_index: usize, + role: String, + score: f64, + /// Short window around the best match, with `…` markers when truncated. + excerpt: String, +} + +#[async_trait] +impl ToolSpec for RecallArchiveTool { + fn name(&self) -> &'static str { + "recall_archive" + } + + fn description(&self) -> &'static str { + "Search prior context cycles for content not in your briefing. Use sparingly — \ + frequent recalls mean your briefing was too sparse; refine your next briefing." + } + + fn input_schema(&self) -> Value { + json!({ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query. Tokenized and BM25-scored against archived messages." + }, + "cycle": { + "type": "integer", + "description": "Optional: limit to a specific prior cycle number." + }, + "max_results": { + "type": "integer", + "description": "Maximum hits to return (default 3, hard-capped at 10)." + } + }, + "required": ["query"] + }) + } + + fn capabilities(&self) -> Vec { + vec![ToolCapability::ReadOnly] + } + + fn approval_requirement(&self) -> ApprovalRequirement { + ApprovalRequirement::Auto + } + + async fn execute(&self, input: Value, context: &ToolContext) -> Result { + let query = required_str(&input, "query")?.trim().to_string(); + if query.is_empty() { + return Err(ToolError::invalid_input("query cannot be empty")); + } + + let max_results = (optional_u64(&input, "max_results", DEFAULT_MAX_RESULTS as u64) + as usize) + .clamp(1, HARD_MAX_RESULTS); + let cycle_filter = input.get("cycle").and_then(Value::as_u64).map(|n| n as u32); + + let session_id = context.state_namespace.as_str(); + let archives = list_archives(session_id).map_err(|err| { + ToolError::execution_failed(format!("Failed to enumerate cycle archives: {err}")) + })?; + + if archives.is_empty() { + return Ok(ToolResult::success(json!({ + "hits": [], + "note": "No prior cycle archives exist. The session has not crossed a cycle boundary yet." + }).to_string())); + } + + let documents = load_messages(&archives, cycle_filter).map_err(|err| { + ToolError::execution_failed(format!("Failed to read cycle archives: {err}")) + })?; + + if documents.is_empty() { + let note = match cycle_filter { + Some(c) => format!("Cycle {c} has no messages in its archive."), + None => "Cycle archives exist but contain no message text.".to_string(), + }; + return Ok(ToolResult::success( + json!({"hits": [], "note": note}).to_string(), + )); + } + + let query_tokens = tokenize(&query); + if query_tokens.is_empty() { + return Err(ToolError::invalid_input( + "query has no scoring tokens after tokenization", + )); + } + + let hits = score_bm25(&documents, &query_tokens, max_results); + + let payload = json!({ + "query": query, + "cycles_searched": archives.len(), + "messages_scanned": documents.len(), + "hits": hits, + }); + + Ok(ToolResult::success(payload.to_string())) + } +} + +/// One archived message + its provenance, ready to score. +struct ArchivedDoc { + cycle: u32, + message_index: usize, + role: String, + text: String, + tokens: Vec, +} + +fn archive_root(session_id: &str) -> Result { + let home = dirs::home_dir().ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::NotFound, + "Could not resolve home directory for cycle archive root", + ) + })?; + Ok(home + .join(".deepseek") + .join("sessions") + .join(session_id) + .join("cycles")) +} + +/// Enumerate all archive files for a session, sorted by cycle number ascending. +fn list_archives(session_id: &str) -> Result, std::io::Error> { + let root = archive_root(session_id)?; + if !root.exists() { + return Ok(Vec::new()); + } + let mut archives: Vec<(u32, PathBuf)> = Vec::new(); + for entry in read_dir(&root)? { + let entry = entry?; + let path = entry.path(); + if path.extension().and_then(|s| s.to_str()) != Some("jsonl") { + continue; + } + let stem = match path.file_stem().and_then(|s| s.to_str()) { + Some(s) => s, + None => continue, + }; + let Ok(cycle_n) = stem.parse::() else { + continue; + }; + archives.push((cycle_n, path)); + } + archives.sort_by_key(|(n, _)| *n); + Ok(archives) +} + +/// Read messages from each archive into a flat scoreable list. +fn load_messages( + archives: &[(u32, PathBuf)], + cycle_filter: Option, +) -> Result, anyhow::Error> { + let mut docs: Vec = Vec::new(); + for (cycle_n, path) in archives { + if let Some(filter) = cycle_filter + && *cycle_n != filter + { + continue; + } + let (header, reader) = open_archive(path)?; + for (idx, message_result) in reader.enumerate() { + let message = message_result?; + let text = message_text(&message); + if text.trim().is_empty() { + continue; + } + let tokens = tokenize(&text); + if tokens.is_empty() { + continue; + } + docs.push(ArchivedDoc { + cycle: header.cycle, + message_index: idx, + role: message.role, + text, + tokens, + }); + } + } + Ok(docs) +} + +/// Concatenate all text-bearing content blocks of a message. +fn message_text(message: &Message) -> String { + let mut out = String::new(); + let mut push = |s: &str| { + if !out.is_empty() { + out.push('\n'); + } + out.push_str(s); + }; + for block in &message.content { + match block { + ContentBlock::Text { text, .. } => push(text), + ContentBlock::ToolUse { name, input, .. } => { + push(&format!("[tool_use {name}] {input}")); + } + ContentBlock::ToolResult { content, .. } => { + push(&format!("[tool_result] {content}")); + } + ContentBlock::Thinking { thinking } => { + push(&format!("[thinking] {thinking}")); + } + ContentBlock::ServerToolUse { name, input, .. } => { + push(&format!("[server_tool_use {name}] {input}")); + } + ContentBlock::ToolSearchToolResult { content, .. } => { + push(&format!("[tool_search_result] {content}")); + } + ContentBlock::CodeExecutionToolResult { content, .. } => { + push(&format!("[code_execution_result] {content}")); + } + } + } + out +} + +/// Lower-case, split on non-alphanumerics, drop short tokens. Same recipe as +/// most lightweight BM25 implementations. +fn tokenize(text: &str) -> Vec { + text.to_ascii_lowercase() + .split(|c: char| !c.is_alphanumeric()) + .filter(|s| s.len() >= 2) + .map(str::to_string) + .collect() +} + +/// Score documents against a query using BM25, return the top-N. +fn score_bm25(docs: &[ArchivedDoc], query_tokens: &[String], max_results: usize) -> Vec { + if docs.is_empty() || query_tokens.is_empty() { + return Vec::new(); + } + + let n = docs.len() as f64; + let avgdl: f64 = docs.iter().map(|d| d.tokens.len() as f64).sum::() / n.max(1.0); + + // Document frequency per query term. + let mut df: HashMap<&str, u64> = HashMap::new(); + for token in query_tokens { + let mut count = 0u64; + for doc in docs { + if doc.tokens.iter().any(|t| t == token) { + count += 1; + } + } + df.insert(token.as_str(), count); + } + + let mut scored: Vec<(f64, &ArchivedDoc)> = docs + .iter() + .map(|doc| (bm25_doc_score(doc, query_tokens, &df, n, avgdl), doc)) + .filter(|(score, _)| *score > 0.0) + .collect(); + + scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); + scored.truncate(max_results); + + scored + .into_iter() + .map(|(score, doc)| RecallHit { + cycle: doc.cycle, + message_index: doc.message_index, + role: doc.role.clone(), + score: round_score(score), + excerpt: best_window(&doc.text, query_tokens, CONTEXT_WINDOW_CHARS), + }) + .collect() +} + +fn bm25_doc_score( + doc: &ArchivedDoc, + query_tokens: &[String], + df: &HashMap<&str, u64>, + n: f64, + avgdl: f64, +) -> f64 { + let dl = doc.tokens.len() as f64; + if dl == 0.0 { + return 0.0; + } + let mut score = 0.0; + for token in query_tokens { + let tf = doc.tokens.iter().filter(|t| *t == token).count() as f64; + if tf == 0.0 { + continue; + } + let df_t = df.get(token.as_str()).copied().unwrap_or(0) as f64; + let idf = ((n - df_t + 0.5) / (df_t + 0.5) + 1.0).ln(); + let denom = tf + K1 * (1.0 - B + B * (dl / avgdl.max(1.0))); + score += idf * (tf * (K1 + 1.0)) / denom.max(f64::EPSILON); + } + score +} + +fn round_score(score: f64) -> f64 { + (score * 1000.0).round() / 1000.0 +} + +/// Find the substring of `text` of at most `window_chars` characters that +/// contains the densest cluster of query tokens. Returns it with `…` markers +/// when truncated. Falls back to a head-of-text excerpt when no tokens hit. +fn best_window(text: &str, query_tokens: &[String], window_chars: usize) -> String { + let lower = text.to_ascii_lowercase(); + let mut hit_positions: Vec = Vec::new(); + for token in query_tokens { + let mut start = 0usize; + while let Some(pos) = lower[start..].find(token.as_str()) { + hit_positions.push(start + pos); + start += pos + token.len(); + } + } + if hit_positions.is_empty() { + return head_excerpt(text, window_chars); + } + hit_positions.sort_unstable(); + + // Greedy: center the window on the first hit, walk forward as long as + // additional hits fit in the window. + let center = hit_positions[0]; + let half = window_chars / 2; + let start = center.saturating_sub(half); + let end = (start + window_chars).min(text.len()); + let start = align_char_boundary(text, start, false); + let end = align_char_boundary(text, end, true); + let prefix = if start > 0 { "…" } else { "" }; + let suffix = if end < text.len() { "…" } else { "" }; + format!("{prefix}{}{suffix}", &text[start..end]) +} + +fn head_excerpt(text: &str, max_chars: usize) -> String { + if text.len() <= max_chars { + return text.to_string(); + } + let cut = align_char_boundary(text, max_chars, true); + format!("{}…", &text[..cut]) +} + +/// Walk left or right until `idx` lands on a UTF-8 char boundary. +fn align_char_boundary(text: &str, mut idx: usize, walk_right: bool) -> usize { + if idx >= text.len() { + return text.len(); + } + while idx > 0 && idx < text.len() && !text.is_char_boundary(idx) { + if walk_right { + idx += 1; + } else { + idx -= 1; + } + } + idx +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cycle_manager::archive_cycle; + use crate::models::{ContentBlock, Message}; + use chrono::Utc; + use tempfile::TempDir; + + fn user_msg(text: &str) -> Message { + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }], + } + } + + fn asst_msg(text: &str) -> Message { + Message { + role: "assistant".to_string(), + content: vec![ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }], + } + } + + /// Serializes HOME-mutating tests since cargo runs tests in parallel by + /// default. Held for the full test (no `.await` while holding it). + static HOME_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + + /// Guard that points HOME at a tempdir for the test's lifetime and + /// restores the original on drop. Holds `HOME_LOCK` to serialize. + struct HomeGuard { + _tmp: TempDir, + original: Option, + _lock: std::sync::MutexGuard<'static, ()>, + } + impl HomeGuard { + fn new() -> Self { + let lock = HOME_LOCK.lock().unwrap_or_else(|p| p.into_inner()); + let tmp = TempDir::new().expect("tempdir"); + let original = std::env::var("HOME").ok(); + // SAFETY: serialized by HOME_LOCK; only this thread mutates HOME + // for the duration of the guard. + unsafe { std::env::set_var("HOME", tmp.path()) }; + Self { + _tmp: tmp, + original, + _lock: lock, + } + } + } + impl Drop for HomeGuard { + fn drop(&mut self) { + // SAFETY: still holding HOME_LOCK. + unsafe { + match self.original.take() { + Some(v) => std::env::set_var("HOME", v), + None => std::env::remove_var("HOME"), + } + } + } + } + + fn fresh_session_id() -> String { + format!("test-{}", uuid::Uuid::new_v4()) + } + + fn ctx_for_session(workspace: &std::path::Path, session_id: &str) -> ToolContext { + ToolContext::new(workspace).with_state_namespace(session_id.to_string()) + } + + #[test] + fn tokenize_lowers_splits_drops_short() { + // Filter is `len >= 2`, so "a" and "0" drop; "42" stays. + let toks = tokenize("Hello, World! a 42 OAuth-2.0"); + assert_eq!(toks, vec!["hello", "world", "42", "oauth"]); + } + + #[test] + fn message_text_concatenates_blocks() { + let m = Message { + role: "user".to_string(), + content: vec![ + ContentBlock::Text { + text: "first".to_string(), + cache_control: None, + }, + ContentBlock::Text { + text: "second".to_string(), + cache_control: None, + }, + ], + }; + assert_eq!(message_text(&m), "first\nsecond"); + } + + #[test] + fn list_archives_handles_missing_dir() { + let _home = HomeGuard::new(); + let sid = fresh_session_id(); + let archives = list_archives(&sid).expect("list_archives"); + assert!(archives.is_empty()); + } + + #[test] + fn list_archives_sorts_by_cycle_number() { + let _home = HomeGuard::new(); + let sid = fresh_session_id(); + let now = Utc::now(); + archive_cycle(&sid, 3, &[user_msg("c3")], "deepseek-v4-pro", now).unwrap(); + archive_cycle(&sid, 1, &[user_msg("c1")], "deepseek-v4-pro", now).unwrap(); + archive_cycle(&sid, 2, &[user_msg("c2")], "deepseek-v4-pro", now).unwrap(); + let archives = list_archives(&sid).unwrap(); + let cycles: Vec = archives.iter().map(|(n, _)| *n).collect(); + assert_eq!(cycles, vec![1, 2, 3]); + } + + #[tokio::test] + async fn execute_returns_empty_when_no_archives() { + let _home = HomeGuard::new(); + let sid = fresh_session_id(); + let workspace = TempDir::new().unwrap(); + let ctx = ctx_for_session(workspace.path(), &sid); + let tool = RecallArchiveTool; + let result = tool + .execute(json!({"query": "anything"}), &ctx) + .await + .unwrap(); + assert!(result.content.contains("No prior cycle archives")); + } + + #[tokio::test] + async fn execute_finds_matching_messages() { + let _home = HomeGuard::new(); + let sid = fresh_session_id(); + let workspace = TempDir::new().unwrap(); + let ctx = ctx_for_session(workspace.path(), &sid); + let now = Utc::now(); + let messages = vec![ + user_msg("How does the cycle restart strategy work?"), + asst_msg("It archives messages to JSONL when crossing the 110K threshold."), + user_msg("What happens if briefing is too short?"), + asst_msg("Use recall_archive to retrieve specific past content from JSONL files."), + ]; + archive_cycle(&sid, 1, &messages, "deepseek-v4-pro", now).unwrap(); + + let tool = RecallArchiveTool; + let result = tool + .execute( + json!({"query": "JSONL archive briefing", "max_results": 3}), + &ctx, + ) + .await + .unwrap(); + assert!( + result.content.contains("\"cycle\":1"), + "got: {}", + result.content + ); + assert!( + result.content.contains("\"hits\""), + "got: {}", + result.content + ); + assert!(result.content.contains("JSONL"), "got: {}", result.content); + } + + #[tokio::test] + async fn execute_filters_by_cycle() { + let _home = HomeGuard::new(); + let sid = fresh_session_id(); + let workspace = TempDir::new().unwrap(); + let ctx = ctx_for_session(workspace.path(), &sid); + let now = Utc::now(); + archive_cycle( + &sid, + 1, + &[user_msg("alpha pattern")], + "deepseek-v4-pro", + now, + ) + .unwrap(); + archive_cycle( + &sid, + 2, + &[user_msg("alpha pattern")], + "deepseek-v4-pro", + now, + ) + .unwrap(); + + let tool = RecallArchiveTool; + let result = tool + .execute( + json!({"query": "alpha", "cycle": 2, "max_results": 5}), + &ctx, + ) + .await + .unwrap(); + assert!( + result.content.contains("\"cycle\":2"), + "got: {}", + result.content + ); + assert!( + !result.content.contains("\"cycle\":1"), + "got: {}", + result.content + ); + } + + #[tokio::test] + async fn execute_caps_max_results_at_hard_max() { + let _home = HomeGuard::new(); + let sid = fresh_session_id(); + let workspace = TempDir::new().unwrap(); + let ctx = ctx_for_session(workspace.path(), &sid); + let now = Utc::now(); + let mut messages: Vec = Vec::new(); + for i in 0..30 { + messages.push(user_msg(&format!("alpha message number {i}"))); + } + archive_cycle(&sid, 1, &messages, "deepseek-v4-pro", now).unwrap(); + + let tool = RecallArchiveTool; + let result = tool + .execute(json!({"query": "alpha", "max_results": 999}), &ctx) + .await + .unwrap(); + let count = result.content.matches("\"message_index\":").count(); + assert!(count <= HARD_MAX_RESULTS, "got {count} hits"); + } + + #[tokio::test] + async fn execute_rejects_empty_query() { + let _home = HomeGuard::new(); + let sid = fresh_session_id(); + let workspace = TempDir::new().unwrap(); + let ctx = ctx_for_session(workspace.path(), &sid); + let tool = RecallArchiveTool; + let err = tool + .execute(json!({"query": " "}), &ctx) + .await + .unwrap_err(); + assert!(matches!(err, ToolError::InvalidInput { .. })); + } + + #[test] + fn best_window_centers_on_first_hit() { + let text = "lorem ipsum dolor sit amet, the quick brown fox jumps over the lazy dog"; + let win = best_window(text, &["fox".to_string()], 30); + assert!(win.contains("fox"), "got: {win}"); + } + + #[test] + fn best_window_falls_back_to_head_when_no_hits() { + let text = "the quick brown fox jumps"; + let win = best_window(text, &["zzz".to_string()], 10); + assert!(win.starts_with("the quick"), "got: {win}"); + } + + #[test] + fn align_char_boundary_handles_multibyte() { + let text = "héllo world"; + // Index 2 is mid-byte for `é` (UTF-8 encoded as 2 bytes). + let aligned = align_char_boundary(text, 2, true); + assert!(text.is_char_boundary(aligned), "boundary check"); + } + + #[test] + fn bm25_returns_relevant_docs_drops_irrelevant() { + // BM25 length normalization can let very short matching docs outrank + // longer ones with higher term-frequency, so we only assert the + // weak invariant: matching docs are returned, non-matching docs are + // filtered out. + let docs = vec![ + ArchivedDoc { + cycle: 1, + message_index: 0, + role: "user".to_string(), + text: "cat dog cat dog cat".to_string(), + tokens: tokenize("cat dog cat dog cat"), + }, + ArchivedDoc { + cycle: 1, + message_index: 1, + role: "user".to_string(), + text: "fish bird".to_string(), + tokens: tokenize("fish bird"), + }, + ArchivedDoc { + cycle: 1, + message_index: 2, + role: "user".to_string(), + text: "cat sleeps".to_string(), + tokens: tokenize("cat sleeps"), + }, + ]; + let hits = score_bm25(&docs, &["cat".to_string()], 3); + let indices: Vec = hits.iter().map(|h| h.message_index).collect(); + assert!(indices.contains(&0), "doc 0 (3x cat) should appear"); + assert!(indices.contains(&2), "doc 2 (1x cat) should appear"); + assert!(!indices.contains(&1), "zero-score doc filtered"); + assert!(hits[0].score > 0.0, "top hit has positive score"); + } +} diff --git a/crates/tui/src/tools/registry.rs b/crates/tui/src/tools/registry.rs index eccb4331..d6427348 100644 --- a/crates/tui/src/tools/registry.rs +++ b/crates/tui/src/tools/registry.rs @@ -399,6 +399,14 @@ impl ToolRegistryBuilder { self.with_tool(Arc::new(ReviewTool::new(client, model))) } + /// Include the `recall_archive` tool — searches prior cycle archives + /// produced by the checkpoint-restart system (issue #127). + #[must_use] + pub fn with_recall_archive_tool(self) -> Self { + use super::recall_archive::RecallArchiveTool; + self.with_tool(Arc::new(RecallArchiveTool)) + } + /// Include note tool. #[must_use] pub fn with_note_tool(self) -> Self { @@ -486,6 +494,7 @@ impl ToolRegistryBuilder { .with_plan_tool(plan_state) .with_review_tool(client.clone(), model.clone()) .with_rlm_tool(client, model) + .with_recall_archive_tool() .with_subagent_tools(manager, runtime) } diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 71e3577d..b86f0150 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -11,6 +11,7 @@ use thiserror::Error; use crate::compaction::CompactionConfig; use crate::config::{ApiProvider, Config, has_api_key, save_api_key}; use crate::core::coherence::CoherenceState; +use crate::cycle_manager::{CycleBriefing, CycleConfig}; use crate::hooks::{HookContext, HookEvent, HookExecutor, HookResult}; use crate::models::{ Message, SystemPrompt, compaction_message_threshold_for_model, @@ -593,6 +594,18 @@ pub struct App { /// Ctrl+C keeps its current "interrupt this turn" semantics in those /// states. See [`App::arm_quit`] / [`App::quit_is_armed`]. pub quit_armed_until: Option, + + /// Number of checkpoint-restart cycles crossed in this session + /// (issue #124). Mirrors `Session.cycle_count` on the engine side. + pub cycle_count: u32, + + /// Briefings produced at past cycle boundaries, in chronological order. + /// Used by `/cycles` and `/cycle ` slash commands. + pub cycle_briefings: Vec, + + /// Active cycle configuration (token threshold, briefing cap, per-model + /// overrides). Loaded from config and forwarded to the engine. + pub cycle: CycleConfig, } /// Message queued while the engine is busy. @@ -873,6 +886,9 @@ impl App { coherence_state: CoherenceState::default(), last_send_at: None, quit_armed_until: None, + cycle_count: 0, + cycle_briefings: Vec::new(), + cycle: CycleConfig::default(), } } @@ -1888,6 +1904,12 @@ impl App { ..Default::default() } } + + /// Forward the active cycle configuration to the engine. Cloned so the + /// engine has its own copy to mutate per-session. + pub fn cycle_config(&self) -> CycleConfig { + self.cycle.clone() + } } pub fn media_attachment_reference(kind: &str, path: &Path, description: Option<&str>) -> String { diff --git a/crates/tui/src/tui/sidebar.rs b/crates/tui/src/tui/sidebar.rs index 8cd65320..0b721065 100644 --- a/crates/tui/src/tui/sidebar.rs +++ b/crates/tui/src/tui/sidebar.rs @@ -61,6 +61,19 @@ fn render_sidebar_plan(f: &mut Frame, area: Rect, app: &App) { let content_width = area.width.saturating_sub(4) as usize; let mut lines: Vec> = Vec::with_capacity(usize::from(area.height).max(4)); + // Cycle indicator (issue #124). Only shown once a boundary has fired — + // first-time users with cycle_count == 0 don't need this row of chrome. + if app.cycle_count > 0 { + lines.push(Line::from(Span::styled( + format!( + "cycles: {} (active: {})", + app.cycle_count, + app.cycle_count.saturating_add(1) + ), + Style::default().fg(theme.plan_summary_color), + ))); + } + match app.plan_state.try_lock() { Ok(plan) => { if plan.is_empty() { diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index aaf9cc1a..8c2590ff 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -327,6 +327,7 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig { max_subagents: app.max_subagents, features: config.features(), compaction: app.compaction_config(), + cycle: app.cycle_config(), capacity: crate::core::capacity::CapacityControllerConfig::from_app_config(config), todos: app.todos.clone(), plan_state: app.plan_state.clone(), @@ -682,6 +683,21 @@ async fn run_event_loop( app.is_compacting = false; app.status_message = Some(message); } + EngineEvent::CycleAdvanced { from, to, briefing } => { + // Mirror the engine-side counter on the UI app state + // so the sidebar / slash commands stay in sync, and + // record the briefing so `/cycle ` can show it. + app.cycle_count = to; + let briefing_tokens = briefing.token_estimate; + app.cycle_briefings.push(briefing); + let separator = format!( + "─── cycle {from} → {to} (briefing: {briefing_tokens} tokens) ───" + ); + app.add_message(HistoryCell::System { content: separator }); + app.status_message = Some(format!( + "↻ context refreshed (cycle {from} → {to}, briefing: {briefing_tokens} tokens carried)" + )); + } EngineEvent::CoherenceState { state, .. } => { app.coherence_state = state; }