diff --git a/crates/tui/src/commands/cycle.rs b/crates/tui/src/commands/cycle.rs
new file mode 100644
index 00000000..c965d8eb
--- /dev/null
+++ b/crates/tui/src/commands/cycle.rs
@@ -0,0 +1,222 @@
+//! Cycle commands: `/cycles` (list past cycle boundaries) and
+//! `/cycle <n>` (show one cycle's briefing in detail).
+
+use std::fmt::Write;
+
+use crate::tui::app::App;
+
+use super::CommandResult;
+
+/// `/cycles` — list past cycle handoffs in compact form.
+pub fn list_cycles(app: &App) -> CommandResult {
+    if app.cycle_briefings.is_empty() {
+        let msg = format!(
+            "No cycle boundaries have fired yet (current cycle: 1, threshold: {} tokens for {}).",
+            app.cycle.threshold_for(&app.model),
+            app.model
+        );
+        return CommandResult::message(msg);
+    }
+
+    let mut out = String::new();
+    let _ = writeln!(
+        out,
+        "Cycle handoffs in this session ({} total). Active cycle: {}.",
+        app.cycle_briefings.len(),
+        app.cycle_count.saturating_add(1),
+    );
+    out.push('\n');
+    for brief in &app.cycle_briefings {
+        let preview = first_line(&brief.briefing_text, 80);
+        let _ = writeln!(
+            out,
+            "  cycle {n}  @ {ts}  briefing: {tokens} tokens  ─ {preview}",
+            n = brief.cycle,
+            ts = brief.timestamp.to_rfc3339(),
+            tokens = brief.token_estimate,
+            preview = preview,
+        );
+    }
+    out.push('\n');
+    out.push_str("Use `/cycle <n>` to show the full briefing for a specific cycle.\n");
+    CommandResult::message(out)
+}
+
+/// `/cycle <n>` — print the full briefing for cycle `n`.
+pub fn show_cycle(app: &App, arg: Option<&str>) -> CommandResult {
+    let Some(raw) = arg.map(str::trim) else {
+        return CommandResult::error(
+            "Usage: /cycle <n>  — n is the cycle number from /cycles".to_string(),
+        );
+    };
+    if raw.is_empty() {
+        return CommandResult::error("Usage: /cycle <n>".to_string());
+    }
+    let Ok(n) = raw.parse::<u32>() else {
+        return CommandResult::error(format!(
+            "Cycle number must be a positive integer (got '{raw}')."
+        ));
+    };
+
+    let Some(brief) = app.cycle_briefings.iter().find(|b| b.cycle == n) else {
+        let known: Vec<String> = app
+            .cycle_briefings
+            .iter()
+            .map(|b| b.cycle.to_string())
+            .collect();
+        let known_str = if known.is_empty() {
+            "(none)".to_string()
+        } else {
+            known.join(", ")
+        };
+        return CommandResult::error(format!(
+            "Cycle {n} not found in this session. Known cycles: {known_str}."
+        ));
+    };
+
+    let mut out = String::new();
+    let _ = writeln!(
+        out,
+        "── Cycle {n}  ({ts})  briefing: {tokens} tokens ──",
+        n = brief.cycle,
+        ts = brief.timestamp.to_rfc3339(),
+        tokens = brief.token_estimate,
+    );
+    out.push('\n');
+    out.push_str(brief.briefing_text.trim());
+    out.push('\n');
+    CommandResult::message(out)
+}
+
+/// `/recall <query>` — user-initiated BM25 search of cycle archives.
+///
+/// Synchronous wrapper around `tools::recall_archive::RecallArchiveTool` so
+/// users can probe the archive without invoking the model. Output is the
+/// same JSON payload the agent would see; the assistant pretty-prints
+/// short results and dumps long ones inline.
+pub fn recall_archive(app: &App, arg: Option<&str>) -> CommandResult {
+    use crate::tools::recall_archive::RecallArchiveTool;
+    use crate::tools::spec::{ToolContext, ToolSpec};
+
+    let Some(raw) = arg.map(str::trim) else {
+        return CommandResult::error("Usage: /recall <query>".to_string());
+    };
+    if raw.is_empty() {
+        return CommandResult::error("Usage: /recall <query>".to_string());
+    }
+
+    let session_id = app
+        .current_session_id
+        .clone()
+        .unwrap_or_else(|| "workspace".to_string());
+
+    let context = ToolContext::new(app.workspace.clone()).with_state_namespace(session_id);
+    let tool = RecallArchiveTool;
+    let input = serde_json::json!({"query": raw});
+
+    let result = tokio::task::block_in_place(|| {
+        tokio::runtime::Handle::current().block_on(tool.execute(input, &context))
+    });
+
+    match result {
+        Ok(res) => CommandResult::message(res.content),
+        Err(err) => CommandResult::error(format!("recall_archive failed: {err}")),
+    }
+}
+
+/// Truncate `text` to its first non-empty line, capped at `max_chars`.
+fn first_line(text: &str, max_chars: usize) -> String {
+    let line = text
+        .lines()
+        .map(str::trim)
+        .find(|l| !l.is_empty())
+        .unwrap_or("");
+    if line.chars().count() <= max_chars {
+        line.to_string()
+    } else {
+        let prefix: String = line.chars().take(max_chars).collect();
+        format!("{prefix}…")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cycle_manager::CycleBriefing;
+    use crate::tui::app::{App, TuiOptions};
+    use chrono::Utc;
+    use std::path::PathBuf;
+
+    fn test_options() -> TuiOptions {
+        TuiOptions {
+            model: "deepseek-v4-pro".to_string(),
+            workspace: PathBuf::from("."),
+            allow_shell: false,
+            use_alt_screen: true,
+            use_mouse_capture: false,
+            use_bracketed_paste: true,
+            max_subagents: 1,
+            skills_dir: PathBuf::from("."),
+            memory_path: PathBuf::from("memory.md"),
+            notes_path: PathBuf::from("notes.txt"),
+            mcp_config_path: PathBuf::from("mcp.json"),
+            use_memory: false,
+            start_in_agent_mode: false,
+            skip_onboarding: true,
+            yolo: false,
+            resume_session_id: None,
+        }
+    }
+
+    #[test]
+    fn list_cycles_reports_no_boundaries_yet() {
+        let app = App::new(test_options(), &crate::config::Config::default());
+        let res = list_cycles(&app);
+        assert!(res.message.is_some());
+        assert!(
+            res.message
+                .as_deref()
+                .unwrap()
+                .contains("No cycle boundaries")
+        );
+    }
+
+    #[test]
+    fn show_cycle_rejects_nonexistent_cycle() {
+        let app = App::new(test_options(), &crate::config::Config::default());
+        let res = show_cycle(&app, Some("3"));
+        let msg = res.message.expect("error message");
+        assert!(msg.contains("Cycle 3 not found"), "got: {msg}");
+    }
+
+    #[test]
+    fn list_and_show_cycles_render_briefings() {
+        let mut app = App::new(test_options(), &crate::config::Config::default());
+        app.cycle_briefings.push(CycleBriefing {
+            cycle: 1,
+            timestamp: Utc::now(),
+            briefing_text: "Decision: chose A; constraint: no async.".to_string(),
+            token_estimate: 12,
+        });
+        app.cycle_count = 1;
+
+        let listed = list_cycles(&app).message.expect("list message");
+        assert!(listed.contains("cycle 1"));
+        assert!(listed.contains("12 tokens"));
+
+        let shown = show_cycle(&app, Some("1")).message.expect("show message");
+        assert!(shown.contains("Decision: chose A"));
+    }
+
+    #[test]
+    fn show_cycle_validates_argument() {
+        let app = App::new(test_options(), &crate::config::Config::default());
+        let res = show_cycle(&app, None);
+        let msg = res.message.expect("error message");
+        assert!(msg.contains("Usage: /cycle"));
+
+        let res = show_cycle(&app, Some("not-a-number"));
+        let msg = res.message.expect("error message");
+        assert!(msg.contains("must be a positive integer"));
+    }
+}
diff --git a/crates/tui/src/commands/mod.rs b/crates/tui/src/commands/mod.rs
index f60786b9..fbf7f98c 100644
--- a/crates/tui/src/commands/mod.rs
+++ b/crates/tui/src/commands/mod.rs
@@ -6,6 +6,7 @@
 mod attachment;
 mod config;
 mod core;
+mod cycle;
 mod debug;
 mod init;
 mod note;
@@ -204,9 +205,27 @@ pub const COMMANDS: &[CommandInfo] = &[
     CommandInfo {
         name: "compact",
         aliases: &[],
-        description: "Trigger context compaction to free up space",
+        description: "Trigger context compaction to free up space (legacy; v0.6.6 prefers cycle restart)",
         usage: "/compact",
     },
+    CommandInfo {
+        name: "cycles",
+        aliases: &[],
+        description: "List checkpoint-restart cycle handoffs in this session",
+        usage: "/cycles",
+    },
+    CommandInfo {
+        name: "cycle",
+        aliases: &[],
+        description: "Show the carry-forward briefing for a specific cycle",
+        usage: "/cycle <n>",
+    },
+    CommandInfo {
+        name: "recall",
+        aliases: &[],
+        description: "Search prior cycle archives (BM25 over message text)",
+        usage: "/recall <query>",
+    },
     CommandInfo {
         name: "export",
         aliases: &[],
@@ -357,6 +376,9 @@ pub fn execute(cmd: &str, app: &mut App) -> CommandResult {
         "sessions" | "resume" => session::sessions(app),
         "load" => session::load(app, arg),
         "compact" => session::compact(app),
+        "cycles" => cycle::list_cycles(app),
+        "cycle" => cycle::show_cycle(app, arg),
+        "recall" => cycle::recall_archive(app, arg),
         "export" => session::export(app, arg),
 
         // Config commands
diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index ae68e81b..49d84a4a 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -25,6 +25,10 @@ use crate::compaction::{
     CompactionConfig, compact_messages_safe, estimate_tokens, merge_system_prompts, should_compact,
 };
 use crate::config::{Config, DEFAULT_MAX_SUBAGENTS, DEFAULT_TEXT_MODEL};
+use crate::cycle_manager::{
+    CycleBriefing, CycleConfig, StructuredState, archive_cycle, build_seed_messages,
+    estimate_briefing_tokens, produce_briefing, should_advance_cycle,
+};
 use crate::features::{Feature, Features};
 use crate::llm_client::LlmClient;
 use crate::mcp::McpPool;
@@ -83,7 +87,15 @@ pub struct EngineConfig {
     /// Feature flags controlling tool availability.
     pub features: Features,
     /// Auto-compaction settings for long conversations.
+    ///
+    /// As of v0.6.6 the high-level summarization compaction (`compact_messages_safe`)
+    /// is **disabled by default**; the checkpoint-restart cycle architecture
+    /// (`cycle_manager`) replaces it. The compaction config is still wired through
+    /// for the per-tool-result truncation path (`compact_tool_result_for_context`)
+    /// and for users who explicitly opt back in via `[compaction] enabled = true`.
     pub compaction: CompactionConfig,
+    /// Checkpoint-restart cycle settings (issue #124).
+    pub cycle: CycleConfig,
     /// Capacity-controller settings.
     pub capacity: CapacityControllerConfig,
     /// Shared Todo list state.
@@ -109,6 +121,7 @@ impl Default for EngineConfig {
             max_subagents: DEFAULT_MAX_SUBAGENTS,
             features: Features::with_defaults(),
             compaction: CompactionConfig::default(),
+            cycle: CycleConfig::default(),
             capacity: CapacityControllerConfig::default(),
             todos: new_shared_todo_list(),
             plan_state: new_shared_plan_state(),
@@ -431,6 +444,7 @@ fn should_default_defer_tool(name: &str, mode: AppMode) -> bool {
             | "file_search"
             | "diagnostics"
             | "rlm"
+            | "recall_archive"
             | MULTI_TOOL_PARALLEL_NAME
             | "update_plan"
             | "todo_write"
@@ -1574,6 +1588,16 @@ impl Engine {
                 error,
             })
             .await;
+
+        // Checkpoint-restart cycle boundary (issue #124). The turn just
+        // settled cleanly — no in-flight tools, no streaming, no pending
+        // approval — so this is the safe phase to swap the context if we've
+        // crossed the per-cycle token threshold. We only fire on a
+        // Completed turn; Failed/Interrupted turns leave the buffer alone
+        // so the user can retry without a forced reset.
+        if matches!(status, TurnOutcomeStatus::Completed) {
+            self.maybe_advance_cycle(mode).await;
+        }
     }
 
     async fn handle_manual_compaction(&mut self) {
@@ -4491,6 +4515,154 @@ impl Engine {
         self.merge_compaction_summary(Some(prompt));
     }
 
+    /// Run the checkpoint-restart cycle boundary if the session has crossed
+    /// its token threshold (issue #124). No-op in the common case.
+    ///
+    /// Caller must invoke this only at a clean turn boundary (no in-flight
+    /// tool, no open stream, no pending approval modal). The phase guard
+    /// inside `should_advance_cycle` is a defence-in-depth check; the
+    /// engine's wider state machine is the primary enforcement layer.
+    ///
+    /// Sub-agents are intentionally NOT awaited: each sub-agent has its own
+    /// context, the parent's reset doesn't invalidate them. Their handles
+    /// are captured in the structured-state block so the next cycle can see
+    /// they're still running.
+    async fn maybe_advance_cycle(&mut self, mode: AppMode) {
+        if !should_advance_cycle(
+            self.session.total_usage.input_tokens,
+            self.session.total_usage.output_tokens,
+            &self.session.model,
+            &self.config.cycle,
+            false,
+        ) {
+            return;
+        }
+
+        let Some(client) = self.deepseek_client.clone() else {
+            crate::logging::warn(
+                "Cycle boundary skipped: API client not configured for briefing turn",
+            );
+            return;
+        };
+
+        let from = self.session.cycle_count;
+        let to = from.saturating_add(1);
+        let archive_started = self.session.current_cycle_started;
+        let max_briefing_tokens = self.config.cycle.briefing_max_for(&self.session.model);
+
+        let _ = self
+            .tx_event
+            .send(Event::status(format!(
+                "↻ context refreshing (cycle {from} → {to}, generating briefing…)"
+            )))
+            .await;
+
+        // 1. Generate the model-curated briefing. We do this *before*
+        //    archiving so a briefing-call failure leaves the cycle intact —
+        //    the user can keep working at higher token counts until the next
+        //    boundary check, rather than losing their context to a failed
+        //    handoff.
+        let briefing_text = match produce_briefing(
+            &client,
+            &self.session.model,
+            &self.session.messages,
+            max_briefing_tokens,
+        )
+        .await
+        {
+            Ok(text) => text,
+            Err(err) => {
+                crate::logging::warn(format!(
+                    "Cycle briefing turn failed; skipping cycle advance: {err}"
+                ));
+                let _ = self
+                    .tx_event
+                    .send(Event::status(format!(
+                        "↻ cycle handoff failed (continuing in cycle {from}): {err}"
+                    )))
+                    .await;
+                return;
+            }
+        };
+
+        let briefing_tokens = estimate_briefing_tokens(&briefing_text);
+        let now = chrono::Utc::now();
+        let briefing = CycleBriefing {
+            cycle: to,
+            timestamp: now,
+            briefing_text: briefing_text.clone(),
+            token_estimate: briefing_tokens,
+        };
+
+        // 2. Archive the cycle to disk. If the archive write fails we still
+        //    proceed with the swap — the briefing alone preserves enough
+        //    state to continue, and the user can recover the lost archive
+        //    from their session log if needed.
+        match archive_cycle(
+            &self.session.id,
+            to,
+            &self.session.messages,
+            &self.session.model,
+            archive_started,
+        ) {
+            Ok(path) => {
+                crate::logging::info(format!("Cycle {to} archived to {}", path.display()));
+            }
+            Err(err) => {
+                crate::logging::warn(format!(
+                    "Failed to archive cycle {to}; continuing with swap: {err}"
+                ));
+            }
+        }
+
+        // 3. Capture structured state. Locks are held only for the snapshot.
+        let state = StructuredState::capture(
+            mode.label(),
+            self.config.workspace.clone(),
+            std::env::current_dir().ok(),
+            &self.session.working_set,
+            &self.config.todos,
+            &self.config.plan_state,
+            Some(&self.subagent_manager),
+        )
+        .await;
+        let state_block = state.to_system_block();
+
+        // 4. Build the seed messages. The next cycle starts with the
+        //    base system prompt (refreshed below) and these seeds.
+        let seed_messages = build_seed_messages(
+            state_block.as_deref(),
+            Some(&briefing),
+            None, // pending_user_message — pulled from steer/queue elsewhere
+        );
+
+        // 5. Atomic swap.
+        self.session.messages = seed_messages;
+        self.session.cycle_count = to;
+        self.session.current_cycle_started = now;
+        self.session.cycle_briefings.push(briefing.clone());
+        // Drop any compaction summary — that path is incompatible with the
+        // fresh-context model and would Frankenstein-merge with the briefing.
+        self.session.compaction_summary_prompt = None;
+        self.refresh_system_prompt(mode);
+        self.emit_session_updated().await;
+
+        let _ = self
+            .tx_event
+            .send(Event::CycleAdvanced {
+                from,
+                to,
+                briefing: briefing.clone(),
+            })
+            .await;
+        let _ = self
+            .tx_event
+            .send(Event::status(format!(
+                "↻ context refreshed (cycle {from} → {to}, briefing: {briefing_tokens} tokens carried)"
+            )))
+            .await;
+    }
+
     /// Refresh the system prompt based on current mode and context.
     fn refresh_system_prompt(&mut self, mode: AppMode) {
         let working_set_summary = self
diff --git a/crates/tui/src/core/events.rs b/crates/tui/src/core/events.rs
index a2474868..b5be4034 100644
--- a/crates/tui/src/core/events.rs
+++ b/crates/tui/src/core/events.rs
@@ -121,6 +121,17 @@ pub enum Event {
         message: String,
     },
 
+    /// Checkpoint-restart cycle boundary advanced (issue #124). The previous
+    /// cycle has already been archived to disk; the engine has swapped its
+    /// in-memory message buffer for the seed messages of cycle `to`.
+    /// Carries the full briefing record so the UI can populate
+    /// `app.cycle_briefings` for `/cycle <n>`.
+    CycleAdvanced {
+        from: u32,
+        to: u32,
+        briefing: crate::cycle_manager::CycleBriefing,
+    },
+
     /// Capacity decision telemetry.
     #[allow(dead_code)]
     CapacityDecision {
diff --git a/crates/tui/src/core/session.rs b/crates/tui/src/core/session.rs
index 37629848..94791c40 100644
--- a/crates/tui/src/core/session.rs
+++ b/crates/tui/src/core/session.rs
@@ -2,9 +2,11 @@
 //!
 //! Tracks conversation history, token usage, and session metadata.
 
+use crate::cycle_manager::CycleBriefing;
 use crate::models::{Message, SystemPrompt, Usage};
 use crate::project_context::{ProjectContext, load_project_context_with_parents};
 use crate::working_set::WorkingSet;
+use chrono::{DateTime, Utc};
 use std::path::PathBuf;
 
 /// Session state for the engine.
@@ -55,6 +57,19 @@ pub struct Session {
 
     /// Repo-aware working set for context management.
     pub working_set: WorkingSet,
+
+    /// Number of cycle boundaries crossed in this session (issue #124). The
+    /// active cycle index is `cycle_count + 1` (cycles are 1-based for users).
+    pub cycle_count: u32,
+
+    /// UTC start time of the *current* cycle. Updated when the engine resets
+    /// the conversation buffer. Used by archive headers and the `/cycles`
+    /// command's display.
+    pub current_cycle_started: DateTime<Utc>,
+
+    /// Briefings produced at past cycle boundaries, in chronological order.
+    /// Bounded growth: one entry per cycle, briefing capped at ~3,000 tokens.
+    pub cycle_briefings: Vec<CycleBriefing>,
 }
 
 /// Cumulative usage statistics for a session.
@@ -117,6 +132,9 @@ impl Session {
                 None
             },
             working_set: WorkingSet::default(),
+            cycle_count: 0,
+            current_cycle_started: Utc::now(),
+            cycle_briefings: Vec::new(),
         }
     }
 
diff --git a/crates/tui/src/cycle_manager.rs b/crates/tui/src/cycle_manager.rs
new file mode 100644
index 00000000..c26693d6
--- /dev/null
+++ b/crates/tui/src/cycle_manager.rs
@@ -0,0 +1,1004 @@
+//! Checkpoint-restart cycle management for long-running sessions (issue #124).
+//!
+//! ## Why
+//!
+//! DeepSeek V4's empirical retrieval elbow is 128K tokens (paper Figure 9 —
+//! 8K/0.90, 64K/0.87, 128K/0.85, 256K/0.76, 512K/0.66, 1M/0.59). Lossy
+//! summarization compaction creates a "Frankenstein" context — half verbatim,
+//! half paraphrased — that the model cannot tell apart, so it treats the
+//! summary as if it were verbatim and confabulates around the gaps.
+//!
+//! Checkpoint-restart fixes this by giving every cycle a *homogeneous* fresh
+//! context: original system prompt, structured state (todos / plan / working
+//! set / sub-agent handles), and a model-curated free-form briefing of at
+//! most ~3,000 tokens. The previous cycle is archived to disk in JSONL form
+//! so a future `recall_archive` tool (issue #127) can search it on demand.
+//!
+//! ## Layers of carry-forward
+//!
+//! 1. **Auto-preserved** (deterministic, no agent judgment): the original
+//!    system prompt, `SharedTodoList`, `SharedPlanState`, working-set paths,
+//!    open sub-agent snapshots, mode / workspace / cwd, and the user's most
+//!    recent unsent message.
+//! 2. **Free-form briefing** (model-curated, wrapped as `<carry_forward>`):
+//!    decisions made + why, constraints discovered, hypotheses being tested,
+//!    approaches that failed, open questions. Tool output bytes, file
+//!    contents, and step-by-step recaps explicitly do NOT belong here —
+//!    they're either in the archive or recoverable from disk.
+//!
+//! ## Trigger
+//!
+//! - Token threshold: **110K** by default (leaves ~8.5K headroom for the
+//!   briefing turn plus next-turn growth before crossing the 128K elbow).
+//! - Phase guard: callers only invoke `should_advance_cycle` at clean turn
+//!   boundaries (no in-flight tool, no streaming, no approval modal).
+//! - Per-model overrides: `[cycle.per_model]` in config.toml lets operators
+//!   tune the threshold separately for `deepseek-v4-pro` vs. `-flash` if
+//!   their workloads have different briefing costs.
+
+use std::collections::HashMap;
+use std::fs::{File, OpenOptions};
+use std::io::Write;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+
+use crate::client::DeepSeekClient;
+use crate::llm_client::LlmClient;
+use crate::models::{ContentBlock, Message, MessageRequest, SystemBlock, SystemPrompt};
+use crate::tools::plan::{PlanSnapshot, SharedPlanState};
+use crate::tools::subagent::{SharedSubAgentManager, SubAgentResult, SubAgentStatus};
+use crate::tools::todo::{SharedTodoList, TodoListSnapshot};
+use crate::working_set::WorkingSet;
+
+/// JSONL header record emitted as the first line of an archived cycle file.
+const CYCLE_ARCHIVE_SCHEMA_VERSION: u32 = 1;
+
+/// Default token threshold at which a cycle boundary fires. Set below the V4
+/// 128K retrieval elbow to leave room for the briefing turn (≤3K tokens) plus
+/// the next user turn before the next boundary.
+pub const DEFAULT_CYCLE_THRESHOLD_TOKENS: usize = 110_000;
+
+/// Default cap on the model-curated briefing block.
+pub const DEFAULT_BRIEFING_MAX_TOKENS: usize = 3_000;
+
+/// Conservative chars-per-token used to bound the briefing length to the
+/// configured token cap. Matches `compaction::estimate_tokens` (~4 chars/token).
+const APPROX_CHARS_PER_TOKEN: usize = 4;
+
+/// Per-model cycle tuning. Loaded from `[cycle.per_model.<model>]`.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct ModelCycleConfig {
+    /// Token threshold above which a cycle boundary fires.
+    pub threshold_tokens: usize,
+    /// Cap on the model-curated `<carry_forward>` briefing.
+    pub briefing_max_tokens: usize,
+}
+
+impl Default for ModelCycleConfig {
+    fn default() -> Self {
+        Self {
+            threshold_tokens: DEFAULT_CYCLE_THRESHOLD_TOKENS,
+            briefing_max_tokens: DEFAULT_BRIEFING_MAX_TOKENS,
+        }
+    }
+}
+
+/// Top-level cycle configuration.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct CycleConfig {
+    /// Whether checkpoint-restart cycles are enabled. Defaults to true.
+    pub enabled: bool,
+    /// Default token threshold; per-model overrides take precedence when present.
+    pub threshold_tokens: usize,
+    /// Default briefing cap; per-model overrides take precedence when present.
+    pub briefing_max_tokens: usize,
+    /// Per-model overrides keyed by model identifier (e.g. `deepseek-v4-pro`).
+    pub per_model: HashMap<String, ModelCycleConfig>,
+}
+
+impl Default for CycleConfig {
+    fn default() -> Self {
+        let mut per_model: HashMap<String, ModelCycleConfig> = HashMap::new();
+        per_model.insert("deepseek-v4-pro".to_string(), ModelCycleConfig::default());
+        per_model.insert("deepseek-v4-flash".to_string(), ModelCycleConfig::default());
+        Self {
+            enabled: true,
+            threshold_tokens: DEFAULT_CYCLE_THRESHOLD_TOKENS,
+            briefing_max_tokens: DEFAULT_BRIEFING_MAX_TOKENS,
+            per_model,
+        }
+    }
+}
+
+impl CycleConfig {
+    /// Resolve the threshold for a given model (per-model override > default).
+    #[must_use]
+    pub fn threshold_for(&self, model: &str) -> usize {
+        self.per_model
+            .get(model)
+            .map(|m| m.threshold_tokens)
+            .unwrap_or(self.threshold_tokens)
+    }
+
+    /// Resolve the briefing-token cap for a given model.
+    #[must_use]
+    pub fn briefing_max_for(&self, model: &str) -> usize {
+        self.per_model
+            .get(model)
+            .map(|m| m.briefing_max_tokens)
+            .unwrap_or(self.briefing_max_tokens)
+    }
+}
+
+/// Snapshot of a model-curated briefing produced at cycle handoff.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CycleBriefing {
+    /// 1-based cycle number this briefing closes (i.e. the cycle being archived).
+    pub cycle: u32,
+    /// UTC timestamp when the briefing turn completed.
+    pub timestamp: DateTime<Utc>,
+    /// Extracted contents of the `<carry_forward>` block.
+    pub briefing_text: String,
+    /// Approximate token count of `briefing_text`.
+    pub token_estimate: usize,
+}
+
+/// Decide whether a cycle boundary should fire.
+///
+/// `usage` is the *cumulative* session input+output tokens (both `u64` to
+/// match `SessionUsage`). `in_flight` is true when a tool is mid-execution,
+/// stream is open, or an approval modal is pending — in those cases the
+/// caller must wait until the next clean boundary.
+#[must_use]
+pub fn should_advance_cycle(
+    cumulative_input_tokens: u64,
+    cumulative_output_tokens: u64,
+    model: &str,
+    cfg: &CycleConfig,
+    in_flight: bool,
+) -> bool {
+    if !cfg.enabled || in_flight {
+        return false;
+    }
+    let total = cumulative_input_tokens.saturating_add(cumulative_output_tokens);
+    let threshold = cfg.threshold_for(model) as u64;
+    if threshold == 0 {
+        return false;
+    }
+    total >= threshold
+}
+
+/// Roll-up of state that survives a cycle boundary deterministically.
+///
+/// Construction is cheap — borrow the live state, snapshot it once, render it
+/// into a system block. The snapshot decouples rendering from any mutex held
+/// by the engine.
+#[derive(Debug, Clone, Default)]
+pub struct StructuredState {
+    pub mode_label: String,
+    pub workspace: PathBuf,
+    pub cwd: Option<PathBuf>,
+    pub working_set_summary: Option<String>,
+    pub todo_snapshot: Option<TodoListSnapshot>,
+    pub plan_snapshot: Option<PlanSnapshot>,
+    pub subagent_snapshots: Vec<SubAgentResult>,
+}
+
+impl StructuredState {
+    /// Capture the current state. All locks are held only for the duration of
+    /// the snapshot.
+    pub async fn capture(
+        mode_label: impl Into<String>,
+        workspace: PathBuf,
+        cwd: Option<PathBuf>,
+        working_set: &WorkingSet,
+        todos: &SharedTodoList,
+        plan_state: &SharedPlanState,
+        subagents: Option<&SharedSubAgentManager>,
+    ) -> Self {
+        let working_set_summary = working_set.summary_block(&workspace);
+
+        let todo_snapshot = {
+            let guard = todos.lock().await;
+            let snap = guard.snapshot();
+            if snap.items.is_empty() {
+                None
+            } else {
+                Some(snap)
+            }
+        };
+
+        let plan_snapshot = {
+            let guard = plan_state.lock().await;
+            if guard.is_empty() {
+                None
+            } else {
+                Some(guard.snapshot())
+            }
+        };
+
+        let subagent_snapshots = if let Some(handle) = subagents {
+            let guard = handle.lock().await;
+            guard
+                .list()
+                .into_iter()
+                .filter(|s| matches!(s.status, SubAgentStatus::Running))
+                .collect()
+        } else {
+            Vec::new()
+        };
+
+        Self {
+            mode_label: mode_label.into(),
+            workspace,
+            cwd,
+            working_set_summary,
+            todo_snapshot,
+            plan_snapshot,
+            subagent_snapshots,
+        }
+    }
+
+    /// Render the structured state as a single system block. Returns `None`
+    /// when there is nothing meaningful to carry forward (rare in practice —
+    /// at least the workspace and mode are always present).
+    #[must_use]
+    pub fn to_system_block(&self) -> Option<String> {
+        let mut out = String::new();
+        out.push_str("## Cycle State (Auto-Preserved)\n\n");
+        out.push_str(&format!("- Mode: `{}`\n", self.mode_label));
+        out.push_str(&format!("- Workspace: `{}`\n", self.workspace.display()));
+        if let Some(cwd) = self.cwd.as_ref() {
+            out.push_str(&format!("- Cwd: `{}`\n", cwd.display()));
+        }
+
+        if let Some(plan) = self.plan_snapshot.as_ref() {
+            out.push_str("\n### Plan\n");
+            if let Some(explanation) = plan.explanation.as_ref() {
+                out.push_str(&format!("{explanation}\n\n"));
+            }
+            for item in &plan.items {
+                let marker = match item.status {
+                    crate::tools::plan::StepStatus::Pending => "[ ]",
+                    crate::tools::plan::StepStatus::InProgress => "[~]",
+                    crate::tools::plan::StepStatus::Completed => "[x]",
+                };
+                out.push_str(&format!("- {marker} {}\n", item.step));
+            }
+        }
+
+        if let Some(todos) = self.todo_snapshot.as_ref() {
+            out.push_str(&format!(
+                "\n### Todos ({}% complete)\n",
+                todos.completion_pct
+            ));
+            for item in &todos.items {
+                let marker = match item.status {
+                    crate::tools::todo::TodoStatus::Pending => "[ ]",
+                    crate::tools::todo::TodoStatus::InProgress => "[~]",
+                    crate::tools::todo::TodoStatus::Completed => "[x]",
+                };
+                out.push_str(&format!("- {marker} {}\n", item.content));
+            }
+        }
+
+        if !self.subagent_snapshots.is_empty() {
+            out.push_str("\n### Open Sub-Agents\n");
+            for s in &self.subagent_snapshots {
+                let role = s.assignment.role.as_deref().unwrap_or("—");
+                let goal = if s.assignment.objective.is_empty() {
+                    "(no objective set)"
+                } else {
+                    s.assignment.objective.as_str()
+                };
+                out.push_str(&format!("- `{}` (role: {}) — {}\n", s.agent_id, role, goal));
+            }
+        }
+
+        if let Some(working_set) = self.working_set_summary.as_deref() {
+            out.push('\n');
+            out.push_str(working_set);
+            out.push('\n');
+        }
+
+        Some(out)
+    }
+}
+
+/// Build the prompt the model uses to produce its `<carry_forward>` briefing.
+const CYCLE_HANDOFF_TEMPLATE: &str = include_str!("prompts/cycle_handoff.md");
+
+/// Run the briefing turn. The caller drives this just before swapping the
+/// session message buffer. The returned text is the contents of the
+/// `<carry_forward>` block — outer tags stripped, length-bounded to
+/// `max_briefing_tokens` worth of characters as a defensive backstop in case
+/// the model ignores the cap.
+pub async fn produce_briefing(
+    client: &DeepSeekClient,
+    model: &str,
+    conversation: &[Message],
+    max_briefing_tokens: usize,
+) -> Result<String> {
+    if conversation.is_empty() {
+        return Ok(String::new());
+    }
+
+    // Append a synthetic instruction asking for the carry_forward block. We
+    // do not mutate the caller's conversation; this is a one-shot turn.
+    let mut messages: Vec<Message> = conversation.to_vec();
+    messages.push(Message {
+        role: "user".to_string(),
+        content: vec![ContentBlock::Text {
+            text: format!(
+                "[CYCLE BOUNDARY] {}\n\nProduce your `<carry_forward>` block now. \
+                 Stay under {} tokens. Output only the block — no other text.",
+                "The next turn starts in a fresh context.", max_briefing_tokens
+            ),
+            cache_control: None,
+        }],
+    });
+
+    let request = MessageRequest {
+        model: model.to_string(),
+        messages,
+        max_tokens: u32::try_from(max_briefing_tokens.saturating_mul(2))
+            .unwrap_or(8_192)
+            .max(1_024),
+        system: Some(SystemPrompt::Blocks(vec![SystemBlock {
+            block_type: "text".to_string(),
+            text: CYCLE_HANDOFF_TEMPLATE.to_string(),
+            cache_control: None,
+        }])),
+        tools: None,
+        tool_choice: None,
+        metadata: None,
+        thinking: None,
+        reasoning_effort: None,
+        stream: Some(false),
+        // Briefings benefit from low temperature — we want consistent state
+        // capture, not stylistic variation.
+        temperature: Some(0.2),
+        top_p: None,
+    };
+
+    let response = client
+        .create_message(request)
+        .await
+        .with_context(|| format!("Cycle briefing turn failed for model {model}"))?;
+
+    let raw = response
+        .content
+        .iter()
+        .filter_map(|block| match block {
+            ContentBlock::Text { text, .. } => Some(text.as_str()),
+            _ => None,
+        })
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    let extracted = extract_carry_forward(&raw);
+    let bounded = enforce_briefing_cap(&extracted, max_briefing_tokens);
+    Ok(bounded)
+}
+
+/// Pull the contents of the first `<carry_forward>...</carry_forward>` block
+/// out of the raw model response. If the tags are missing, return the trimmed
+/// raw text — the caller would rather have *some* briefing than nothing.
+#[must_use]
+pub fn extract_carry_forward(raw: &str) -> String {
+    let lower = raw.to_ascii_lowercase();
+    let open_tag = "<carry_forward>";
+    let close_tag = "</carry_forward>";
+
+    if let Some(start) = lower.find(open_tag) {
+        let after = start + open_tag.len();
+        let tail = &raw[after..];
+        let tail_lower = &lower[after..];
+        if let Some(end) = tail_lower.find(close_tag) {
+            return tail[..end].trim().to_string();
+        }
+        // Open tag without close tag — take everything after, trimmed.
+        return tail.trim().to_string();
+    }
+    raw.trim().to_string()
+}
+
+/// Defensive bound on briefing length. Calibrated at ~4 chars/token to match
+/// the rest of the codebase's token estimator.
+fn enforce_briefing_cap(text: &str, max_tokens: usize) -> String {
+    let max_chars = max_tokens.saturating_mul(APPROX_CHARS_PER_TOKEN);
+    if max_chars == 0 {
+        return String::new();
+    }
+    if text.chars().count() <= max_chars {
+        return text.to_string();
+    }
+    let mut out: String = text.chars().take(max_chars).collect();
+    out.push_str("\n\n[...briefing truncated to fit cap...]");
+    out
+}
+
+/// Estimate briefing tokens — same method as `compaction::estimate_tokens`
+/// for symmetry: ~4 chars per token.
+#[must_use]
+pub fn estimate_briefing_tokens(text: &str) -> usize {
+    text.len().div_ceil(APPROX_CHARS_PER_TOKEN)
+}
+
+/// Header record written as the first line of an archived cycle JSONL file.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CycleArchiveHeader {
+    pub schema_version: u32,
+    pub cycle: u32,
+    pub session_id: String,
+    pub model: String,
+    pub started: DateTime<Utc>,
+    pub ended: DateTime<Utc>,
+    pub message_count: usize,
+}
+
+/// Resolve the on-disk archive directory: `~/.deepseek/sessions/<id>/cycles`.
+fn archive_dir_for(session_id: &str) -> Result<PathBuf> {
+    let home = dirs::home_dir().context("Could not resolve home directory for cycle archive")?;
+    Ok(home
+        .join(".deepseek")
+        .join("sessions")
+        .join(session_id)
+        .join("cycles"))
+}
+
+/// Archive a cycle's messages to JSONL on disk and return the path written.
+///
+/// The first line is a `CycleArchiveHeader` JSON object; each subsequent
+/// line is a single `Message` serialized as JSON.
+pub fn archive_cycle(
+    session_id: &str,
+    cycle_n: u32,
+    messages: &[Message],
+    model: &str,
+    started: DateTime<Utc>,
+) -> Result<PathBuf> {
+    let dir = archive_dir_for(session_id)?;
+    std::fs::create_dir_all(&dir).with_context(|| {
+        format!(
+            "Failed to create cycle archive directory at {}",
+            dir.display()
+        )
+    })?;
+
+    let path = dir.join(format!("{cycle_n}.jsonl"));
+    let header = CycleArchiveHeader {
+        schema_version: CYCLE_ARCHIVE_SCHEMA_VERSION,
+        cycle: cycle_n,
+        session_id: session_id.to_string(),
+        model: model.to_string(),
+        started,
+        ended: Utc::now(),
+        message_count: messages.len(),
+    };
+
+    write_archive_file(&path, &header, messages)
+        .with_context(|| format!("Failed to write cycle archive at {}", path.display()))?;
+
+    Ok(path)
+}
+
+fn write_archive_file(
+    path: &Path,
+    header: &CycleArchiveHeader,
+    messages: &[Message],
+) -> Result<()> {
+    let tmp_path = path.with_extension("jsonl.tmp");
+    {
+        let file = OpenOptions::new()
+            .create(true)
+            .truncate(true)
+            .write(true)
+            .open(&tmp_path)?;
+        let mut buf = std::io::BufWriter::new(file);
+        let header_line = serde_json::to_string(header)?;
+        buf.write_all(header_line.as_bytes())?;
+        buf.write_all(b"\n")?;
+        for message in messages {
+            let line = serde_json::to_string(message)?;
+            buf.write_all(line.as_bytes())?;
+            buf.write_all(b"\n")?;
+        }
+        // BufWriter flushes on drop, but we want any error surfaced now —
+        // not silently into the void.
+        buf.flush()?;
+        // File handle drops with `buf`.
+    }
+    std::fs::rename(&tmp_path, path)?;
+    Ok(())
+}
+
+/// Open an archived cycle JSONL for streaming reads. Returns the parsed
+/// header and an iterator over messages. Reserved for the future
+/// `recall_archive` tool (#127).
+#[allow(dead_code)]
+pub fn open_archive(path: &Path) -> Result<(CycleArchiveHeader, ArchiveMessageReader)> {
+    use std::io::{BufRead, BufReader};
+
+    let file = File::open(path)
+        .with_context(|| format!("Failed to open cycle archive at {}", path.display()))?;
+    let mut reader = BufReader::new(file);
+    let mut header_line = String::new();
+    reader.read_line(&mut header_line)?;
+    let header: CycleArchiveHeader =
+        serde_json::from_str(header_line.trim()).with_context(|| {
+            format!(
+                "Cycle archive at {} is missing a valid header",
+                path.display()
+            )
+        })?;
+
+    if header.schema_version > CYCLE_ARCHIVE_SCHEMA_VERSION {
+        anyhow::bail!(
+            "Cycle archive schema v{} at {} is newer than supported v{}",
+            header.schema_version,
+            path.display(),
+            CYCLE_ARCHIVE_SCHEMA_VERSION
+        );
+    }
+
+    Ok((header, ArchiveMessageReader { reader }))
+}
+
+/// Iterator yielding `Message`s from an opened archive file. Yields `None`
+/// when the file is exhausted. Errors propagate through the `Result`.
+#[allow(dead_code)]
+#[derive(Debug)]
+pub struct ArchiveMessageReader {
+    reader: std::io::BufReader<File>,
+}
+
+#[allow(dead_code)]
+impl Iterator for ArchiveMessageReader {
+    type Item = Result<Message>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        use std::io::BufRead;
+
+        let mut line = String::new();
+        match self.reader.read_line(&mut line) {
+            Ok(0) => None,
+            Ok(_) => {
+                let trimmed = line.trim();
+                if trimmed.is_empty() {
+                    return self.next();
+                }
+                Some(
+                    serde_json::from_str::<Message>(trimmed)
+                        .map_err(|e| anyhow::anyhow!("Archive line parse failed: {e}")),
+                )
+            }
+            Err(e) => Some(Err(anyhow::Error::new(e))),
+        }
+    }
+}
+
+/// Compose the seed messages for the next cycle.
+///
+/// Layout (deterministic order):
+///
+/// 1. (system prompt is provided separately, not as a `Message`)
+/// 2. Optional structured-state user message (todos / plan / working set /
+///    sub-agents) — labeled with `[CYCLE STATE]` so the assistant can tell
+///    it apart from a real user turn.
+/// 3. The model-curated `<carry_forward>` briefing — labeled with `[CYCLE
+///    BRIEFING]` so the assistant knows it was self-authored on the previous
+///    cycle.
+/// 4. Optional pending user message that hadn't been sent yet.
+///
+/// The original system prompt is composed by the engine and stays separate
+/// from this list — the engine sets `session.system_prompt` directly.
+#[must_use]
+pub fn build_seed_messages(
+    structured_state_block: Option<&str>,
+    briefing: Option<&CycleBriefing>,
+    pending_user_message: Option<&str>,
+) -> Vec<Message> {
+    let mut out: Vec<Message> = Vec::new();
+
+    if let Some(state) = structured_state_block
+        && !state.trim().is_empty()
+    {
+        out.push(Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: format!(
+                    "[CYCLE STATE — auto-preserved across the cycle boundary]\n\n{}",
+                    state.trim()
+                ),
+                cache_control: None,
+            }],
+        });
+        // A user message expects an assistant ack so the next real user
+        // message lands on a clean alternation. We synthesize a one-line ack.
+        out.push(Message {
+            role: "assistant".to_string(),
+            content: vec![ContentBlock::Text {
+                text: "Acknowledged. State carried into the new cycle.".to_string(),
+                cache_control: None,
+            }],
+        });
+    }
+
+    if let Some(brief) = briefing
+        && !brief.briefing_text.trim().is_empty()
+    {
+        out.push(Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: format!(
+                    "[CYCLE BRIEFING — written by you on cycle {} at {}]\n\n<carry_forward>\n{}\n</carry_forward>",
+                    brief.cycle,
+                    brief.timestamp.to_rfc3339(),
+                    brief.briefing_text.trim()
+                ),
+                cache_control: None,
+            }],
+        });
+        out.push(Message {
+            role: "assistant".to_string(),
+            content: vec![ContentBlock::Text {
+                text: "Briefing absorbed. Continuing.".to_string(),
+                cache_control: None,
+            }],
+        });
+    }
+
+    if let Some(pending) = pending_user_message
+        && !pending.trim().is_empty()
+    {
+        out.push(Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: pending.trim().to_string(),
+                cache_control: None,
+            }],
+        });
+    }
+
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::models::{ContentBlock, Message};
+    use std::path::PathBuf;
+    use tempfile::tempdir;
+
+    fn user_msg(text: &str) -> Message {
+        Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: text.to_string(),
+                cache_control: None,
+            }],
+        }
+    }
+
+    fn asst_msg(text: &str) -> Message {
+        Message {
+            role: "assistant".to_string(),
+            content: vec![ContentBlock::Text {
+                text: text.to_string(),
+                cache_control: None,
+            }],
+        }
+    }
+
+    #[test]
+    fn cycle_config_default_includes_v4_overrides() {
+        let cfg = CycleConfig::default();
+        assert!(cfg.enabled);
+        assert!(cfg.per_model.contains_key("deepseek-v4-pro"));
+        assert!(cfg.per_model.contains_key("deepseek-v4-flash"));
+        assert_eq!(cfg.threshold_tokens, DEFAULT_CYCLE_THRESHOLD_TOKENS);
+        assert_eq!(cfg.briefing_max_tokens, DEFAULT_BRIEFING_MAX_TOKENS);
+    }
+
+    #[test]
+    fn threshold_for_falls_back_to_default() {
+        let cfg = CycleConfig::default();
+        assert_eq!(
+            cfg.threshold_for("deepseek-v4-pro"),
+            DEFAULT_CYCLE_THRESHOLD_TOKENS
+        );
+        assert_eq!(
+            cfg.threshold_for("unknown-model"),
+            DEFAULT_CYCLE_THRESHOLD_TOKENS
+        );
+    }
+
+    #[test]
+    fn threshold_for_uses_per_model_override() {
+        let mut cfg = CycleConfig::default();
+        cfg.per_model.insert(
+            "deepseek-v4-pro".to_string(),
+            ModelCycleConfig {
+                threshold_tokens: 80_000,
+                briefing_max_tokens: 2_000,
+            },
+        );
+        assert_eq!(cfg.threshold_for("deepseek-v4-pro"), 80_000);
+        assert_eq!(cfg.briefing_max_for("deepseek-v4-pro"), 2_000);
+    }
+
+    #[test]
+    fn should_advance_below_threshold_returns_false() {
+        let cfg = CycleConfig::default();
+        assert!(!should_advance_cycle(
+            50_000,
+            0,
+            "deepseek-v4-pro",
+            &cfg,
+            false
+        ));
+    }
+
+    #[test]
+    fn should_advance_at_threshold_returns_true() {
+        let cfg = CycleConfig::default();
+        assert!(should_advance_cycle(
+            DEFAULT_CYCLE_THRESHOLD_TOKENS as u64,
+            0,
+            "deepseek-v4-pro",
+            &cfg,
+            false
+        ));
+    }
+
+    #[test]
+    fn should_advance_combines_input_and_output() {
+        let cfg = CycleConfig::default();
+        // 60k + 60k = 120k > 110k threshold
+        assert!(should_advance_cycle(
+            60_000,
+            60_000,
+            "deepseek-v4-pro",
+            &cfg,
+            false
+        ));
+    }
+
+    #[test]
+    fn in_flight_phase_guard_blocks_advance() {
+        let cfg = CycleConfig::default();
+        assert!(!should_advance_cycle(
+            DEFAULT_CYCLE_THRESHOLD_TOKENS as u64 * 2,
+            0,
+            "deepseek-v4-pro",
+            &cfg,
+            true,
+        ));
+    }
+
+    #[test]
+    fn disabled_config_blocks_advance() {
+        let cfg = CycleConfig {
+            enabled: false,
+            ..Default::default()
+        };
+        assert!(!should_advance_cycle(
+            DEFAULT_CYCLE_THRESHOLD_TOKENS as u64 * 2,
+            0,
+            "deepseek-v4-pro",
+            &cfg,
+            false,
+        ));
+    }
+
+    #[test]
+    fn extract_carry_forward_pulls_block() {
+        let raw = "Here is your handoff:\n<carry_forward>\nDecision A: chose X because Y.\n</carry_forward>\nDone.";
+        assert_eq!(extract_carry_forward(raw), "Decision A: chose X because Y.");
+    }
+
+    #[test]
+    fn extract_carry_forward_handles_missing_close_tag() {
+        let raw = "<carry_forward>\nDecision A: chose X.";
+        // Missing close tag → returns the tail, trimmed.
+        assert_eq!(extract_carry_forward(raw), "Decision A: chose X.");
+    }
+
+    #[test]
+    fn extract_carry_forward_no_tags_returns_trimmed_body() {
+        let raw = "  Decision A: chose X.  ";
+        assert_eq!(extract_carry_forward(raw), "Decision A: chose X.");
+    }
+
+    #[test]
+    fn extract_carry_forward_case_insensitive() {
+        let raw = "<CARRY_FORWARD>\nState here.\n</CARRY_FORWARD>";
+        assert_eq!(extract_carry_forward(raw), "State here.");
+    }
+
+    #[test]
+    fn enforce_briefing_cap_truncates_oversized_text() {
+        let max_tokens = 10; // 10 * 4 = 40 chars
+        let big = "x".repeat(200);
+        let bounded = enforce_briefing_cap(&big, max_tokens);
+        assert!(bounded.starts_with(&"x".repeat(40)));
+        assert!(bounded.contains("[...briefing truncated"));
+    }
+
+    #[test]
+    fn enforce_briefing_cap_passes_short_text_through() {
+        let txt = "hello world";
+        assert_eq!(enforce_briefing_cap(txt, 100), "hello world");
+    }
+
+    #[test]
+    fn build_seed_messages_empty_when_all_inputs_empty() {
+        let seeds = build_seed_messages(None, None, None);
+        assert!(seeds.is_empty());
+    }
+
+    #[test]
+    fn build_seed_messages_includes_state_briefing_and_pending() {
+        let briefing = CycleBriefing {
+            cycle: 1,
+            timestamp: Utc::now(),
+            briefing_text: "Decisions: chose A.".to_string(),
+            token_estimate: 5,
+        };
+
+        let seeds = build_seed_messages(
+            Some("## Cycle State\n- Mode: agent"),
+            Some(&briefing),
+            Some("Continue working on issue #124"),
+        );
+
+        // Expected layout: state user + ack assistant + briefing user + ack assistant + pending user.
+        assert_eq!(seeds.len(), 5);
+        assert_eq!(seeds[0].role, "user");
+        assert_eq!(seeds[1].role, "assistant");
+        assert_eq!(seeds[2].role, "user");
+        assert_eq!(seeds[3].role, "assistant");
+        assert_eq!(seeds[4].role, "user");
+
+        if let ContentBlock::Text { text, .. } = &seeds[0].content[0] {
+            assert!(text.contains("[CYCLE STATE"));
+            assert!(text.contains("agent"));
+        } else {
+            panic!("expected text block");
+        }
+        if let ContentBlock::Text { text, .. } = &seeds[2].content[0] {
+            assert!(text.contains("[CYCLE BRIEFING"));
+            assert!(text.contains("<carry_forward>"));
+            assert!(text.contains("Decisions: chose A."));
+        } else {
+            panic!("expected text block");
+        }
+        if let ContentBlock::Text { text, .. } = &seeds[4].content[0] {
+            assert_eq!(text, "Continue working on issue #124");
+        } else {
+            panic!("expected text block");
+        }
+    }
+
+    #[test]
+    fn build_seed_messages_skips_blank_pending() {
+        let seeds = build_seed_messages(Some("## State"), None, Some("   "));
+        // State block + ack — no pending message.
+        assert_eq!(seeds.len(), 2);
+        assert_eq!(seeds[0].role, "user");
+        assert_eq!(seeds[1].role, "assistant");
+    }
+
+    #[test]
+    fn structured_state_to_system_block_renders_minimal() {
+        let state = StructuredState {
+            mode_label: "agent".to_string(),
+            workspace: PathBuf::from("/tmp/ws"),
+            cwd: None,
+            working_set_summary: None,
+            todo_snapshot: None,
+            plan_snapshot: None,
+            subagent_snapshots: Vec::new(),
+        };
+        let block = state.to_system_block().expect("renders");
+        assert!(block.contains("Mode: `agent`"));
+        assert!(block.contains("Workspace: `/tmp/ws`"));
+    }
+
+    #[test]
+    fn archive_cycle_writes_jsonl_with_header_and_messages() {
+        let dir = tempdir().expect("tempdir");
+        let session_id = format!("test-session-{}", uuid::Uuid::new_v4());
+
+        // Override HOME so archive_dir_for resolves into our tempdir.
+        // SAFETY: Test isolation only. Restore on drop via the tempdir
+        // guard. set_var is not concurrency-safe but cargo test runs each
+        // process serially per test binary unless --test-threads says
+        // otherwise; this single-threaded write is acceptable for our
+        // archive smoke test.
+        let original_home = std::env::var("HOME").ok();
+        // SAFETY: see comment above — single-threaded test path.
+        unsafe {
+            std::env::set_var("HOME", dir.path());
+        }
+
+        let messages = vec![
+            user_msg("hello"),
+            asst_msg("hi"),
+            user_msg("can you read Cargo.toml?"),
+        ];
+
+        let started = Utc::now();
+        let path = archive_cycle(&session_id, 1, &messages, "deepseek-v4-pro", started)
+            .expect("archive_cycle should succeed");
+
+        assert!(path.exists(), "archive file should exist on disk");
+        assert!(path.to_string_lossy().ends_with("/1.jsonl"));
+
+        let contents = std::fs::read_to_string(&path).expect("read archive back");
+        let mut lines = contents.lines();
+
+        let header_line = lines.next().expect("header line present");
+        let header: CycleArchiveHeader = serde_json::from_str(header_line).expect("header parses");
+        assert_eq!(header.cycle, 1);
+        assert_eq!(header.session_id, session_id);
+        assert_eq!(header.model, "deepseek-v4-pro");
+        assert_eq!(header.message_count, 3);
+        assert_eq!(header.schema_version, CYCLE_ARCHIVE_SCHEMA_VERSION);
+
+        for expected in &messages {
+            let line = lines.next().expect("message line present");
+            let parsed: Message = serde_json::from_str(line).expect("message parses");
+            assert_eq!(&parsed, expected);
+        }
+        assert!(lines.next().is_none(), "no extra trailing lines");
+
+        // Restore HOME so subsequent tests aren't surprised.
+        // SAFETY: see set_var comment above.
+        unsafe {
+            match original_home {
+                Some(value) => std::env::set_var("HOME", value),
+                None => std::env::remove_var("HOME"),
+            }
+        }
+    }
+
+    #[test]
+    fn open_archive_rejects_newer_schema_version() {
+        let dir = tempdir().expect("tempdir");
+        let path = dir.path().join("999.jsonl");
+        let header = CycleArchiveHeader {
+            schema_version: CYCLE_ARCHIVE_SCHEMA_VERSION + 5,
+            cycle: 999,
+            session_id: "future-session".to_string(),
+            model: "deepseek-v9".to_string(),
+            started: Utc::now(),
+            ended: Utc::now(),
+            message_count: 0,
+        };
+        let mut payload = serde_json::to_string(&header).unwrap();
+        payload.push('\n');
+        std::fs::write(&path, payload).unwrap();
+
+        let err = open_archive(&path).expect_err("must reject newer schema version");
+        let msg = format!("{err:#}");
+        assert!(msg.contains("newer than supported"), "got: {msg}");
+    }
+
+    /// Mock `produce_briefing`-style flow purely client-side: we feed a known
+    /// raw string through `extract_carry_forward` + `enforce_briefing_cap`
+    /// and assert the same result we'd produce after a real LLM call.
+    /// Avoids spinning up a live mock server while still proving the
+    /// extraction contract.
+    #[test]
+    fn briefing_extraction_pipeline_preserves_block() {
+        let raw = "thinking: ok\n<carry_forward>\nDecision: pick lib A; constraint: no async.\n</carry_forward>\n";
+        let extracted = extract_carry_forward(raw);
+        let bounded = enforce_briefing_cap(&extracted, 50);
+        assert_eq!(bounded, "Decision: pick lib A; constraint: no async.");
+    }
+}
diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs
index 605261c8..4e083795 100644
--- a/crates/tui/src/main.rs
+++ b/crates/tui/src/main.rs
@@ -20,6 +20,7 @@ mod commands;
 mod compaction;
 mod config;
 mod core;
+mod cycle_manager;
 mod deepseek_theme;
 mod error_taxonomy;
 mod eval;
@@ -2866,8 +2867,13 @@ async fn run_exec_agent(
     use crate::tools::todo::new_shared_todo_list;
     use crate::tui::app::AppMode;
 
+    // Compaction defaults to disabled in v0.6.6: the checkpoint-restart cycle
+    // architecture (issue #124) handles long-context resets via fresh contexts
+    // rather than progressive summarization. The compaction config is still
+    // wired through so users who explicitly opt back in via [compaction]
+    // enabled = true keep their old behavior.
     let compaction = CompactionConfig {
-        enabled: true,
+        enabled: false,
         model: model.to_string(),
         token_threshold: compaction_threshold_for_model(model),
         message_threshold: compaction_message_threshold_for_model(model),
@@ -2885,6 +2891,7 @@ async fn run_exec_agent(
         max_subagents,
         features: config.features(),
         compaction,
+        cycle: crate::cycle_manager::CycleConfig::default(),
         capacity: crate::core::capacity::CapacityControllerConfig::from_app_config(config),
         todos: new_shared_todo_list(),
         plan_state: new_shared_plan_state(),
diff --git a/crates/tui/src/prompts/cycle_handoff.md b/crates/tui/src/prompts/cycle_handoff.md
new file mode 100644
index 00000000..c66c8ac3
--- /dev/null
+++ b/crates/tui/src/prompts/cycle_handoff.md
@@ -0,0 +1,76 @@
+# Cycle Handoff Briefing
+
+You are about to cross a context cycle boundary. The conversation so far has
+crossed the per-cycle token budget, so this entire transcript is going to be
+**archived to disk** and the next turn will start with a fresh context: the
+original system prompt, structured state (todos, plan, working set, open
+sub-agents), the user's pending message, and a free-form briefing that **you
+write right now**.
+
+Your job, in this single message: produce a `<carry_forward>` block of at most
+**3,000 tokens** that captures the irreducible state the *next cycle's you* will
+need to continue without redoing work.
+
+## What to put in `<carry_forward>`
+
+Write concrete prose, not bullet-point summaries of the transcript. Cover:
+
+- **Decisions made and why.** The things you've chosen and the reasoning that
+  led there. Not "we discussed options" — name the choice and the constraint
+  that made it the right one.
+- **Constraints discovered.** Concrete facts about the codebase, environment,
+  user preferences, or external systems that the next cycle will trip over if
+  it doesn't know them. (e.g. "the audit log is JSONL not JSON", "the user
+  insists on no `unwrap()` in non-test code", "macOS sandbox blocks raw
+  sockets in tools/exec.rs".)
+- **Hypotheses being tested.** Open questions you're actively investigating,
+  what you're trying to falsify, what evidence would change your mind.
+- **Approaches that failed.** Dead ends with enough detail that the next
+  cycle won't repeat them. Name the approach and the specific reason it
+  didn't work, not just "tried X, didn't work".
+- **Open questions for the user.** Things you're blocked on that the next
+  cycle should ask about if the user doesn't volunteer them.
+
+## What NOT to put in `<carry_forward>`
+
+- Tool output bytes. (They're already archived to disk.)
+- File contents you read. (The next cycle can re-read them — pricier than a
+  briefing token, but cheaper than a wrong assumption built on a stale
+  paraphrase.)
+- Step-by-step recap of what you did. The next cycle does not need to know
+  the order of operations; it needs to know the *current state*.
+- Pleasantries, throat-clearing, framing language. Every token matters.
+
+## Format
+
+Open with `<carry_forward>` on its own line. Close with `</carry_forward>` on
+its own line. No prose outside the tags. No nested tags. No code fences around
+the block itself (you can use code fences inside if you need to quote a
+specific snippet).
+
+The `recall_archive` tool is available in the next cycle. It searches the
+archived transcripts (BM25 over message text, top-N hits) when your briefing
+missed something the next cycle needs. Use it sparingly — frequent recalls
+mean your briefing was too sparse, so refine your *next* briefing rather than
+leaning on the archive. Don't try to be exhaustive here: be precise about the
+load-bearing state and trust the archive for the rest.
+
+## Example shape (do not copy verbatim — write your own)
+
+```
+<carry_forward>
+Working on issue #124 (cycle-restart). Key decisions: (1) trigger at 110K
+tokens not 128K — need ~8.5K headroom for the briefing turn itself plus
+next-turn growth before the next boundary; (2) archive to JSONL with a
+header line so future tools can stream-read without parsing the whole
+file. Constraint discovered: DeepSeek V4 thinking-mode requires
+reasoning_content replay on assistant messages with tool calls — so seed
+messages can't include orphan tool calls from the archived cycle. The
+approach of "summarize then keep recent messages" (the old compaction
+path) was failing because the model couldn't tell which fragments were
+verbatim vs. paraphrased; replacing it entirely. Open question for user:
+do they want per-model briefing token caps, or one global cap?
+</carry_forward>
+```
+
+Now write your `<carry_forward>` for this conversation.
diff --git a/crates/tui/src/runtime_threads.rs b/crates/tui/src/runtime_threads.rs
index 3b4dfb54..2d8aa206 100644
--- a/crates/tui/src/runtime_threads.rs
+++ b/crates/tui/src/runtime_threads.rs
@@ -35,7 +35,13 @@ use crate::tui::app::AppMode;
 const EVENT_CHANNEL_CAPACITY: usize = 1024;
 const MAX_ACTIVE_THREADS_DEFAULT: usize = 8;
 const SUMMARY_LIMIT: usize = 280;
-const CURRENT_RUNTIME_SCHEMA_VERSION: u32 = 1;
+/// Bumped to 2 for v0.6.6 — see issue #124. The persisted thread/turn/item
+/// records didn't change shape, but the live engine semantics did: cycle
+/// boundaries advance the `Session.cycle_count` and produce archived JSONL
+/// files at `~/.deepseek/sessions/<id>/cycles/<n>.jsonl`. A v1 reader on a
+/// session written by v2 wouldn't know about the cycle archive directory and
+/// might misinterpret message counts; bumping is the safe choice.
+const CURRENT_RUNTIME_SCHEMA_VERSION: u32 = 2;
 const RUNTIME_RESTART_REASON: &str = "Interrupted by process restart";
 
 const fn default_runtime_schema_version() -> u32 {
@@ -1335,8 +1341,11 @@ impl RuntimeThreadManager {
             }
         }
 
+        // Compaction defaults to disabled in v0.6.6 — the cycle architecture
+        // (issue #124) handles long-context resets. Threads keep the
+        // legacy summarizer wired off unless an operator opts in via config.
         let compaction = CompactionConfig {
-            enabled: true,
+            enabled: false,
             model: thread.model.clone(),
             token_threshold: compaction_threshold_for_model(&thread.model),
             message_threshold: compaction_message_threshold_for_model(&thread.model),
@@ -1353,6 +1362,7 @@ impl RuntimeThreadManager {
             max_subagents: self.config.max_subagents().clamp(1, MAX_SUBAGENTS),
             features: self.config.features(),
             compaction,
+            cycle: crate::cycle_manager::CycleConfig::default(),
             capacity: crate::core::capacity::CapacityControllerConfig::from_app_config(
                 &self.config,
             ),
@@ -1681,6 +1691,26 @@ impl RuntimeThreadManager {
                         .await?;
                     }
                 }
+                EngineEvent::CycleAdvanced { from, to, briefing } => {
+                    // Surface the cycle boundary in the runtime event timeline so
+                    // background-task subscribers and replay see it. The actual
+                    // archive write is the engine's responsibility (see
+                    // `cycle_manager::archive_cycle`); this event is informational.
+                    self.emit_event(
+                        &thread_id,
+                        Some(&turn_id),
+                        None,
+                        "cycle.advanced",
+                        json!({
+                            "from": from,
+                            "to": to,
+                            "briefing_tokens": briefing.token_estimate,
+                            "cycle": briefing.cycle,
+                            "timestamp": briefing.timestamp,
+                        }),
+                    )
+                    .await?;
+                }
                 EngineEvent::CoherenceState {
                     state,
                     label,
@@ -2493,6 +2523,39 @@ mod tests {
         }
     }
 
+    #[test]
+    fn store_load_thread_rejects_newer_schema_version() {
+        let dir = test_runtime_dir();
+        let store = RuntimeThreadStore::open(dir.clone()).expect("open store");
+
+        // Construct a thread record persisted with a future schema version.
+        let mut thread = sample_thread("thr_future");
+        thread.schema_version = CURRENT_RUNTIME_SCHEMA_VERSION + 1;
+
+        // Bypass save_thread (which would respect our local schema_version)
+        // by writing the JSON directly so we can simulate a future writer.
+        let path = store.threads_dir.join(format!("{}.json", thread.id));
+        std::fs::create_dir_all(path.parent().unwrap()).expect("mkdirs");
+        let payload = serde_json::to_string(&thread).expect("serialize thread");
+        std::fs::write(&path, payload).expect("write thread");
+
+        let err = store
+            .load_thread(&thread.id)
+            .expect_err("load_thread must reject newer schema");
+        let msg = format!("{err:#}");
+        assert!(msg.contains("newer than supported"), "got: {msg}");
+
+        // Cleanup so we don't leak across tests.
+        let _ = std::fs::remove_dir_all(dir);
+    }
+
+    #[test]
+    fn current_runtime_schema_version_is_two_on_v066() {
+        // Locks the bump in (issue #124). Bump deliberately when persisted
+        // shape changes.
+        assert_eq!(CURRENT_RUNTIME_SCHEMA_VERSION, 2);
+    }
+
     #[test]
     fn enforce_lru_capacity_does_not_loop_when_all_threads_are_active() {
         let mut active = ActiveThreads::default();
diff --git a/crates/tui/src/tools/mod.rs b/crates/tui/src/tools/mod.rs
index 93da2539..e080f0a5 100644
--- a/crates/tui/src/tools/mod.rs
+++ b/crates/tui/src/tools/mod.rs
@@ -13,6 +13,7 @@ pub mod git_history;
 pub mod parallel;
 pub mod plan;
 pub mod project;
+pub mod recall_archive;
 pub mod registry;
 pub mod review;
 pub mod rlm;
diff --git a/crates/tui/src/tools/recall_archive.rs b/crates/tui/src/tools/recall_archive.rs
new file mode 100644
index 00000000..f413e54e
--- /dev/null
+++ b/crates/tui/src/tools/recall_archive.rs
@@ -0,0 +1,711 @@
+//! `recall_archive` tool — search prior cycle archives (issue #127).
+//!
+//! Companion to the checkpoint-restart cycle architecture (#124). When the
+//! agent's `<carry_forward>` briefing missed something, this tool scans the
+//! on-disk JSONL archives at `~/.deepseek/sessions/<id>/cycles/*.jsonl` and
+//! returns the top-N matching messages.
+//!
+//! ## Scoring
+//!
+//! v1: a simplified BM25 over tokenized message text. No external embedding
+//! model, no cache — every call walks the archives. Acceptable because the
+//! per-cycle archive is bounded by the 110K cycle threshold and most sessions
+//! cross at most a handful of cycles. v2 (later) can add an
+//! `~/.deepseek/embeddings/` cache built on archive write.
+
+use std::collections::HashMap;
+use std::fs::read_dir;
+use std::path::PathBuf;
+
+use async_trait::async_trait;
+use serde::Serialize;
+use serde_json::{Value, json};
+
+use super::spec::{
+    ApprovalRequirement, ToolCapability, ToolContext, ToolError, ToolResult, ToolSpec,
+    optional_u64, required_str,
+};
+use crate::cycle_manager::open_archive;
+use crate::models::{ContentBlock, Message};
+
+const DEFAULT_MAX_RESULTS: usize = 3;
+const HARD_MAX_RESULTS: usize = 10;
+const CONTEXT_WINDOW_CHARS: usize = 240;
+
+/// BM25 hyper-parameters. Standard defaults from the literature.
+const K1: f64 = 1.5;
+const B: f64 = 0.75;
+
+pub struct RecallArchiveTool;
+
+#[derive(Debug, Clone, Serialize)]
+struct RecallHit {
+    cycle: u32,
+    /// 0-based message index within the cycle.
+    message_index: usize,
+    role: String,
+    score: f64,
+    /// Short window around the best match, with `…` markers when truncated.
+    excerpt: String,
+}
+
+#[async_trait]
+impl ToolSpec for RecallArchiveTool {
+    fn name(&self) -> &'static str {
+        "recall_archive"
+    }
+
+    fn description(&self) -> &'static str {
+        "Search prior context cycles for content not in your briefing. Use sparingly — \
+         frequent recalls mean your briefing was too sparse; refine your next briefing."
+    }
+
+    fn input_schema(&self) -> Value {
+        json!({
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "Search query. Tokenized and BM25-scored against archived messages."
+                },
+                "cycle": {
+                    "type": "integer",
+                    "description": "Optional: limit to a specific prior cycle number."
+                },
+                "max_results": {
+                    "type": "integer",
+                    "description": "Maximum hits to return (default 3, hard-capped at 10)."
+                }
+            },
+            "required": ["query"]
+        })
+    }
+
+    fn capabilities(&self) -> Vec<ToolCapability> {
+        vec![ToolCapability::ReadOnly]
+    }
+
+    fn approval_requirement(&self) -> ApprovalRequirement {
+        ApprovalRequirement::Auto
+    }
+
+    async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
+        let query = required_str(&input, "query")?.trim().to_string();
+        if query.is_empty() {
+            return Err(ToolError::invalid_input("query cannot be empty"));
+        }
+
+        let max_results = (optional_u64(&input, "max_results", DEFAULT_MAX_RESULTS as u64)
+            as usize)
+            .clamp(1, HARD_MAX_RESULTS);
+        let cycle_filter = input.get("cycle").and_then(Value::as_u64).map(|n| n as u32);
+
+        let session_id = context.state_namespace.as_str();
+        let archives = list_archives(session_id).map_err(|err| {
+            ToolError::execution_failed(format!("Failed to enumerate cycle archives: {err}"))
+        })?;
+
+        if archives.is_empty() {
+            return Ok(ToolResult::success(json!({
+                "hits": [],
+                "note": "No prior cycle archives exist. The session has not crossed a cycle boundary yet."
+            }).to_string()));
+        }
+
+        let documents = load_messages(&archives, cycle_filter).map_err(|err| {
+            ToolError::execution_failed(format!("Failed to read cycle archives: {err}"))
+        })?;
+
+        if documents.is_empty() {
+            let note = match cycle_filter {
+                Some(c) => format!("Cycle {c} has no messages in its archive."),
+                None => "Cycle archives exist but contain no message text.".to_string(),
+            };
+            return Ok(ToolResult::success(
+                json!({"hits": [], "note": note}).to_string(),
+            ));
+        }
+
+        let query_tokens = tokenize(&query);
+        if query_tokens.is_empty() {
+            return Err(ToolError::invalid_input(
+                "query has no scoring tokens after tokenization",
+            ));
+        }
+
+        let hits = score_bm25(&documents, &query_tokens, max_results);
+
+        let payload = json!({
+            "query": query,
+            "cycles_searched": archives.len(),
+            "messages_scanned": documents.len(),
+            "hits": hits,
+        });
+
+        Ok(ToolResult::success(payload.to_string()))
+    }
+}
+
+/// One archived message + its provenance, ready to score.
+struct ArchivedDoc {
+    cycle: u32,
+    message_index: usize,
+    role: String,
+    text: String,
+    tokens: Vec<String>,
+}
+
+fn archive_root(session_id: &str) -> Result<PathBuf, std::io::Error> {
+    let home = dirs::home_dir().ok_or_else(|| {
+        std::io::Error::new(
+            std::io::ErrorKind::NotFound,
+            "Could not resolve home directory for cycle archive root",
+        )
+    })?;
+    Ok(home
+        .join(".deepseek")
+        .join("sessions")
+        .join(session_id)
+        .join("cycles"))
+}
+
+/// Enumerate all archive files for a session, sorted by cycle number ascending.
+fn list_archives(session_id: &str) -> Result<Vec<(u32, PathBuf)>, std::io::Error> {
+    let root = archive_root(session_id)?;
+    if !root.exists() {
+        return Ok(Vec::new());
+    }
+    let mut archives: Vec<(u32, PathBuf)> = Vec::new();
+    for entry in read_dir(&root)? {
+        let entry = entry?;
+        let path = entry.path();
+        if path.extension().and_then(|s| s.to_str()) != Some("jsonl") {
+            continue;
+        }
+        let stem = match path.file_stem().and_then(|s| s.to_str()) {
+            Some(s) => s,
+            None => continue,
+        };
+        let Ok(cycle_n) = stem.parse::<u32>() else {
+            continue;
+        };
+        archives.push((cycle_n, path));
+    }
+    archives.sort_by_key(|(n, _)| *n);
+    Ok(archives)
+}
+
+/// Read messages from each archive into a flat scoreable list.
+fn load_messages(
+    archives: &[(u32, PathBuf)],
+    cycle_filter: Option<u32>,
+) -> Result<Vec<ArchivedDoc>, anyhow::Error> {
+    let mut docs: Vec<ArchivedDoc> = Vec::new();
+    for (cycle_n, path) in archives {
+        if let Some(filter) = cycle_filter
+            && *cycle_n != filter
+        {
+            continue;
+        }
+        let (header, reader) = open_archive(path)?;
+        for (idx, message_result) in reader.enumerate() {
+            let message = message_result?;
+            let text = message_text(&message);
+            if text.trim().is_empty() {
+                continue;
+            }
+            let tokens = tokenize(&text);
+            if tokens.is_empty() {
+                continue;
+            }
+            docs.push(ArchivedDoc {
+                cycle: header.cycle,
+                message_index: idx,
+                role: message.role,
+                text,
+                tokens,
+            });
+        }
+    }
+    Ok(docs)
+}
+
+/// Concatenate all text-bearing content blocks of a message.
+fn message_text(message: &Message) -> String {
+    let mut out = String::new();
+    let mut push = |s: &str| {
+        if !out.is_empty() {
+            out.push('\n');
+        }
+        out.push_str(s);
+    };
+    for block in &message.content {
+        match block {
+            ContentBlock::Text { text, .. } => push(text),
+            ContentBlock::ToolUse { name, input, .. } => {
+                push(&format!("[tool_use {name}] {input}"));
+            }
+            ContentBlock::ToolResult { content, .. } => {
+                push(&format!("[tool_result] {content}"));
+            }
+            ContentBlock::Thinking { thinking } => {
+                push(&format!("[thinking] {thinking}"));
+            }
+            ContentBlock::ServerToolUse { name, input, .. } => {
+                push(&format!("[server_tool_use {name}] {input}"));
+            }
+            ContentBlock::ToolSearchToolResult { content, .. } => {
+                push(&format!("[tool_search_result] {content}"));
+            }
+            ContentBlock::CodeExecutionToolResult { content, .. } => {
+                push(&format!("[code_execution_result] {content}"));
+            }
+        }
+    }
+    out
+}
+
+/// Lower-case, split on non-alphanumerics, drop short tokens. Same recipe as
+/// most lightweight BM25 implementations.
+fn tokenize(text: &str) -> Vec<String> {
+    text.to_ascii_lowercase()
+        .split(|c: char| !c.is_alphanumeric())
+        .filter(|s| s.len() >= 2)
+        .map(str::to_string)
+        .collect()
+}
+
+/// Score documents against a query using BM25, return the top-N.
+fn score_bm25(docs: &[ArchivedDoc], query_tokens: &[String], max_results: usize) -> Vec<RecallHit> {
+    if docs.is_empty() || query_tokens.is_empty() {
+        return Vec::new();
+    }
+
+    let n = docs.len() as f64;
+    let avgdl: f64 = docs.iter().map(|d| d.tokens.len() as f64).sum::<f64>() / n.max(1.0);
+
+    // Document frequency per query term.
+    let mut df: HashMap<&str, u64> = HashMap::new();
+    for token in query_tokens {
+        let mut count = 0u64;
+        for doc in docs {
+            if doc.tokens.iter().any(|t| t == token) {
+                count += 1;
+            }
+        }
+        df.insert(token.as_str(), count);
+    }
+
+    let mut scored: Vec<(f64, &ArchivedDoc)> = docs
+        .iter()
+        .map(|doc| (bm25_doc_score(doc, query_tokens, &df, n, avgdl), doc))
+        .filter(|(score, _)| *score > 0.0)
+        .collect();
+
+    scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
+    scored.truncate(max_results);
+
+    scored
+        .into_iter()
+        .map(|(score, doc)| RecallHit {
+            cycle: doc.cycle,
+            message_index: doc.message_index,
+            role: doc.role.clone(),
+            score: round_score(score),
+            excerpt: best_window(&doc.text, query_tokens, CONTEXT_WINDOW_CHARS),
+        })
+        .collect()
+}
+
+fn bm25_doc_score(
+    doc: &ArchivedDoc,
+    query_tokens: &[String],
+    df: &HashMap<&str, u64>,
+    n: f64,
+    avgdl: f64,
+) -> f64 {
+    let dl = doc.tokens.len() as f64;
+    if dl == 0.0 {
+        return 0.0;
+    }
+    let mut score = 0.0;
+    for token in query_tokens {
+        let tf = doc.tokens.iter().filter(|t| *t == token).count() as f64;
+        if tf == 0.0 {
+            continue;
+        }
+        let df_t = df.get(token.as_str()).copied().unwrap_or(0) as f64;
+        let idf = ((n - df_t + 0.5) / (df_t + 0.5) + 1.0).ln();
+        let denom = tf + K1 * (1.0 - B + B * (dl / avgdl.max(1.0)));
+        score += idf * (tf * (K1 + 1.0)) / denom.max(f64::EPSILON);
+    }
+    score
+}
+
+fn round_score(score: f64) -> f64 {
+    (score * 1000.0).round() / 1000.0
+}
+
+/// Find the substring of `text` of at most `window_chars` characters that
+/// contains the densest cluster of query tokens. Returns it with `…` markers
+/// when truncated. Falls back to a head-of-text excerpt when no tokens hit.
+fn best_window(text: &str, query_tokens: &[String], window_chars: usize) -> String {
+    let lower = text.to_ascii_lowercase();
+    let mut hit_positions: Vec<usize> = Vec::new();
+    for token in query_tokens {
+        let mut start = 0usize;
+        while let Some(pos) = lower[start..].find(token.as_str()) {
+            hit_positions.push(start + pos);
+            start += pos + token.len();
+        }
+    }
+    if hit_positions.is_empty() {
+        return head_excerpt(text, window_chars);
+    }
+    hit_positions.sort_unstable();
+
+    // Greedy: center the window on the first hit, walk forward as long as
+    // additional hits fit in the window.
+    let center = hit_positions[0];
+    let half = window_chars / 2;
+    let start = center.saturating_sub(half);
+    let end = (start + window_chars).min(text.len());
+    let start = align_char_boundary(text, start, false);
+    let end = align_char_boundary(text, end, true);
+    let prefix = if start > 0 { "…" } else { "" };
+    let suffix = if end < text.len() { "…" } else { "" };
+    format!("{prefix}{}{suffix}", &text[start..end])
+}
+
+fn head_excerpt(text: &str, max_chars: usize) -> String {
+    if text.len() <= max_chars {
+        return text.to_string();
+    }
+    let cut = align_char_boundary(text, max_chars, true);
+    format!("{}…", &text[..cut])
+}
+
+/// Walk left or right until `idx` lands on a UTF-8 char boundary.
+fn align_char_boundary(text: &str, mut idx: usize, walk_right: bool) -> usize {
+    if idx >= text.len() {
+        return text.len();
+    }
+    while idx > 0 && idx < text.len() && !text.is_char_boundary(idx) {
+        if walk_right {
+            idx += 1;
+        } else {
+            idx -= 1;
+        }
+    }
+    idx
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cycle_manager::archive_cycle;
+    use crate::models::{ContentBlock, Message};
+    use chrono::Utc;
+    use tempfile::TempDir;
+
+    fn user_msg(text: &str) -> Message {
+        Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: text.to_string(),
+                cache_control: None,
+            }],
+        }
+    }
+
+    fn asst_msg(text: &str) -> Message {
+        Message {
+            role: "assistant".to_string(),
+            content: vec![ContentBlock::Text {
+                text: text.to_string(),
+                cache_control: None,
+            }],
+        }
+    }
+
+    /// Serializes HOME-mutating tests since cargo runs tests in parallel by
+    /// default. Held for the full test (no `.await` while holding it).
+    static HOME_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
+    /// Guard that points HOME at a tempdir for the test's lifetime and
+    /// restores the original on drop. Holds `HOME_LOCK` to serialize.
+    struct HomeGuard {
+        _tmp: TempDir,
+        original: Option<String>,
+        _lock: std::sync::MutexGuard<'static, ()>,
+    }
+    impl HomeGuard {
+        fn new() -> Self {
+            let lock = HOME_LOCK.lock().unwrap_or_else(|p| p.into_inner());
+            let tmp = TempDir::new().expect("tempdir");
+            let original = std::env::var("HOME").ok();
+            // SAFETY: serialized by HOME_LOCK; only this thread mutates HOME
+            // for the duration of the guard.
+            unsafe { std::env::set_var("HOME", tmp.path()) };
+            Self {
+                _tmp: tmp,
+                original,
+                _lock: lock,
+            }
+        }
+    }
+    impl Drop for HomeGuard {
+        fn drop(&mut self) {
+            // SAFETY: still holding HOME_LOCK.
+            unsafe {
+                match self.original.take() {
+                    Some(v) => std::env::set_var("HOME", v),
+                    None => std::env::remove_var("HOME"),
+                }
+            }
+        }
+    }
+
+    fn fresh_session_id() -> String {
+        format!("test-{}", uuid::Uuid::new_v4())
+    }
+
+    fn ctx_for_session(workspace: &std::path::Path, session_id: &str) -> ToolContext {
+        ToolContext::new(workspace).with_state_namespace(session_id.to_string())
+    }
+
+    #[test]
+    fn tokenize_lowers_splits_drops_short() {
+        // Filter is `len >= 2`, so "a" and "0" drop; "42" stays.
+        let toks = tokenize("Hello, World! a 42 OAuth-2.0");
+        assert_eq!(toks, vec!["hello", "world", "42", "oauth"]);
+    }
+
+    #[test]
+    fn message_text_concatenates_blocks() {
+        let m = Message {
+            role: "user".to_string(),
+            content: vec![
+                ContentBlock::Text {
+                    text: "first".to_string(),
+                    cache_control: None,
+                },
+                ContentBlock::Text {
+                    text: "second".to_string(),
+                    cache_control: None,
+                },
+            ],
+        };
+        assert_eq!(message_text(&m), "first\nsecond");
+    }
+
+    #[test]
+    fn list_archives_handles_missing_dir() {
+        let _home = HomeGuard::new();
+        let sid = fresh_session_id();
+        let archives = list_archives(&sid).expect("list_archives");
+        assert!(archives.is_empty());
+    }
+
+    #[test]
+    fn list_archives_sorts_by_cycle_number() {
+        let _home = HomeGuard::new();
+        let sid = fresh_session_id();
+        let now = Utc::now();
+        archive_cycle(&sid, 3, &[user_msg("c3")], "deepseek-v4-pro", now).unwrap();
+        archive_cycle(&sid, 1, &[user_msg("c1")], "deepseek-v4-pro", now).unwrap();
+        archive_cycle(&sid, 2, &[user_msg("c2")], "deepseek-v4-pro", now).unwrap();
+        let archives = list_archives(&sid).unwrap();
+        let cycles: Vec<u32> = archives.iter().map(|(n, _)| *n).collect();
+        assert_eq!(cycles, vec![1, 2, 3]);
+    }
+
+    #[tokio::test]
+    async fn execute_returns_empty_when_no_archives() {
+        let _home = HomeGuard::new();
+        let sid = fresh_session_id();
+        let workspace = TempDir::new().unwrap();
+        let ctx = ctx_for_session(workspace.path(), &sid);
+        let tool = RecallArchiveTool;
+        let result = tool
+            .execute(json!({"query": "anything"}), &ctx)
+            .await
+            .unwrap();
+        assert!(result.content.contains("No prior cycle archives"));
+    }
+
+    #[tokio::test]
+    async fn execute_finds_matching_messages() {
+        let _home = HomeGuard::new();
+        let sid = fresh_session_id();
+        let workspace = TempDir::new().unwrap();
+        let ctx = ctx_for_session(workspace.path(), &sid);
+        let now = Utc::now();
+        let messages = vec![
+            user_msg("How does the cycle restart strategy work?"),
+            asst_msg("It archives messages to JSONL when crossing the 110K threshold."),
+            user_msg("What happens if briefing is too short?"),
+            asst_msg("Use recall_archive to retrieve specific past content from JSONL files."),
+        ];
+        archive_cycle(&sid, 1, &messages, "deepseek-v4-pro", now).unwrap();
+
+        let tool = RecallArchiveTool;
+        let result = tool
+            .execute(
+                json!({"query": "JSONL archive briefing", "max_results": 3}),
+                &ctx,
+            )
+            .await
+            .unwrap();
+        assert!(
+            result.content.contains("\"cycle\":1"),
+            "got: {}",
+            result.content
+        );
+        assert!(
+            result.content.contains("\"hits\""),
+            "got: {}",
+            result.content
+        );
+        assert!(result.content.contains("JSONL"), "got: {}", result.content);
+    }
+
+    #[tokio::test]
+    async fn execute_filters_by_cycle() {
+        let _home = HomeGuard::new();
+        let sid = fresh_session_id();
+        let workspace = TempDir::new().unwrap();
+        let ctx = ctx_for_session(workspace.path(), &sid);
+        let now = Utc::now();
+        archive_cycle(
+            &sid,
+            1,
+            &[user_msg("alpha pattern")],
+            "deepseek-v4-pro",
+            now,
+        )
+        .unwrap();
+        archive_cycle(
+            &sid,
+            2,
+            &[user_msg("alpha pattern")],
+            "deepseek-v4-pro",
+            now,
+        )
+        .unwrap();
+
+        let tool = RecallArchiveTool;
+        let result = tool
+            .execute(
+                json!({"query": "alpha", "cycle": 2, "max_results": 5}),
+                &ctx,
+            )
+            .await
+            .unwrap();
+        assert!(
+            result.content.contains("\"cycle\":2"),
+            "got: {}",
+            result.content
+        );
+        assert!(
+            !result.content.contains("\"cycle\":1"),
+            "got: {}",
+            result.content
+        );
+    }
+
+    #[tokio::test]
+    async fn execute_caps_max_results_at_hard_max() {
+        let _home = HomeGuard::new();
+        let sid = fresh_session_id();
+        let workspace = TempDir::new().unwrap();
+        let ctx = ctx_for_session(workspace.path(), &sid);
+        let now = Utc::now();
+        let mut messages: Vec<Message> = Vec::new();
+        for i in 0..30 {
+            messages.push(user_msg(&format!("alpha message number {i}")));
+        }
+        archive_cycle(&sid, 1, &messages, "deepseek-v4-pro", now).unwrap();
+
+        let tool = RecallArchiveTool;
+        let result = tool
+            .execute(json!({"query": "alpha", "max_results": 999}), &ctx)
+            .await
+            .unwrap();
+        let count = result.content.matches("\"message_index\":").count();
+        assert!(count <= HARD_MAX_RESULTS, "got {count} hits");
+    }
+
+    #[tokio::test]
+    async fn execute_rejects_empty_query() {
+        let _home = HomeGuard::new();
+        let sid = fresh_session_id();
+        let workspace = TempDir::new().unwrap();
+        let ctx = ctx_for_session(workspace.path(), &sid);
+        let tool = RecallArchiveTool;
+        let err = tool
+            .execute(json!({"query": "   "}), &ctx)
+            .await
+            .unwrap_err();
+        assert!(matches!(err, ToolError::InvalidInput { .. }));
+    }
+
+    #[test]
+    fn best_window_centers_on_first_hit() {
+        let text = "lorem ipsum dolor sit amet, the quick brown fox jumps over the lazy dog";
+        let win = best_window(text, &["fox".to_string()], 30);
+        assert!(win.contains("fox"), "got: {win}");
+    }
+
+    #[test]
+    fn best_window_falls_back_to_head_when_no_hits() {
+        let text = "the quick brown fox jumps";
+        let win = best_window(text, &["zzz".to_string()], 10);
+        assert!(win.starts_with("the quick"), "got: {win}");
+    }
+
+    #[test]
+    fn align_char_boundary_handles_multibyte() {
+        let text = "héllo world";
+        // Index 2 is mid-byte for `é` (UTF-8 encoded as 2 bytes).
+        let aligned = align_char_boundary(text, 2, true);
+        assert!(text.is_char_boundary(aligned), "boundary check");
+    }
+
+    #[test]
+    fn bm25_returns_relevant_docs_drops_irrelevant() {
+        // BM25 length normalization can let very short matching docs outrank
+        // longer ones with higher term-frequency, so we only assert the
+        // weak invariant: matching docs are returned, non-matching docs are
+        // filtered out.
+        let docs = vec![
+            ArchivedDoc {
+                cycle: 1,
+                message_index: 0,
+                role: "user".to_string(),
+                text: "cat dog cat dog cat".to_string(),
+                tokens: tokenize("cat dog cat dog cat"),
+            },
+            ArchivedDoc {
+                cycle: 1,
+                message_index: 1,
+                role: "user".to_string(),
+                text: "fish bird".to_string(),
+                tokens: tokenize("fish bird"),
+            },
+            ArchivedDoc {
+                cycle: 1,
+                message_index: 2,
+                role: "user".to_string(),
+                text: "cat sleeps".to_string(),
+                tokens: tokenize("cat sleeps"),
+            },
+        ];
+        let hits = score_bm25(&docs, &["cat".to_string()], 3);
+        let indices: Vec<usize> = hits.iter().map(|h| h.message_index).collect();
+        assert!(indices.contains(&0), "doc 0 (3x cat) should appear");
+        assert!(indices.contains(&2), "doc 2 (1x cat) should appear");
+        assert!(!indices.contains(&1), "zero-score doc filtered");
+        assert!(hits[0].score > 0.0, "top hit has positive score");
+    }
+}
diff --git a/crates/tui/src/tools/registry.rs b/crates/tui/src/tools/registry.rs
index eccb4331..d6427348 100644
--- a/crates/tui/src/tools/registry.rs
+++ b/crates/tui/src/tools/registry.rs
@@ -399,6 +399,14 @@ impl ToolRegistryBuilder {
         self.with_tool(Arc::new(ReviewTool::new(client, model)))
     }
 
+    /// Include the `recall_archive` tool — searches prior cycle archives
+    /// produced by the checkpoint-restart system (issue #127).
+    #[must_use]
+    pub fn with_recall_archive_tool(self) -> Self {
+        use super::recall_archive::RecallArchiveTool;
+        self.with_tool(Arc::new(RecallArchiveTool))
+    }
+
     /// Include note tool.
     #[must_use]
     pub fn with_note_tool(self) -> Self {
@@ -486,6 +494,7 @@ impl ToolRegistryBuilder {
             .with_plan_tool(plan_state)
             .with_review_tool(client.clone(), model.clone())
             .with_rlm_tool(client, model)
+            .with_recall_archive_tool()
             .with_subagent_tools(manager, runtime)
     }
 
diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs
index 71e3577d..b86f0150 100644
--- a/crates/tui/src/tui/app.rs
+++ b/crates/tui/src/tui/app.rs
@@ -11,6 +11,7 @@ use thiserror::Error;
 use crate::compaction::CompactionConfig;
 use crate::config::{ApiProvider, Config, has_api_key, save_api_key};
 use crate::core::coherence::CoherenceState;
+use crate::cycle_manager::{CycleBriefing, CycleConfig};
 use crate::hooks::{HookContext, HookEvent, HookExecutor, HookResult};
 use crate::models::{
     Message, SystemPrompt, compaction_message_threshold_for_model,
@@ -593,6 +594,18 @@ pub struct App {
     /// Ctrl+C keeps its current "interrupt this turn" semantics in those
     /// states. See [`App::arm_quit`] / [`App::quit_is_armed`].
     pub quit_armed_until: Option<Instant>,
+
+    /// Number of checkpoint-restart cycles crossed in this session
+    /// (issue #124). Mirrors `Session.cycle_count` on the engine side.
+    pub cycle_count: u32,
+
+    /// Briefings produced at past cycle boundaries, in chronological order.
+    /// Used by `/cycles` and `/cycle <n>` slash commands.
+    pub cycle_briefings: Vec<CycleBriefing>,
+
+    /// Active cycle configuration (token threshold, briefing cap, per-model
+    /// overrides). Loaded from config and forwarded to the engine.
+    pub cycle: CycleConfig,
 }
 
 /// Message queued while the engine is busy.
@@ -873,6 +886,9 @@ impl App {
             coherence_state: CoherenceState::default(),
             last_send_at: None,
             quit_armed_until: None,
+            cycle_count: 0,
+            cycle_briefings: Vec::new(),
+            cycle: CycleConfig::default(),
         }
     }
 
@@ -1888,6 +1904,12 @@ impl App {
             ..Default::default()
         }
     }
+
+    /// Forward the active cycle configuration to the engine. Cloned so the
+    /// engine has its own copy to mutate per-session.
+    pub fn cycle_config(&self) -> CycleConfig {
+        self.cycle.clone()
+    }
 }
 
 pub fn media_attachment_reference(kind: &str, path: &Path, description: Option<&str>) -> String {
diff --git a/crates/tui/src/tui/sidebar.rs b/crates/tui/src/tui/sidebar.rs
index 8cd65320..0b721065 100644
--- a/crates/tui/src/tui/sidebar.rs
+++ b/crates/tui/src/tui/sidebar.rs
@@ -61,6 +61,19 @@ fn render_sidebar_plan(f: &mut Frame, area: Rect, app: &App) {
     let content_width = area.width.saturating_sub(4) as usize;
     let mut lines: Vec<Line<'static>> = Vec::with_capacity(usize::from(area.height).max(4));
 
+    // Cycle indicator (issue #124). Only shown once a boundary has fired —
+    // first-time users with cycle_count == 0 don't need this row of chrome.
+    if app.cycle_count > 0 {
+        lines.push(Line::from(Span::styled(
+            format!(
+                "cycles: {} (active: {})",
+                app.cycle_count,
+                app.cycle_count.saturating_add(1)
+            ),
+            Style::default().fg(theme.plan_summary_color),
+        )));
+    }
+
     match app.plan_state.try_lock() {
         Ok(plan) => {
             if plan.is_empty() {
diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs
index aaf9cc1a..8c2590ff 100644
--- a/crates/tui/src/tui/ui.rs
+++ b/crates/tui/src/tui/ui.rs
@@ -327,6 +327,7 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig {
         max_subagents: app.max_subagents,
         features: config.features(),
         compaction: app.compaction_config(),
+        cycle: app.cycle_config(),
         capacity: crate::core::capacity::CapacityControllerConfig::from_app_config(config),
         todos: app.todos.clone(),
         plan_state: app.plan_state.clone(),
@@ -682,6 +683,21 @@ async fn run_event_loop(
                         app.is_compacting = false;
                         app.status_message = Some(message);
                     }
+                    EngineEvent::CycleAdvanced { from, to, briefing } => {
+                        // Mirror the engine-side counter on the UI app state
+                        // so the sidebar / slash commands stay in sync, and
+                        // record the briefing so `/cycle <n>` can show it.
+                        app.cycle_count = to;
+                        let briefing_tokens = briefing.token_estimate;
+                        app.cycle_briefings.push(briefing);
+                        let separator = format!(
+                            "─── cycle {from} → {to}  (briefing: {briefing_tokens} tokens) ───"
+                        );
+                        app.add_message(HistoryCell::System { content: separator });
+                        app.status_message = Some(format!(
+                            "↻ context refreshed (cycle {from} → {to}, briefing: {briefing_tokens} tokens carried)"
+                        ));
+                    }
                     EngineEvent::CoherenceState { state, .. } => {
                         app.coherence_state = state;
                     }