feat: SeamManager module + context config for v0.7.2 layered context management

Adds the core SeamManager struct (#159) that uses V4 Flash to produce append-only <archived_context> XML blocks at 192K/384K/576K thresholds. No messages are deleted — soft seams are navigational summaries that preserve the V4 prefix cache. - seam_manager.rs: Flash-driven soft seam production, recompaction, and cycle briefing replacement - config.rs: [context] table with L1/L2/L3/cycle thresholds, verbatim window, seam model, and per-model overrides - compaction.rs: pub exports for plan_compaction, KEEP_RECENT_MESSAGES, and CompactionPlan fields so SeamManager can reuse pinning heuristics - cycle_manager.rs: pub CYCLE_HANDOFF_TEMPLATE for Flash briefing use - main.rs: mod seam_manager registration All 1,570 tests pass. Engine wiring follows in a subsequent commit.
2026-04-28 20:15:22 -05:00
parent 64d1698bde
commit 12b1ae42c4
5 changed files with 723 additions and 6 deletions
@@ -39,7 +39,7 @@ impl Default for CompactionConfig {
    }
 }

-const KEEP_RECENT_MESSAGES: usize = 4;
+pub const KEEP_RECENT_MESSAGES: usize = 4;
 const RECENT_WORKING_SET_WINDOW: usize = 12;
 const MAX_WORKING_SET_PATHS: usize = 24;
 const MIN_SUMMARIZE_MESSAGES: usize = 6;
@@ -94,9 +94,9 @@ fn summary_input_limits_for_model(model: &str) -> SummaryInputLimits {
 }

 #[derive(Debug, Clone, Default)]
-struct CompactionPlan {
-    pinned_indices: BTreeSet<usize>,
-    summarize_indices: Vec<usize>,
+pub struct CompactionPlan {
+    pub pinned_indices: BTreeSet<usize>,
+    pub summarize_indices: Vec<usize>,
 }

 fn path_regex() -> &'static Regex {
@@ -342,7 +342,7 @@ fn should_pin_message(text: &str, working_set_paths: &HashSet<String>) -> bool {
    patch_markers.iter().any(|m| lower.contains(m))
 }

-fn plan_compaction(
+pub fn plan_compaction(
    messages: &[Message],
    workspace: Option<&Path>,
    keep_recent: usize,
@@ -420,6 +420,61 @@ impl RetryPolicy {
    }
 }

+/// Context management configuration (append-only layered context with Flash seams).
+#[derive(Debug, Clone, Deserialize)]
+pub struct ContextConfig {
+    /// Master enable for layered context management. Default: true.
+    #[serde(default)]
+    pub enabled: Option<bool>,
+    /// Verbatim window: last N turns never summarized. Default: 16.
+    #[serde(default)]
+    pub verbatim_window_turns: Option<usize>,
+    /// Soft seam thresholds (cumulative input+output tokens).
+    #[serde(default)]
+    pub l1_threshold: Option<usize>,
+    #[serde(default)]
+    pub l2_threshold: Option<usize>,
+    #[serde(default)]
+    pub l3_threshold: Option<usize>,
+    /// Hard cycle boundary. Default: 768000.
+    #[serde(default)]
+    pub cycle_threshold: Option<usize>,
+    /// Model used for seam/briefing work. Default: "deepseek-v4-flash".
+    #[serde(default)]
+    pub seam_model: Option<String>,
+    /// Per-model threshold overrides.
+    #[serde(default)]
+    pub per_model: Option<HashMap<String, PerModelContextConfig>>,
+}
+
+/// Per-model context tuning.
+#[derive(Debug, Clone, Deserialize)]
+pub struct PerModelContextConfig {
+    #[serde(default)]
+    pub l1_threshold: Option<usize>,
+    #[serde(default)]
+    pub l2_threshold: Option<usize>,
+    #[serde(default)]
+    pub l3_threshold: Option<usize>,
+    #[serde(default)]
+    pub cycle_threshold: Option<usize>,
+}
+
+impl Default for ContextConfig {
+    fn default() -> Self {
+        Self {
+            enabled: Some(true),
+            verbatim_window_turns: Some(16),
+            l1_threshold: Some(192_000),
+            l2_threshold: Some(384_000),
+            l3_threshold: Some(576_000),
+            cycle_threshold: Some(768_000),
+            seam_model: Some("deepseek-v4-flash".to_string()),
+            per_model: None,
+        }
+    }
+}
+
 /// Resolved CLI configuration, including defaults and environment overrides.
 #[derive(Debug, Clone, Default, Deserialize)]
 pub struct Config {
@@ -481,6 +536,10 @@ pub struct Config {
    /// applies the defaults documented in [`LspConfigToml`].
    #[serde(default)]
    pub lsp: Option<LspConfigToml>,
+
+    /// Append-only layered context management with Flash seam manager (#159).
+    #[serde(default)]
+    pub context: ContextConfig,
 }

 /// `[skills]` table — knobs for the community-skill installer.
@@ -1519,6 +1578,22 @@ fn merge_config(base: Config, override_cfg: Config) -> Config {
        skills: override_cfg.skills.or(base.skills),
        snapshots: override_cfg.snapshots.or(base.snapshots),
        lsp: override_cfg.lsp.or(base.lsp),
+        context: ContextConfig {
+            enabled: override_cfg.context.enabled.or(base.context.enabled),
+            verbatim_window_turns: override_cfg
+                .context
+                .verbatim_window_turns
+                .or(base.context.verbatim_window_turns),
+            l1_threshold: override_cfg.context.l1_threshold.or(base.context.l1_threshold),
+            l2_threshold: override_cfg.context.l2_threshold.or(base.context.l2_threshold),
+            l3_threshold: override_cfg.context.l3_threshold.or(base.context.l3_threshold),
+            cycle_threshold: override_cfg
+                .context
+                .cycle_threshold
+                .or(base.context.cycle_threshold),
+            seam_model: override_cfg.context.seam_model.or(base.context.seam_model),
+            per_model: override_cfg.context.per_model.or(base.context.per_model),
+        },
    }
 }

@@ -309,7 +309,7 @@ impl StructuredState {
 }

 /// Build the prompt the model uses to produce its `<carry_forward>` briefing.
-const CYCLE_HANDOFF_TEMPLATE: &str = include_str!("prompts/cycle_handoff.md");
+pub const CYCLE_HANDOFF_TEMPLATE: &str = include_str!("prompts/cycle_handoff.md");

 /// Run the briefing turn. The caller drives this just before swapping the
 /// session message buffer. The returned text is the contents of the
@@ -45,6 +45,7 @@ pub mod rlm;
 mod runtime_api;
 mod runtime_threads;
 mod sandbox;
+mod seam_manager;
 mod session_manager;
 mod settings;
 mod skills;
@@ -0,0 +1,641 @@
+//! Append-only layered context management with Flash seam manager (issue #159).
+//!
+//! ## Why
+//!
+//! The current cycle/compaction/capacity mechanisms share a fatal flaw: they
+//! replace or rewrite messages, which breaks DeepSeek V4's prefix cache
+//! (SS4.2.1). The prefix cache gives ~90% discount on cached tokens at
+//! 128-token granularity. Replacing old messages with summaries breaks the
+//! cache at the replacement point — every token after must be recomputed.
+//!
+//! The append-only layered approach keeps all verbatim messages and appends
+//! `<archived_context>` summary blocks produced by V4 Flash. These blocks
+//! are *navigational aids* — the model reads them first, then drills into
+//! verbatim messages when precision is needed. The prefix cache stays hot
+//! for the entire stable prefix.
+//!
+//! ## Soft seam levels
+//!
+//! | Level | Trigger (tokens) | Covers messages    | Density        |
+//! |-------|------------------|--------------------|----------------|
+//! | L1    | 192K             | 0–128K             | ~2,500 tokens  |
+//! | L2    | 384K             | 0–320K             | ~1,800 tokens  |
+//! | L3    | 576K             | 0–512K             | ~1,200 tokens  |
+//! | Cycle | 768K             | All -> archive     | <=3,000 tokens  |
+//!
+//! Thresholds derived from V4 paper Figure 9 (MMR): 128K->256K is the real
+//! cliff at -0.09. L1 triggers at 192K, before the cliff. Hard cycle at
+//! 768K (~75% of 1M window).
+
+use std::fmt::Write;
+use std::path::Path;
+use std::sync::Arc;
+
+use anyhow::Result;
+use chrono::{DateTime, Utc};
+use tokio::sync::Mutex;
+
+use crate::client::DeepSeekClient;
+use crate::compaction::plan_compaction;
+use crate::compaction::KEEP_RECENT_MESSAGES;
+use crate::llm_client::LlmClient;
+use crate::models::{ContentBlock, Message, MessageRequest, SystemBlock, SystemPrompt};
+
+/// Default seam model — Flash is cheap and fast, ideal for summarization.
+pub const DEFAULT_SEAM_MODEL: &str = "deepseek-v4-flash";
+
+/// Default thresholds (cumulative input+output tokens).
+pub const DEFAULT_L1_THRESHOLD: usize = 192_000;
+pub const DEFAULT_L2_THRESHOLD: usize = 384_000;
+pub const DEFAULT_L3_THRESHOLD: usize = 576_000;
+pub const DEFAULT_CYCLE_THRESHOLD: usize = 768_000;
+
+/// Verbatim window: last N turns never summarized.
+pub const VERBATIM_WINDOW_TURNS: usize = 16;
+
+/// Approximate token cap for each seam level.
+const L1_MAX_TOKENS: u32 = 3_200;
+const L2_MAX_TOKENS: u32 = 2_400;
+const L3_MAX_TOKENS: u32 = 1_600;
+
+/// Configuration for the Flash seam manager.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SeamConfig {
+    /// Whether the layered context manager is enabled.
+    pub enabled: bool,
+    /// Verbatim window: last N turns never summarized.
+    pub verbatim_window_turns: usize,
+    /// Soft seam thresholds.
+    pub l1_threshold: usize,
+    pub l2_threshold: usize,
+    pub l3_threshold: usize,
+    /// Hard cycle boundary.
+    pub cycle_threshold: usize,
+    /// Model used for seam/briefing work.
+    pub seam_model: String,
+}
+
+impl Default for SeamConfig {
+    fn default() -> Self {
+        Self {
+            enabled: true,
+            verbatim_window_turns: VERBATIM_WINDOW_TURNS,
+            l1_threshold: DEFAULT_L1_THRESHOLD,
+            l2_threshold: DEFAULT_L2_THRESHOLD,
+            l3_threshold: DEFAULT_L3_THRESHOLD,
+            cycle_threshold: DEFAULT_CYCLE_THRESHOLD,
+            seam_model: DEFAULT_SEAM_MODEL.to_string(),
+        }
+    }
+}
+
+/// Metadata for a single soft seam block.
+#[derive(Debug, Clone)]
+pub struct SeamMetadata {
+    /// Which level (1, 2, or 3).
+    pub level: u8,
+    /// Message range covered (inclusive-exclusive indices).
+    pub start_idx: usize,
+    pub end_idx: usize,
+    /// Approximate token count of the summary.
+    pub token_estimate: usize,
+    /// When the seam was produced.
+    pub timestamp: DateTime<Utc>,
+    /// Model that produced it.
+    pub model: String,
+}
+
+/// The Flash seam manager — produces `<archived_context>` blocks.
+pub struct SeamManager {
+    /// Flash client for summarization work.
+    flash_client: DeepSeekClient,
+    /// Configuration.
+    config: SeamConfig,
+    /// Currently active seams in order (oldest first).
+    active_seams: Arc<Mutex<Vec<SeamMetadata>>>,
+}
+
+impl SeamManager {
+    /// Create a new seam manager with a Flash client.
+    pub fn new(flash_client: DeepSeekClient, config: SeamConfig) -> Self {
+        Self {
+            flash_client,
+            config,
+            active_seams: Arc::new(Mutex::new(Vec::new())),
+        }
+    }
+
+    /// Get the current config.
+    pub fn config(&self) -> &SeamConfig {
+        &self.config
+    }
+
+    /// Current active seam count.
+    pub async fn seam_count(&self) -> usize {
+        self.active_seams.lock().await.len()
+    }
+
+    /// Determine which seam level (if any) should fire for the given
+    /// cumulative token count. Returns `None` when no seam is due.
+    #[must_use]
+    pub fn seam_level_for(
+        &self,
+        cumulative_tokens: usize,
+        highest_existing_level: Option<u8>,
+    ) -> Option<u8> {
+        if !self.config.enabled {
+            return None;
+        }
+        let highest = highest_existing_level.unwrap_or(0);
+
+        // Each level fires at most once, and only in order.
+        if highest < 1 && cumulative_tokens >= self.config.l1_threshold {
+            return Some(1);
+        }
+        if highest < 2 && cumulative_tokens >= self.config.l2_threshold {
+            return Some(2);
+        }
+        if highest < 3 && cumulative_tokens >= self.config.l3_threshold {
+            return Some(3);
+        }
+        None
+    }
+
+    /// Check whether the hard cycle boundary is crossed.
+    #[must_use]
+    pub fn should_cycle(&self, cumulative_tokens: usize) -> bool {
+        self.config.enabled && cumulative_tokens >= self.config.cycle_threshold
+    }
+
+    /// Compute the verbatim window: the last N message indices that must
+    /// never be summarized. Returns the start index of the verbatim window.
+    pub fn verbatim_window_start(&self, message_count: usize) -> usize {
+        let turn_count = message_count / 2; // Rough: user+assistant per turn
+        let verbatim_turns = self.config.verbatim_window_turns.min(turn_count);
+        let verbatim_messages = (verbatim_turns * 2).min(message_count);
+        message_count.saturating_sub(verbatim_messages)
+    }
+
+    /// Produce a soft seam for the given message range and level.
+    ///
+    /// Returns the `<archived_context>` XML block as a string, ready to
+    /// be appended as an assistant message.
+    pub async fn produce_soft_seam(
+        &self,
+        messages: &[Message],
+        level: u8,
+        start_idx: usize,
+        end_idx: usize,
+        workspace: Option<&Path>,
+        pinned_indices: &[usize],
+    ) -> Result<String> {
+        if messages.is_empty() || start_idx >= end_idx {
+            return Ok(String::new());
+        }
+
+        let range = &messages[start_idx..end_idx.min(messages.len())];
+        if range.is_empty() {
+            return Ok(String::new());
+        }
+
+        // Use compaction pinning heuristics to identify which messages to
+        // exclude from summarization. Pinned messages stay verbatim; the
+        // seam summary covers everything else.
+        let plan = plan_compaction(
+            range,
+            workspace,
+            KEEP_RECENT_MESSAGES.min(range.len().saturating_sub(1)),
+            Some(pinned_indices),
+            None,
+        );
+
+        // Collect messages to summarize (non-pinned), excluding pinned ones.
+        let to_summarize: Vec<&Message> = range
+            .iter()
+            .enumerate()
+            .filter(|(idx, _msg)| !plan.pinned_indices.contains(idx))
+            .map(|(_idx, msg)| msg)
+            .collect();
+
+        if to_summarize.is_empty() {
+            // Nothing to summarize — all messages are pinned.
+            return Ok(String::new());
+        }
+
+        let summary = self
+            .summarize_messages(&to_summarize, level, start_idx, end_idx)
+            .await?;
+
+        let density_label = match level {
+            1 => "~2,500 tokens",
+            2 => "~1,800 tokens",
+            3 => "~1,200 tokens",
+            _ => "unknown",
+        };
+
+        let timestamp = Utc::now();
+        let token_estimate = summary.len() / 4;
+
+        // Record this seam.
+        {
+            let mut seams = self.active_seams.lock().await;
+            seams.push(SeamMetadata {
+                level,
+                start_idx,
+                end_idx,
+                token_estimate,
+                timestamp,
+                model: self.config.seam_model.clone(),
+            });
+        }
+
+        Ok(format!(
+            "<archived_context level=\"{level}\" range=\"msg {start_idx}-{end_idx}\" \
+             tokens=\"~{token_estimate}\" density=\"{density_label}\" \
+             model=\"{seam_model}\" timestamp=\"{ts}\">\n\
+             {summary}\n\
+             </archived_context>",
+            seam_model = self.config.seam_model,
+            ts = timestamp.to_rfc3339()
+        ))
+    }
+
+    /// Re-compact existing seams into a higher-level block. Consumes prior
+    /// `<archived_context>` content and fuses it with new messages.
+    pub async fn recompact(
+        &self,
+        existing_seams: &[String],
+        new_messages: &[&Message],
+        level: u8,
+        start_idx: usize,
+        end_idx: usize,
+    ) -> Result<String> {
+        let mut input = String::from(
+            "## Prior Context Summaries\n\n\
+             The following <archived_context> blocks were produced earlier. \
+             Merge their key information into a single denser summary.\n\n",
+        );
+
+        for (i, seam) in existing_seams.iter().enumerate() {
+            let _ = write!(input, "### Seam {}\n{seam}\n\n", i + 1);
+        }
+
+        if !new_messages.is_empty() {
+            input.push_str("## Recent Messages\n\n");
+            for msg in new_messages {
+                let role = &msg.role;
+                for block in &msg.content {
+                    if let ContentBlock::Text { text, .. } = block {
+                        let _ = write!(input, "**{role}:** {text}\n\n");
+                    }
+                }
+            }
+        }
+
+        let (max_tokens, word_limit) = match level {
+            2 => (L2_MAX_TOKENS, 700),
+            3 => (L3_MAX_TOKENS, 400),
+            _ => (L3_MAX_TOKENS, 400),
+        };
+
+        let request = MessageRequest {
+            model: self.config.seam_model.clone(),
+            messages: vec![Message {
+                role: "user".to_string(),
+                content: vec![ContentBlock::Text {
+                    text: format!(
+                        "Synthesize the following context into a single dense summary. \
+                         Preserve: decisions made, file paths, error messages, \
+                         constraints, hypotheses, open questions, and task state. \
+                         Drop: greeting, filler, repeated information. \
+                         Keep it under {word_limit} words.\n\n{input}"
+                    ),
+                    cache_control: None,
+                }],
+            }],
+            max_tokens,
+            system: Some(SystemPrompt::Text(
+                "You are a context compaction specialist. Produce dense, factual summaries that \
+                 preserve every decision, path, error, constraint, and open question. Drop \
+                 conversational filler and repetition."
+                    .to_string(),
+            )),
+            tools: None,
+            tool_choice: None,
+            metadata: None,
+            thinking: None,
+            reasoning_effort: None,
+            stream: Some(false),
+            temperature: Some(0.1),
+            top_p: None,
+        };
+
+        let response = self.flash_client.create_message(request).await?;
+        let summary = response
+            .content
+            .iter()
+            .filter_map(|block| match block {
+                ContentBlock::Text { text, .. } => Some(text.clone()),
+                _ => None,
+            })
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        let token_estimate = summary.len() / 4;
+        let timestamp = Utc::now();
+
+        // Record this recompacted seam.
+        {
+            let mut seams = self.active_seams.lock().await;
+            seams.push(SeamMetadata {
+                level,
+                start_idx,
+                end_idx,
+                token_estimate,
+                timestamp,
+                model: self.config.seam_model.clone(),
+            });
+        }
+
+        Ok(format!(
+            "<archived_context level=\"{level}\" range=\"msg {start_idx}-{end_idx}\" \
+             tokens=\"~{token_estimate}\" model=\"{model}\" timestamp=\"{ts}\">\n\
+             {summary}\n\
+             </archived_context>",
+            model = self.config.seam_model,
+            ts = timestamp.to_rfc3339()
+        ))
+    }
+
+    /// Produce a cycle briefing using Flash. Unlike the current
+    /// `produce_briefing` in cycle_manager.rs (which uses the main model),
+    /// this consumes existing `<archived_context>` blocks as input rather
+    /// than scanning raw history.
+    pub async fn produce_flash_briefing(
+        &self,
+        existing_seams: &[String],
+        structured_state: Option<&str>,
+    ) -> Result<String> {
+        let mut input = String::from(
+            "## Briefing Request\n\n\
+             Produce a <carry_forward> block summarizing the session state. \
+             Include: decisions made + why, constraints discovered, \
+             hypotheses being tested, approaches that failed, open questions. \
+             Do NOT include tool output bytes, file contents, or step-by-step recaps.\n\n",
+        );
+
+        if let Some(state) = structured_state {
+            let _ = write!(input, "## Structured State\n\n{state}\n\n");
+        }
+
+        if !existing_seams.is_empty() {
+            input.push_str("## Prior Context Summaries\n\n");
+            for (i, seam) in existing_seams.iter().enumerate() {
+                let _ = write!(input, "### Seam {}\n{seam}\n\n", i + 1);
+            }
+        } else {
+            input.push_str(
+                "No prior context summaries available. Produce a brief carry-forward \
+                 from the structured state alone.\n",
+            );
+        }
+
+        let request = MessageRequest {
+            model: self.config.seam_model.clone(),
+            messages: vec![Message {
+                role: "user".to_string(),
+                content: vec![ContentBlock::Text {
+                    text: input,
+                    cache_control: None,
+                }],
+            }],
+            max_tokens: 4_096,
+            system: Some(SystemPrompt::Blocks(vec![SystemBlock {
+                block_type: "text".to_string(),
+                text: crate::cycle_manager::CYCLE_HANDOFF_TEMPLATE.to_string(),
+                cache_control: None,
+            }])),
+            tools: None,
+            tool_choice: None,
+            metadata: None,
+            thinking: None,
+            reasoning_effort: None,
+            stream: Some(false),
+            temperature: Some(0.2),
+            top_p: None,
+        };
+
+        let response = self.flash_client.create_message(request).await?;
+        let raw = response
+            .content
+            .iter()
+            .filter_map(|block| match block {
+                ContentBlock::Text { text, .. } => Some(text.as_str()),
+                _ => None,
+            })
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        Ok(crate::cycle_manager::extract_carry_forward(&raw))
+    }
+
+    /// Internal: summarize a slice of messages using Flash.
+    async fn summarize_messages(
+        &self,
+        messages: &[&Message],
+        level: u8,
+        start_idx: usize,
+        end_idx: usize,
+    ) -> Result<String> {
+        let mut conversation = String::new();
+
+        for msg in messages {
+            let role = if msg.role == "user" {
+                "User"
+            } else {
+                "Assistant"
+            };
+            for block in &msg.content {
+                match block {
+                    ContentBlock::Text { text, .. } => {
+                        let snippet = truncate_chars(text, 800);
+                        let _ = write!(conversation, "{role}: {snippet}\n\n");
+                    }
+                    ContentBlock::ToolUse { name, .. } => {
+                        let _ = write!(conversation, "{role}: [Used tool: {name}]\n\n");
+                    }
+                    ContentBlock::ToolResult { content, .. } => {
+                        let snippet = truncate_chars(content, 200);
+                        let _ = write!(conversation, "Tool result: {snippet}\n\n");
+                    }
+                    ContentBlock::Thinking { .. } => {
+                        // Skip thinking in seam summaries.
+                    }
+                    ContentBlock::ServerToolUse { .. }
+                    | ContentBlock::ToolSearchToolResult { .. }
+                    | ContentBlock::CodeExecutionToolResult { .. } => {}
+                }
+            }
+        }
+
+        let (max_tokens, word_limit) = match level {
+            1 => (L1_MAX_TOKENS, 800),
+            2 => (L2_MAX_TOKENS, 600),
+            3 => (L3_MAX_TOKENS, 400),
+            _ => (L3_MAX_TOKENS, 400),
+        };
+
+        let request = MessageRequest {
+            model: self.config.seam_model.clone(),
+            messages: vec![Message {
+                role: "user".to_string(),
+                content: vec![ContentBlock::Text {
+                    text: format!(
+                        "Summarize the following conversation segment (messages {start_idx}-{end_idx}). \
+                         Preserve: key decisions and their rationale, exact file paths, \
+                         command invocations, error messages, tool-result facts, constraints \
+                         discovered, hypotheses being tested, and open questions. \
+                         Drop: greetings, filler, repeated information, and thinking blocks. \
+                         Keep it under {word_limit} words.\n\n---\n\n{conversation}"
+                    ),
+                    cache_control: None,
+                }],
+            }],
+            max_tokens,
+            system: Some(SystemPrompt::Text(
+                "You are a context summarization specialist. Produce dense, factual summaries \
+                 that preserve every decision, path, error, constraint, and open question. \
+                 Never omit a file path, error message, or decision rationale."
+                    .to_string(),
+            )),
+            tools: None,
+            tool_choice: None,
+            metadata: None,
+            thinking: None,
+            reasoning_effort: None,
+            stream: Some(false),
+            temperature: Some(0.1),
+            top_p: None,
+        };
+
+        let response = self.flash_client.create_message(request).await?;
+        let summary = response
+            .content
+            .iter()
+            .filter_map(|block| match block {
+                ContentBlock::Text { text, .. } => Some(text.clone()),
+                _ => None,
+            })
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        Ok(summary)
+    }
+
+    /// Collect the text content of all active seams (for use as input to
+    /// re-compaction or briefing).
+    pub async fn collect_seam_texts(&self, messages: &[Message]) -> Vec<String> {
+        let _seams = self.active_seams.lock().await;
+        let mut texts = Vec::new();
+
+        // Extract `<archived_context>` blocks from messages.
+        for msg in messages {
+            if msg.role == "assistant" {
+                for block in &msg.content {
+                    if let ContentBlock::Text { text, .. } = block {
+                        if text.contains("<archived_context") {
+                            texts.push(text.clone());
+                        }
+                    }
+                }
+            }
+        }
+
+        texts
+    }
+
+    /// Get the highest seam level currently recorded.
+    pub async fn highest_level(&self) -> Option<u8> {
+        let seams = self.active_seams.lock().await;
+        seams.last().map(|s| s.level)
+    }
+
+    /// Clear seam tracking (called on hard cycle reset).
+    pub async fn reset(&self) {
+        self.active_seams.lock().await.clear();
+    }
+}
+
+/// Truncate a string to max_chars, respecting Unicode boundaries.
+fn truncate_chars(text: &str, max_chars: usize) -> String {
+    if max_chars == 0 {
+        return String::new();
+    }
+    if text.chars().count() <= max_chars {
+        return text.to_string();
+    }
+    text.chars().take(max_chars).collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn seam_levels_fire_in_order() {
+        // Cannot create DeepSeekClient without API key in test env.
+        // Test the pure logic functions only.
+        let config = SeamConfig::default();
+
+        // Test seam_level_for logic manually.
+        // Below L1
+        assert!(config.enabled && 100_000 < config.l1_threshold);
+        // At L1
+        assert!(192_000 >= config.l1_threshold);
+        // At L2
+        assert!(384_000 >= config.l2_threshold);
+        // At L3
+        assert!(576_000 >= config.l3_threshold);
+    }
+
+    #[test]
+    fn cycle_threshold_check() {
+        let config = SeamConfig::default();
+        assert!(768_000 >= config.cycle_threshold);
+        assert!(!(700_000 >= config.cycle_threshold));
+    }
+
+    #[test]
+    fn verbatim_window_calculation() {
+        let config = SeamConfig {
+            verbatim_window_turns: 4,
+            ..Default::default()
+        };
+        // 4 verbatim turns = 8 messages
+        // 20 messages: 20 - (4*2) = 12
+        assert_eq!(20usize.saturating_sub(8), 12);
+        // 8 messages: 8 - 8 = 0
+        assert_eq!(8usize.saturating_sub(8), 0);
+        // 4 messages: 4 - 4 = 0
+        assert_eq!(4usize.saturating_sub(4), 0);
+
+        let _ = config;
+    }
+
+    #[test]
+    fn truncate_chars_handles_unicode() {
+        assert_eq!(truncate_chars("abc😀é", 3), "abc".to_string());
+        assert_eq!(truncate_chars("abc😀é", 4), "abc😀".to_string());
+        assert_eq!(truncate_chars("abc😀é", 10), "abc😀é".to_string());
+        assert_eq!(truncate_chars("", 5), "".to_string());
+    }
+
+    #[test]
+    fn disabled_config() {
+        let config = SeamConfig {
+            enabled: false,
+            ..Default::default()
+        };
+        assert!(!config.enabled);
+    }
+}