diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1faf2f4d..aeb0e1a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,91 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.8.34] - 2026-05-13
+
+A polish, terminal-protocol, and internal-cleanup release. The model-facing
+surface is stable; this cycle focused on prefix-cache stability metrics,
+broader terminal protocol coverage, bundled skills, and shrinking the
+mega-files that had grown around the agent loop and TUI.
+
+### Added
+
+- **Prefix-cache stability tracking.** A footer chip surfaces how stable
+  the cached prefix has been across recent turns (inspired by Reasonix),
+  so users can spot cache-busting edits before cost climbs.
+- **Bundled DeepSeek-native workflow skills.** A starter set of skills
+  ships in-binary so a fresh install has a usable `/skills` catalog
+  without external assets.
+- **Native Kitty + Ghostty notification protocols.** `OSC 99` (Kitty)
+  and `OSC 777` (Ghostty) are now first-class alongside the existing
+  desktop notification fallback.
+- **Theme picker with more presets.** Catppuccin, Tokyo Night, Dracula,
+  and Gruvbox join the built-in palette set; `/theme` now shows a
+  live picker.
+- **Chunked parallel-safe tool execution.** The engine batches
+  side-effect-free tool calls into a chunked dispatch so independent
+  reads/searches finish in one turn instead of serializing round-trip
+  by round-trip.
+- **Cancel-all shell jobs.** A single action stops every running
+  background shell command instead of cancelling them one-by-one.
+- **Session title in composer border.** The top-right of the composer
+  shows the derived session title so the active thread is visible
+  without opening the sessions panel.
+
+### Changed
+
+- **`crates/tui/src/tui/ui.rs` split into focused modules.** The
+  former 10k-line single-file TUI dispatcher is decomposed into smaller
+  modules with clearer responsibilities so reviewing a UI change does
+  not require holding the entire surface in your head.
+- **`crates/tui/src/core/engine.rs` reduced.** Helper clusters moved
+  into the existing `core/engine/` submodule directory next to the
+  turn loop and tool execution code, making the agent-loop core
+  easier to read end-to-end.
+- **Structured tracing on tool dispatch.** Tool entry, exit, duration,
+  and result/error are emitted through `tracing` spans so
+  `RUST_LOG=deepseek_cli::tools=debug` produces a coherent timeline
+  instead of scattered ad-hoc prints.
+- **`/init` updates `AGENTS.md` in place** instead of refusing when
+  the file already exists, so adding new project guidance does not
+  require manual stitching.
+- **Reasoning tokens included in cost calculations**, and the cost
+  display auto-switches to CNY when the session locale is `zh-Hans`.
+- **Stale repo-root development docs removed.** `TAKEOVER_PROMPT.md`
+  (v0.8.6 era), `PROMPT_ANALYSIS.md`, and the redundant
+  `DEPENDENCY_GRAPH.md` no longer ship in releases; `docs/ARCHITECTURE.md`
+  remains the canonical crate-layout reference.
+
+### Fixed
+
+- **Auth keys checked against the saved provider on startup**, so a
+  stored DeepSeek key is no longer rejected after switching providers
+  mid-session.
+- **Auto router skipped for decisive local routes**, removing an
+  extra model round-trip on prompts the dispatcher can route directly.
+- **Reasoning content stripped for generic providers** that do not
+  understand the `reasoning_content` field, preventing HTTP 400s when
+  pointing at an OpenAI-compatible gateway that lacks DeepSeek
+  thinking semantics.
+- **`FocusGained` debounced** so terminals (e.g. Tabby) that emit rapid
+  focus events no longer trigger a repaint flicker loop.
+- **MCP HTTP transport defaults to `Accept: application/json,
+  text/event-stream`** and persists `Mcp-Session-Id` across requests,
+  matching the spec for resumable streams.
+- **Shell output tail preserved when truncating**, so the last lines
+  of a long command output (usually the error trailer) survive the
+  in-transcript summary.
+- **Prefix cache preserved while pruning tool results.** Old
+  side-effect tool payloads no longer invalidate the prefix that
+  the next turn would otherwise reuse.
+- **Review sub-agents prevented from spawning further sub-agents**
+  (#1489), keeping recursive depth bounded.
+- **Help overlay closes cleanly** and repaints without a stale frame.
+- **Pinyin `/skills` alias dispatched correctly** so Chinese-locale
+  users reach the same surface.
+- **Flicker-prone VTE terminals get reduced motion** by default to avoid
+  thrashing on terminals that mishandle frequent partial redraws.
+
 ## [0.8.33] - 2026-05-12
 
 A sub-agent and RLM renovation release. The model-facing delegation
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 5fb3b3d5..106d3b4f 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -140,7 +140,7 @@ crates/
 ```
 
 See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the live data flow across
-these crates and [DEPENDENCY_GRAPH.md](DEPENDENCY_GRAPH.md) for build ordering.
+these crates, including the bottom-up build order.
 
 ## Submitting Changes
 
diff --git a/Cargo.lock b/Cargo.lock
index 68fa2b46..cacb5036 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1160,7 +1160,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-agent"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "deepseek-config",
  "serde",
@@ -1168,7 +1168,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-app-server"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "anyhow",
  "axum",
@@ -1190,7 +1190,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-config"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "anyhow",
  "deepseek-secrets",
@@ -1202,7 +1202,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-core"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "anyhow",
  "chrono",
@@ -1220,7 +1220,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-execpolicy"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "anyhow",
  "deepseek-protocol",
@@ -1229,7 +1229,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-hooks"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -1243,7 +1243,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-mcp"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "anyhow",
  "serde",
@@ -1252,7 +1252,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-protocol"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "serde",
  "serde_json",
@@ -1260,7 +1260,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-secrets"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "dirs",
  "keyring",
@@ -1273,7 +1273,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-state"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "anyhow",
  "chrono",
@@ -1285,7 +1285,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tools"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -1298,7 +1298,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tui"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "anyhow",
  "arboard",
@@ -1361,7 +1361,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tui-cli"
-version = "0.8.33"
+version = "0.8.34"
 dependencies = [
  "anyhow",
  "chrono",
@@ -1386,7 +1386,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tui-core"
-version = "0.8.33"
+version = "0.8.34"
 
 [[package]]
 name = "deltae"
diff --git a/Cargo.toml b/Cargo.toml
index 13632448..c1e3e08a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
 resolver = "2"
 
 [workspace.package]
-version = "0.8.33"
+version = "0.8.34"
 edition = "2024"
 # Rust 1.88 stabilized `let_chains` in `if`/`while` conditions, which the
 # codebase relies on extensively. Cargo enforces this so users on older
diff --git a/DEPENDENCY_GRAPH.md b/DEPENDENCY_GRAPH.md
deleted file mode 100644
index 823d4dfe..00000000
--- a/DEPENDENCY_GRAPH.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# Dependency Graph
-
-## Crate Dependencies (from Cargo.toml)
-
-```
-deepseek-tui (binary: `deepseek-tui`)
-  (no workspace deps — monolith source under crates/tui/src/)
-
-deepseek-tui-cli (binary: `deepseek`)
-  <- deepseek-agent
-  <- deepseek-app-server
-  <- deepseek-config
-  <- deepseek-execpolicy
-  <- deepseek-mcp
-  <- deepseek-state
-
-deepseek-app-server
-  <- deepseek-agent
-  <- deepseek-config
-  <- deepseek-core
-  <- deepseek-execpolicy
-  <- deepseek-hooks
-  <- deepseek-mcp
-  <- deepseek-protocol
-  <- deepseek-state
-  <- deepseek-tools
-
-deepseek-core (agent loop)
-  <- deepseek-agent
-  <- deepseek-config
-  <- deepseek-execpolicy
-  <- deepseek-hooks
-  <- deepseek-mcp
-  <- deepseek-protocol
-  <- deepseek-state
-  <- deepseek-tools
-
-deepseek-tools      <- deepseek-protocol
-deepseek-mcp        <- deepseek-protocol
-deepseek-hooks      <- deepseek-protocol
-deepseek-execpolicy <- deepseek-protocol
-deepseek-agent      <- deepseek-config
-
-deepseek-config     (leaf — no internal deps)
-deepseek-protocol   (leaf — no internal deps)
-deepseek-state      (leaf — no internal deps)
-deepseek-tui-core   (leaf — no internal deps)
-```
-
-Note: `deepseek-tui` has zero workspace deps because it still compiles the
-monolith source tree (`crates/tui/src/main.rs`). The crate split is
-structural — source migration into individual workspace crates is
-incremental.
-
-## Build Order (bottom-up)
-
-```
-Layer 0 (leaves):  deepseek-protocol, deepseek-config, deepseek-state, deepseek-tui-core
-Layer 1:           deepseek-tools, deepseek-mcp, deepseek-hooks, deepseek-execpolicy
-Layer 2:           deepseek-agent
-Layer 3:           deepseek-core
-Layer 4:           deepseek-app-server, deepseek-tui
-Layer 5:           deepseek-tui-cli
-```
-
diff --git a/PROMPT_ANALYSIS.md b/PROMPT_ANALYSIS.md
deleted file mode 100644
index 675c97d8..00000000
--- a/PROMPT_ANALYSIS.md
+++ /dev/null
@@ -1,312 +0,0 @@
-# System Prompt Analysis — "Mismanaged Genius" Hypothesis
-
-## Methodology
-
-Read every prompt layer (`base.md`, mode overlays, personality, approval policies),
-traced the assembly logic in `prompts.rs`, and compared against what DeepSeek V4 can
-actually do vs what the prompt currently encourages.
-
----
-
-## Summary: The Prompt Is Cautious, Not Strategic
-
-The current prompt has excellent safety rails — clear "when NOT to use" guidance,
-anti-hallucination instructions, and decomposition philosophy. But it treats the
-model's most powerful capabilities (RLM, sub-agents, parallel tool execution) as
-**specialty escape hatches** rather than **default strategic tools**. The result:
-a capable model that hesitates to parallelize, underuses its fan-out abilities, and
-serializes work that could be done concurrently.
-
-The prompt was written when the model was less reliable and needed guardrails. V4
-models can handle more autonomy — the prompt should reflect that.
-
----
-
-## Gap-by-Gap Analysis
-
-### Gap 1: RLM Is Framed as a Last Resort, Not a Strategic Tool
-
-**Current text** (`base.md`, "RLM Is a Specialty Tool"):
-> `rlm` is for one specific shape of work: a long input that genuinely does not fit
-> in your context. Reach for it ONLY when direct reasoning over the input is impossible
-> because of its size.
-
-**Problem**: RLM is actually three tools in one:
-1. Chunk-and-process for long inputs (the only case the prompt acknowledges)
-2. Parallel `llm_query_batched` for multi-angle analysis (e.g., "classify these 20 items")
-3. `rlm_query` for recursive decomposition of problems that benefit from sub-LLM critique
-
-The prompt actively discourages cases 2 and 3. A model that could classify 20 files in
-parallel instead reads them one at a time. A model that could get a "second opinion" on
-its reasoning from a sub-LLM instead trusts its first pass.
-
-**Suggested rewrite** — replace the restrictive framing with a capability guide:
-
-```
-## RLM — When to Use It
-
-RLM loads input into a Python REPL where you write code that calls sub-LLM helpers
-(`llm_query`, `llm_query_batched`, `rlm_query`). Three patterns, not one:
-
-**CHUNK** — A single input that genuinely doesn't fit in your context window (a whole file
-> 50K tokens, a long transcript, a multi-document corpus). Split it, process each chunk,
-synthesize.
-
-**BATCH** — Many independent items that each need LLM attention (classify 20 entries,
-extract fields from 30 documents, score 15 candidates). Use `llm_query_batched` for
-parallel execution — it fans out to the same DeepSeek client and finishes in one turn
-what would take 15 sequential reads.
-
-**RECURSE** — A problem that benefits from decomposition + critique. Use `rlm_query` to
-have a sub-LLM review your reasoning, identify gaps, or explore alternative approaches.
-The sub-LLM returns a synthesized answer you verify against live tool output.
-
-**When NOT to use RLM**: a single short file you can read directly; a simple
-classification on 3 items; interactive iterative exploration (RLM is one-shot batch).
-For those, `read_file`, `grep_files`, or `agent_spawn` are faster and cheaper.
-```
-
-### Gap 2: Sub-Agents Are "Implementation, Not Exploration"
-
-**Current text** (`base.md`, "When NOT to use `agent_spawn`"):
-> You haven't first laid out a plan with `checklist_write`. Sub-agents are
-> implementation, not exploration.
-
-**Problem**: This directly contradicts the Plan mode prompt, which correctly says
-"Spawn read-only sub-agents for parallel investigation." But the Agent mode prompt
-gets the restrictive version. The result: in Agent mode (where most work happens),
-the model treats sub-agents as a last step ("now implement the plan") rather than a
-discovery tool ("investigate these 4 things in parallel to understand the problem").
-
-**Reality**: Sub-agents are the BEST tool for parallel exploration. A single
-`agent_spawn` call that fans out to 3 read-only children investigating different
-modules is faster AND more thorough than reading them sequentially.
-
-**Suggested rewrite** — move sub-agent guidance from "when NOT to use" to a positive
-section:
-
-```
-## Sub-Agent Strategy
-
-Sub-agents are cheap — DeepSeek V4 Flash costs $0.14/M input. Use them liberally for
-parallel work:
-
-- **Parallel investigation**: When you need to understand 3+ independent files or
-  modules, spawn one read-only sub-agent per target. They run concurrently and return
-  structured findings you synthesize.
-
-- **Parallel implementation**: After a plan is laid out (`checklist_write` +
-  `update_plan`), spawn one sub-agent per independent leaf task. Each does one
-  thing well; you integrate results.
-
-- **Solo tasks**: A single read, a single search, a focused question — do these
-  yourself. Spawning has overhead; one-turn reads are faster direct.
-
-- **Sequential work**: If step B depends on step A's output, run A yourself, then
-  decide whether to spawn B based on what A found.
-```
-
-### Gap 3: No "Batch Everything" Instinct
-
-**Current text** (`base.md`, "Your V4 Characteristics"):
-> **Parallel execution.** Batch independent reads, searches, and greps into a single
-> turn. Never serialize operations that can run concurrently — parallel tool calls
-> share the same turn and finish faster.
-
-**Problem**: This instruction is correct but buried in a V4 Characteristics section
-the model may not internalize as a behavioral rule. The model often fires one tool,
-waits for the result, then fires another — even when both are independent.
-
-**Suggested addition** — add a concrete heuristic at the top of the toolbox section:
-
-```
-## Parallel-First Heuristic
-
-Before you fire any tool, scan your plan: is there another tool you could run
-concurrently? If two operations don't depend on each other, batch them. Examples:
-
-- Reading 3 files → 3 `read_file` calls in one turn
-- Searching for 2 patterns → 2 `grep_files` calls in one turn
-- Checking git status AND reading a config → `git_status` + `read_file` in one turn
-
-The dispatcher runs parallel tool calls simultaneously. Serializing independent
-operations wastes the user's time and your context budget.
-```
-
-### Gap 4: Thinking Budget Too Conservative for V4
-
-**Current text** (`base.md`, "Thinking Budget"):
-| Task type | Thinking depth | Rationale |
-|-----------|---------------|-----------|
-| Simple factual lookup | Skip | Answer is immediate |
-| Code generation (single function) | Light | Pattern-matching |
-
-**Problem**: V4 models have 1M context and produce thinking tokens that improve
-output quality even for "simple" tasks. Skipping thinking on a factual lookup is
-correct. But "Light" for code generation understates the value of thinking — a
-30-second think before writing a function catches edge cases, checks against
-project conventions, and prevents rework.
-
-**Suggested rewrite** — bump the defaults up one tier:
-
-| Task type | Thinking depth | Rationale |
-|-----------|---------------|-----------|
-| Simple factual lookup (read, search) | Skip | Answer is immediate |
-| Tool output interpretation | Light | Verify result matches intent |
-| Code generation (single function) | Medium | Conventions, edge cases, context fit |
-| Multi-file refactor | Medium | Cross-file dependencies |
-| Debugging (error to root cause) | Deep | Hypothesis generation |
-| Architecture design | Deep | Trade-offs, constraints |
-| Security review | Deep | Adversarial reasoning |
-
-### Gap 5: No "Verify Before Claiming" Pattern
-
-**Current state**: The subagent output format (`subagent_output_format.md`) has an
-EVIDENCE section that requires concrete artifact citations. This is excellent. But
-the main prompt (`base.md`) doesn't establish this as a general habit.
-
-**Problem**: The model sometimes reads a file, then writes a patch based on its
-memory of the file rather than re-reading the specific lines it's changing. Or it
-claims a shell command succeeded based on exit code 0 without checking the output.
-
-**Suggested addition** — add to the "Decomposition Philosophy" section:
-
-```
-## Verification Principle
-
-After every tool call that produces a result you'll act on, verify before
-proceeding:
-- File reads: confirm the line numbers you're about to patch are what you think
-- Shell commands: check stdout, not just exit code
-- Search results: confirm the match is what you expected
-- Sub-agent results: cross-check one finding against a direct `read_file`
-
-Don't claim a change worked until you've observed evidence. Don't trust memory
-over live tool output.
-```
-
-### Gap 6: No Composition Heuristic for Complex Work
-
-**Current state**: The prompt says "For complex initiatives, layer `update_plan`
-above `checklist_write`." This is correct but vague. The model sometimes creates
-a plan, creates a checklist, and then works through the checklist without
-re-evaluating the plan.
-
-**Suggested addition**:
-
-```
-## Composition Pattern for Multi-Step Work
-
-For any task estimated to take 5+ steps:
-
-1. `update_plan` — 3-6 high-level phases (status: pending)
-2. `checklist_write` — concrete leaf tasks under the first phase (mark first
-   `in_progress`)
-3. Execute phase 1, updating checklist as you go
-4. After each phase completes, re-read your plan: does phase 2 still make sense?
-   Update the plan if new information changes the approach.
-5. When a phase reveals sub-problems, add them to the checklist or spawn
-   investigation sub-agents — don't guess.
-```
-
-### Gap 7: Approval Mode Contradiction
-
-**Current state**: The Agent mode approval policy says "Any write, patch, shell
-execution, sub-agent spawn, or CSV batch operation will ask for approval first."
-But the "Key principle" says "make your work visible" and encourages
-`checklist_write` to populate the sidebar.
-
-**Problem**: In Agent mode, the model often waits for approval on EACH step
-individually. A batch of 3 `edit_file` calls requires 3 separate approval rounds.
-The prompt should encourage batching approvals: present the full plan, get
-approval once, then execute all writes in parallel.
-
-**Suggested addition** — add to the Agent mode overlay:
-
-```
-## Efficient Approvals
-
-When your plan includes multiple writes, present them together:
-1. Show `checklist_write` with all write steps listed
-2. Request approval for the batch ("I need to make 3 edits across 2 files...")
-3. Once approved, execute all writes in one turn (parallel `edit_file` /
-   `apply_patch` calls)
-
-Don't sequence approvals one at a time. The user wants context, not interruption.
-```
-
----
-
-## Concrete Prompt Changes
-
-### 1. `base.md` — Replace "RLM Is a Specialty Tool" section
-
-Remove the current restrictive "RLM Is a Specialty Tool" section entirely.
-Replace with the "RLM — When to Use It" section from Gap 1 above.
-
-### 2. `base.md` — Replace "When NOT to use `agent_spawn`"
-
-Remove the bullet about sub-agents from the "When NOT to use" section.
-Move it to a new positive "Sub-Agent Strategy" section (Gap 2 above) placed
-immediately after the "Decomposition Philosophy" section.
-
-### 3. `base.md` — Add "Parallel-First Heuristic"
-
-Insert after the toolbox reference section, before "When NOT to use."
-(Gap 3 above.)
-
-### 4. `base.md` — Bump thinking budget defaults
-
-Change the "Code generation (single function)" row from Light → Medium.
-(Gap 4 above.) Single-line change.
-
-### 5. `base.md` — Add "Verification Principle"
-
-Insert as a sub-heading under "Decomposition Philosophy."
-(Gap 5 above.)
-
-### 6. `base.md` — Add "Composition Pattern"
-
-Insert as a sub-heading under "Decomposition Philosophy," after
-"Verification Principle."
-(Gap 6 above.)
-
-### 7. `modes/agent.md` — Add "Efficient Approvals"
-
-Insert at the end of the Agent mode overlay.
-(Gap 7 above.)
-
----
-
-## What NOT to Change
-
-- **"When NOT to use `exec_shell`"** — this guidance is correct and important.
-  Typed tools beat shell-outs for reliability.
-- **"When NOT to use `edit_file` / `apply_patch`"** — tool selection rules are
-  good and prevent blind patching.
-- **Preamble rhythm** — the tone guidance is well-calibrated.
-- **Output formatting** — terminal constraints are real; the guidance is correct.
-- **Context management** — the ~80% compaction suggestion is practical.
-- **Sub-agent sentinel protocol** — the integration pattern is well-defined.
-
----
-
-## Risk Assessment
-
-**Risk: Over-parallelization**. A model told to "batch everything" might spawn
-sub-agents for trivial reads. Mitigation: the "Solo tasks" bullet in the new
-sub-agent strategy section explicitly says "do these yourself."
-
-**Risk: Over-thinking**. Bumping the thinking budget might waste tokens on
-simple code generation. Mitigation: "Medium" for single-function generation is
-still conservative; the model can self-regulate with the existing guidance
-"skip for lookups."
-
-**Risk: RLM over-use**. Framing RLM as a strategic tool might cause inappropriate
-use for tasks better served by `agent_spawn`. Mitigation: the new "When NOT to
-use RLM" bullet covers the common failure modes.
-
-**Risk: Cache busting**. Adding text to the system prompt changes its byte
-representation, which busts the prefix cache for the first turn after the change.
-Mitigation: this is a one-time cost; subsequent turns hit the cache at the new
-prompt boundary.
diff --git a/TAKEOVER_PROMPT.md b/TAKEOVER_PROMPT.md
deleted file mode 100644
index cf7d211b..00000000
--- a/TAKEOVER_PROMPT.md
+++ /dev/null
@@ -1,212 +0,0 @@
-# v0.8.6 Takeover Prompt — Fresh DeepSeek V4 Session
-
-You are taking over the v0.8.6 sprint for `github.com/Hmbown/DeepSeek-TUI`.
-A previous DeepSeek session kept getting interrupted because the parent session
-grew too large during long-running work. The user has now pruned local saved
-sessions, but that is only temporary relief. Your job is to stabilize the branch
-and fix the product so long-running agent work survives by default.
-
-## Prime Directive
-
-Do not run this as one long sequential parent session.
-
-The parent session is the coordinator. Use `agent_spawn` for tool-carrying work,
-use `rlm` for batch classification/synthesis over long issue lists or docs, and
-keep the parent transcript small. If you find yourself reading files one by one
-for the same topic, stop and delegate.
-
-## Immediate Emergency
-
-Start with #402:
-
-- `#402 P0: make long-running sessions survivable by default (Codex-style compaction + bounded transcript state)`
-
-This is now the top priority because it caused the interrupted handoff loop.
-The issue body names the exact gap versus `/Volumes/VIXinSSD/codex-main`:
-
-- DeepSeek TUI keeps unbounded `api_messages` and visible `history`.
-- `auto_compact = false` and the capacity controller is off by default.
-- saved sessions serialize full `messages: Vec<Message>` snapshots.
-- the important mocked engine tests for compaction/subagents/parallel execution
-  are still ignored because the engine takes a concrete `DeepSeekClient`.
-- Codex has runtime pre/mid-turn compaction, replacement history, persisted
-  compacted rollout items, and sanitized/last-N subagent fork behavior.
-
-Do not treat this as docs or prompt tuning. Implement runtime guardrails.
-
-## Current Branch State To Verify
-
-Branch should be `feat/v0.8.6`. The prior interrupted session had dirty work.
-Verify before trusting any claim:
-
-1. `git status --short --branch`
-2. `cargo check --workspace --all-targets --locked`
-3. `cargo test --workspace --all-features --locked` if check passes
-4. read `AGENTS.md`, `V086_BRIEF.md`, `docs/ARCHITECTURE.md`, and issue #402
-
-Known partial work from the interrupted session:
-
-- Goal mode command dispatch (`/goal`) — inspect `crates/tui/src/commands/goal.rs`
-- File tree pane — inspect `crates/tui/src/tui/file_tree.rs`
-- user-defined command plumbing — inspect `crates/tui/src/commands/user_commands.rs`
-- localization/sidebar/rendering changes across `crates/tui/src/*`
-
-Do not overwrite unrelated dirty files. Work with the existing changes.
-
-## Updated v0.8.6 Issue Set
-
-The original brief said 23 issues, but the live v0.8.6 label now includes more.
-Refresh live state with:
-
-```bash
-gh issue list --label v0.8.6 --state open --limit 100 --json number,title,body,labels
-```
-
-New or especially relevant additions:
-
-- `#402` P0 long-running session survivability: runtime compaction, bounded transcript/session persistence.
-- `#401` prune overly defensive assertions: remove brittle prompt-substring/snapshot-style tests.
-- `#400` chat/sidebar text bleed-through: timestamp fragments persist across cells when scrolling.
-- `#399` lag/freeze audit: sync git on UI thread, unbounded history Vec, file-tree blocking walk.
-- `#398` codex-mcp parity: agent-style MCP server tool plus `deepseek mcp add/list/get/remove`.
-
-Existing high-priority v0.8.6 issues still include:
-
-- `#397` Goal mode
-- `#396` per-turn cache hit chip
-- `#395` cycle-boundary visualization
-- `#394` file-tree pane
-- `#393` share session URL
-- `#392` `/model auto`
-- `#391` user-defined slash commands
-- `#390` profile hot-switch
-- `#389` inline LSP diagnostics
-- `#388` crash-recovery prompt
-- `#387` self-update
-- `#386` `/init`
-- `#385` `/diff`
-- `#384` `/undo`
-- `#383` `/edit`
-- `#382` collapse Steer/Queue/Immediate
-- `#380` inline diff highlighting
-- `#379` smart clipboard
-- `#378` docs polish
-- `#377` shrink App state
-- `#376` native-copy escape
-- `#375` right-click context menu
-- `#374` clickable file:line
-- `#373` Tasks panel ignores shell jobs
-
-## First-Hour Execution Plan
-
-Do this as a fanout, not a serial survey.
-
-1. Parent: create a checklist with lanes below, then run one batched read/status
-   turn: `git status`, `gh issue list --label v0.8.6`, focused `rg` for
-   compaction/session/history/capacity, and the initial cargo check.
-
-2. Spawn sub-agent A: #402 runtime/session survivability.
-   Ownership: `crates/tui/src/core/engine.rs`, `crates/tui/src/compaction.rs`,
-   `crates/tui/src/session_manager.rs`, `crates/tui/src/tui/app.rs`,
-   `crates/tui/tests/integration_mock_llm.rs`, and relevant config docs.
-   Task: design and implement the smallest runtime guardrail slice that bounds
-   parent model history/session persistence and unblocks real integration tests.
-
-3. Spawn sub-agent B: current dirty-tree compile repair.
-   Ownership: partial v0.8.6 files from the interrupted session:
-   `commands/goal.rs`, `commands/user_commands.rs`, `tui/file_tree.rs`,
-   `commands/mod.rs`, `localization.rs`, `tui/sidebar.rs`, `tui/ui.rs`.
-   Task: make the branch compile without widening scope.
-
-4. Spawn sub-agent C: UI performance/bleed-through lane (#399/#400/#394).
-   Ownership: transcript rendering/cache, sidebar rendering, file-tree traversal.
-   Task: fix the regression and identify any blocking synchronous UI work.
-
-5. Spawn sub-agent D: issue/test hygiene lane (#401 plus ignored mock tests).
-   Ownership: brittle tests, prompt snapshot tests, and ignored integration tests.
-   Task: remove brittle assertions where appropriate and convert #402 acceptance
-   criteria into real tests.
-
-6. Spawn sub-agent E only if needed: MCP parity (#398) or command surface
-   follow-through (#391/#397). Keep it separate from #402 so the P0 fix is not
-   tangled with feature work.
-
-## RLM Usage
-
-Use `rlm` when the input is large enough that pasting/reading it in the parent
-would bloat the session. Good RLM tasks here:
-
-- classify all live `v0.8.6` issue bodies into independent implementation lanes;
-- compare #402 against Codex files by giving RLM extracted snippets from both
-  repos and asking for a bounded acceptance checklist;
-- batch-review a long test list for brittle assertions related to #401;
-- summarize long cargo/clippy output into file-owned fix clusters.
-
-Inside RLM, use `llm_query_batched()` for independent classifications and
-`rlm_query()` only for recursive critique/decomposition. The parent should get
-the final synthesis, not every intermediate chunk.
-
-## Session Survival Rules
-
-- Keep at most 5 sub-agents running.
-- After spawning agents, keep doing non-overlapping local coordination work.
-- Use `agent_wait` only when blocked on results.
-- Use `agent_result` for completed agents and summarize results into the parent.
-- Suggest `/compact` at 60% context, but do not rely on that as the product fix.
-- If the parent reaches 3 sequential turns on the same topic, spawn or RLM it.
-- Do not paste full logs into the parent. Store logs as artifacts or ask RLM to
-  summarize them.
-
-## PR Workflow
-
-Use GitHub PRs as an extra review surface. Do not let a giant local branch pile
-up without outside checks.
-
-- Prefer small PRs by issue or tightly related lane: #402 can be its own PR,
-  compile-repair can be its own PR, UI performance/regression fixes can be their
-  own PR, and command-surface features can be separate.
-- Push work branches and open PRs early once each slice compiles and has focused
-  tests. Include `Closes #...` only when the PR actually satisfies the issue.
-- Let CI and any GitHub AI/code-review agents inspect the code. Treat review
-  comments as real work: address them with follow-up commits rather than
-  hand-waving them away.
-- When a PR comes back clean, merge it into the target branch and continue from
-  the updated branch. When it comes back with requested fixes, make the fixes,
-  rerun the relevant gates, and wait for the updated checks before merging.
-- Keep the parent session tracking PR state with `gh pr view`, `gh pr checks`,
-  and `gh issue view`; do not manually close issues unless acceptance is
-  verified and the merge did not close them automatically.
-
-## Verification Gates
-
-Before claiming anything is done:
-
-```bash
-cargo fmt --all -- --check
-cargo check --workspace --all-targets --locked
-cargo test --workspace --all-features --locked
-cargo clippy --workspace --all-targets --all-features --locked -- -D warnings
-```
-
-For #402 specifically, also add or enable focused tests proving:
-
-- compaction/cycle guardrail runs before dangerous context growth;
-- live `api_messages` or equivalent model history is bounded after compaction;
-- visible transcript/session persistence is bounded or virtualized;
-- sub-agent result ingestion into the parent is summarized/bounded;
-- child fork history can use sanitized last-N behavior;
-- session save/checkpoint does not rewrite arbitrary huge full transcripts.
-
-## Final Report Format
-
-Use these headings:
-
-- Implemented
-- Verified
-- Issues safe to close
-- Issues still open and why
-- Commands run
-- Residual risks
-
-Be explicit about what is local-only, what is committed, what is pushed, and what
-is merely planned. Do not close issues unless acceptance criteria are verified.
diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml
index 69145890..5deb9e28 100644
--- a/crates/agent/Cargo.toml
+++ b/crates/agent/Cargo.toml
@@ -7,5 +7,5 @@ repository.workspace = true
 description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"
 
 [dependencies]
-deepseek-config = { path = "../config", version = "0.8.33" }
+deepseek-config = { path = "../config", version = "0.8.34" }
 serde.workspace = true
diff --git a/crates/app-server/Cargo.toml b/crates/app-server/Cargo.toml
index e9f2dd62..33cbed9f 100644
--- a/crates/app-server/Cargo.toml
+++ b/crates/app-server/Cargo.toml
@@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect
 anyhow.workspace = true
 axum.workspace = true
 clap.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.33" }
-deepseek-config = { path = "../config", version = "0.8.33" }
-deepseek-core = { path = "../core", version = "0.8.33" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.33" }
-deepseek-hooks = { path = "../hooks", version = "0.8.33" }
-deepseek-mcp = { path = "../mcp", version = "0.8.33" }
-deepseek-protocol = { path = "../protocol", version = "0.8.33" }
-deepseek-state = { path = "../state", version = "0.8.33" }
-deepseek-tools = { path = "../tools", version = "0.8.33" }
+deepseek-agent = { path = "../agent", version = "0.8.34" }
+deepseek-config = { path = "../config", version = "0.8.34" }
+deepseek-core = { path = "../core", version = "0.8.34" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.34" }
+deepseek-hooks = { path = "../hooks", version = "0.8.34" }
+deepseek-mcp = { path = "../mcp", version = "0.8.34" }
+deepseek-protocol = { path = "../protocol", version = "0.8.34" }
+deepseek-state = { path = "../state", version = "0.8.34" }
+deepseek-tools = { path = "../tools", version = "0.8.34" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml
index 53d59366..fa6cb1fc 100644
--- a/crates/cli/Cargo.toml
+++ b/crates/cli/Cargo.toml
@@ -14,13 +14,13 @@ path = "src/main.rs"
 anyhow.workspace = true
 clap.workspace = true
 clap_complete.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.33" }
-deepseek-app-server = { path = "../app-server", version = "0.8.33" }
-deepseek-config = { path = "../config", version = "0.8.33" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.33" }
-deepseek-mcp = { path = "../mcp", version = "0.8.33" }
-deepseek-secrets = { path = "../secrets", version = "0.8.33" }
-deepseek-state = { path = "../state", version = "0.8.33" }
+deepseek-agent = { path = "../agent", version = "0.8.34" }
+deepseek-app-server = { path = "../app-server", version = "0.8.34" }
+deepseek-config = { path = "../config", version = "0.8.34" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.34" }
+deepseek-mcp = { path = "../mcp", version = "0.8.34" }
+deepseek-secrets = { path = "../secrets", version = "0.8.34" }
+deepseek-state = { path = "../state", version = "0.8.34" }
 chrono.workspace = true
 dirs.workspace = true
 serde.workspace = true
diff --git a/crates/config/Cargo.toml b/crates/config/Cargo.toml
index c8793743..148db37c 100644
--- a/crates/config/Cargo.toml
+++ b/crates/config/Cargo.toml
@@ -8,7 +8,7 @@ description = "Config schema and precedence model for DeepSeek workspace archite
 
 [dependencies]
 anyhow.workspace = true
-deepseek-secrets = { path = "../secrets", version = "0.8.33" }
+deepseek-secrets = { path = "../secrets", version = "0.8.34" }
 dirs.workspace = true
 serde.workspace = true
 toml.workspace = true
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index 50f2598c..b70ee484 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -9,13 +9,13 @@ description = "Core runtime boundaries for DeepSeek workspace architecture"
 [dependencies]
 anyhow.workspace = true
 chrono.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.33" }
-deepseek-config = { path = "../config", version = "0.8.33" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.33" }
-deepseek-hooks = { path = "../hooks", version = "0.8.33" }
-deepseek-mcp = { path = "../mcp", version = "0.8.33" }
-deepseek-protocol = { path = "../protocol", version = "0.8.33" }
-deepseek-state = { path = "../state", version = "0.8.33" }
-deepseek-tools = { path = "../tools", version = "0.8.33" }
+deepseek-agent = { path = "../agent", version = "0.8.34" }
+deepseek-config = { path = "../config", version = "0.8.34" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.34" }
+deepseek-hooks = { path = "../hooks", version = "0.8.34" }
+deepseek-mcp = { path = "../mcp", version = "0.8.34" }
+deepseek-protocol = { path = "../protocol", version = "0.8.34" }
+deepseek-state = { path = "../state", version = "0.8.34" }
+deepseek-tools = { path = "../tools", version = "0.8.34" }
 serde_json.workspace = true
 uuid.workspace = true
diff --git a/crates/execpolicy/Cargo.toml b/crates/execpolicy/Cargo.toml
index 51f4b341..691f8c03 100644
--- a/crates/execpolicy/Cargo.toml
+++ b/crates/execpolicy/Cargo.toml
@@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace
 
 [dependencies]
 anyhow.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.33" }
+deepseek-protocol = { path = "../protocol", version = "0.8.34" }
 serde.workspace = true
diff --git a/crates/hooks/Cargo.toml b/crates/hooks/Cargo.toml
index 855b2c30..901f71ff 100644
--- a/crates/hooks/Cargo.toml
+++ b/crates/hooks/Cargo.toml
@@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc
 anyhow.workspace = true
 async-trait.workspace = true
 chrono.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.33" }
+deepseek-protocol = { path = "../protocol", version = "0.8.34" }
 reqwest.workspace = true
 serde.workspace = true
 serde_json.workspace = true
diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml
index bb1c5306..07525aef 100644
--- a/crates/tools/Cargo.toml
+++ b/crates/tools/Cargo.toml
@@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral
 [dependencies]
 anyhow.workspace = true
 async-trait.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.33" }
+deepseek-protocol = { path = "../protocol", version = "0.8.34" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml
index 44873c5f..25c949ba 100644
--- a/crates/tui/Cargo.toml
+++ b/crates/tui/Cargo.toml
@@ -21,8 +21,8 @@ path = "src/main.rs"
 [dependencies]
 anyhow = "1.0.100"
 arboard = "3.4"
-deepseek-secrets = { path = "../secrets", version = "0.8.33" }
-deepseek-tools = { path = "../tools", version = "0.8.33" }
+deepseek-secrets = { path = "../secrets", version = "0.8.34" }
+deepseek-tools = { path = "../tools", version = "0.8.34" }
 schemaui = { version = "0.12.0", default-features = false, optional = true }
 async-stream = "0.3.6"
 async-trait = "0.1"
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 7cf0eb2b..7e7a4e4f 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -7,7 +7,7 @@ Current boundary note (v0.8.6):
 - Other workspace crates are being split out incrementally, but they are not yet the sole runtime source of truth.
 - The LSP subsystem (`crates/tui/src/lsp/`) is fully wired into the engine's post-tool-execution path
   (`core/engine/lsp_hooks.rs`), providing inline diagnostics after every edit_file/apply_patch/write_file.
-- The swarm agent system was removed in v0.8.5. The active v0.8.33 orchestration surface is persistent sub-agent sessions (`agent_open` / `agent_eval` / `agent_close`) and persistent RLM sessions (`rlm_open` / `rlm_eval` / `rlm_configure` / `rlm_close`).
+- The swarm agent system was removed in v0.8.5. The active v0.8.34 orchestration surface is persistent sub-agent sessions (`agent_open` / `agent_eval` / `agent_close`) and persistent RLM sessions (`rlm_open` / `rlm_eval` / `rlm_configure` / `rlm_close`).
   No model-visible swarm tool remains in the active codebase.
 
 ## High-Level Overview
diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md
index 16365dce..9984519b 100644
--- a/docs/TOOL_SURFACE.md
+++ b/docs/TOOL_SURFACE.md
@@ -15,7 +15,7 @@ chosen over the available shell equivalent. Companion to `crates/tui/src/prompts
   for the same backing operation are a model trap — the LLM will alternate
   between them and the cache hit rate suffers.
 
-## Current surface (v0.8.33)
+## Current surface (v0.8.34)
 
 ### File operations
 
@@ -138,7 +138,7 @@ Large logs and command outputs should be artifacts with compact summaries in the
 
 ### Sub-agents
 
-v0.8.33 begins moving large tool outputs toward symbolic handles: tools return
+v0.8.33 began moving large tool outputs toward symbolic handles: tools return
 small `var_handle` objects, and `handle_read` retrieves bounded slices, counts,
 or JSON projections from the backing environment. This keeps the parent
 transcript small while preserving a recovery path to the full payload.
@@ -224,7 +224,7 @@ slots.
 
 ## Removed legacy aliases and surfaces
 
-v0.8.33 removes the old model-facing sub-agent fan-out surface from active
+v0.8.33 removed the old model-facing sub-agent fan-out surface from active
 prompting and tool catalogs. Do not use these names in new active guidance:
 `agent_spawn`, `agent_wait`, `agent_result`, `agent_send_input`,
 `agent_assign`, `agent_resume`, `agent_list`, `spawn_agent`,
@@ -269,7 +269,7 @@ rg -n '"handle_read"|"rlm_open"|"rlm_eval"|"rlm_configure"|"rlm_close"|"agent_op
 rg -n 'handle_read|rlm_open|rlm_eval|rlm_configure|rlm_close|agent_open|agent_eval|agent_close' docs crates/tui/src/prompts crates/tui/src/tools
 ```
 
-The canonical v0.8.33 live names are:
+The canonical v0.8.34 live names are:
 
 - `handle_read`
 - `rlm_open`, `rlm_eval`, `rlm_configure`, `rlm_close`
diff --git a/npm/deepseek-tui/package.json b/npm/deepseek-tui/package.json
index c1c1e679..6afbc722 100644
--- a/npm/deepseek-tui/package.json
+++ b/npm/deepseek-tui/package.json
@@ -1,7 +1,7 @@
 {
   "name": "deepseek-tui",
-  "version": "0.8.33",
-  "deepseekBinaryVersion": "0.8.33",
+  "version": "0.8.34",
+  "deepseekBinaryVersion": "0.8.34",
   "description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.",
   "author": "Hmbown",
   "license": "MIT",
diff --git a/web/lib/facts.generated.ts b/web/lib/facts.generated.ts
index c509eecf..4ff0eca1 100644
--- a/web/lib/facts.generated.ts
+++ b/web/lib/facts.generated.ts
@@ -19,7 +19,7 @@ export interface RepoFacts {
 
 export const FACTS: RepoFacts = {
   "generatedAt": "2026-05-12T22:56:03.599Z",
-  "version": "0.8.33",
+  "version": "0.8.34",
   "crates": [
     "agent",
     "app-server",