fix: relax v4 compaction and transcript scroll

2026-04-23 23:31:18 -05:00
parent 35595f8edc
commit ffa75f07e5
17 changed files with 307 additions and 75 deletions
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+## [0.4.1] - 2026-04-24
+
+### Fixed
+- DeepSeek V4 tool-result context now preserves large file reads and command outputs instead of compacting noisy tool output to a 900-character snippet once it exceeds 2,000 characters.
+- Capacity guardrail refresh no longer performs destructive summary compaction unless the normal model-aware compaction thresholds are actually crossed.
+- V4 compaction summaries retain larger tool-result excerpts and summary input when compaction is genuinely needed.
+- The transcript now follows the bottom again when sending a new message, shows an in-app scrollbar when internally scrolled, and leaves mouse capture off in `--no-alt-screen` mode so terminal-native scrolling can work.
+
 ## [0.4.0] - 2026-04-23

 ### Added
@@ -806,7 +806,7 @@ dependencies = [

 [[package]]
 name = "deepseek-agent"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "deepseek-config",
 "serde",
@@ -814,7 +814,7 @@ dependencies = [

 [[package]]
 name = "deepseek-app-server"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "anyhow",
 "axum",
@@ -837,7 +837,7 @@ dependencies = [

 [[package]]
 name = "deepseek-config"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "anyhow",
 "dirs",
@@ -848,7 +848,7 @@ dependencies = [

 [[package]]
 name = "deepseek-core"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "anyhow",
 "chrono",
@@ -867,7 +867,7 @@ dependencies = [

 [[package]]
 name = "deepseek-execpolicy"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "anyhow",
 "deepseek-protocol",
@@ -876,7 +876,7 @@ dependencies = [

 [[package]]
 name = "deepseek-hooks"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "anyhow",
 "async-trait",
@@ -890,7 +890,7 @@ dependencies = [

 [[package]]
 name = "deepseek-mcp"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "anyhow",
 "deepseek-protocol",
@@ -900,7 +900,7 @@ dependencies = [

 [[package]]
 name = "deepseek-protocol"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "serde",
 "serde_json",
@@ -908,7 +908,7 @@ dependencies = [

 [[package]]
 name = "deepseek-state"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "anyhow",
 "chrono",
@@ -920,7 +920,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tools"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "anyhow",
 "async-trait",
@@ -933,7 +933,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "anyhow",
 "arboard",
@@ -987,7 +987,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui-cli"
-version = "0.4.0"
+version = "0.4.1"
 dependencies = [
 "anyhow",
 "chrono",
@@ -1005,7 +1005,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui-core"
-version = "0.4.0"
+version = "0.4.1"

 [[package]]
 name = "deranged"
@@ -18,7 +18,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
 resolver = "2"

 [workspace.package]
-version = "0.4.0"
+version = "0.4.1"
 edition = "2024"
 license = "MIT"
 repository = "https://github.com/Hmbown/DeepSeek-TUI"
@@ -7,5 +7,5 @@ repository.workspace = true
 description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"

 [dependencies]
-deepseek-config = { path = "../config", version = "0.4.0" }
+deepseek-config = { path = "../config", version = "0.4.1" }
 serde.workspace = true
@@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect
 anyhow.workspace = true
 axum.workspace = true
 clap.workspace = true
-deepseek-agent = { path = "../agent", version = "0.4.0" }
-deepseek-config = { path = "../config", version = "0.4.0" }
-deepseek-core = { path = "../core", version = "0.4.0" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.4.0" }
-deepseek-hooks = { path = "../hooks", version = "0.4.0" }
-deepseek-mcp = { path = "../mcp", version = "0.4.0" }
-deepseek-protocol = { path = "../protocol", version = "0.4.0" }
-deepseek-state = { path = "../state", version = "0.4.0" }
-deepseek-tools = { path = "../tools", version = "0.4.0" }
+deepseek-agent = { path = "../agent", version = "0.4.1" }
+deepseek-config = { path = "../config", version = "0.4.1" }
+deepseek-core = { path = "../core", version = "0.4.1" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.4.1" }
+deepseek-hooks = { path = "../hooks", version = "0.4.1" }
+deepseek-mcp = { path = "../mcp", version = "0.4.1" }
+deepseek-protocol = { path = "../protocol", version = "0.4.1" }
+deepseek-state = { path = "../state", version = "0.4.1" }
+deepseek-tools = { path = "../tools", version = "0.4.1" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
@@ -14,12 +14,12 @@ path = "src/main.rs"
 anyhow.workspace = true
 clap.workspace = true
 clap_complete.workspace = true
-deepseek-agent = { path = "../agent", version = "0.4.0" }
-deepseek-app-server = { path = "../app-server", version = "0.4.0" }
-deepseek-config = { path = "../config", version = "0.4.0" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.4.0" }
-deepseek-mcp = { path = "../mcp", version = "0.4.0" }
-deepseek-state = { path = "../state", version = "0.4.0" }
+deepseek-agent = { path = "../agent", version = "0.4.1" }
+deepseek-app-server = { path = "../app-server", version = "0.4.1" }
+deepseek-config = { path = "../config", version = "0.4.1" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.4.1" }
+deepseek-mcp = { path = "../mcp", version = "0.4.1" }
+deepseek-state = { path = "../state", version = "0.4.1" }
 chrono.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
@@ -9,14 +9,14 @@ description = "Core runtime boundaries for DeepSeek workspace architecture"
 [dependencies]
 anyhow.workspace = true
 chrono.workspace = true
-deepseek-agent = { path = "../agent", version = "0.4.0" }
-deepseek-config = { path = "../config", version = "0.4.0" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.4.0" }
-deepseek-hooks = { path = "../hooks", version = "0.4.0" }
-deepseek-mcp = { path = "../mcp", version = "0.4.0" }
-deepseek-protocol = { path = "../protocol", version = "0.4.0" }
-deepseek-state = { path = "../state", version = "0.4.0" }
-deepseek-tools = { path = "../tools", version = "0.4.0" }
+deepseek-agent = { path = "../agent", version = "0.4.1" }
+deepseek-config = { path = "../config", version = "0.4.1" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.4.1" }
+deepseek-hooks = { path = "../hooks", version = "0.4.1" }
+deepseek-mcp = { path = "../mcp", version = "0.4.1" }
+deepseek-protocol = { path = "../protocol", version = "0.4.1" }
+deepseek-state = { path = "../state", version = "0.4.1" }
+deepseek-tools = { path = "../tools", version = "0.4.1" }
 serde_json.workspace = true
 tokio.workspace = true
 uuid.workspace = true
@@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace

 [dependencies]
 anyhow.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.0" }
+deepseek-protocol = { path = "../protocol", version = "0.4.1" }
 serde.workspace = true
@@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc
 anyhow.workspace = true
 async-trait.workspace = true
 chrono.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.0" }
+deepseek-protocol = { path = "../protocol", version = "0.4.1" }
 reqwest.workspace = true
 serde.workspace = true
 serde_json.workspace = true
@@ -8,6 +8,6 @@ description = "MCP server lifecycle and tool proxy compatibility for DeepSeek wo

 [dependencies]
 anyhow.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.0" }
+deepseek-protocol = { path = "../protocol", version = "0.4.1" }
 serde.workspace = true
 serde_json.workspace = true
@@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral
 [dependencies]
 anyhow.workspace = true
 async-trait.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.0" }
+deepseek-protocol = { path = "../protocol", version = "0.4.1" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
@@ -14,6 +14,7 @@ use crate::llm_client::LlmClient;
 use crate::logging;
 use crate::models::{
    CacheControl, ContentBlock, Message, MessageRequest, SystemBlock, SystemPrompt,
+    context_window_for_model,
 };

 /// Configuration for conversation compaction behavior.
@@ -47,6 +48,50 @@ const SUMMARY_TOOL_RESULT_SNIPPET_CHARS: usize = 240;
 const SUMMARY_INPUT_MAX_CHARS: usize = 24_000;
 const SUMMARY_INPUT_HEAD_CHARS: usize = 14_000;
 const SUMMARY_INPUT_TAIL_CHARS: usize = 6_000;
+const LARGE_CONTEXT_SUMMARY_TEXT_SNIPPET_CHARS: usize = 2_000;
+const LARGE_CONTEXT_SUMMARY_TOOL_RESULT_SNIPPET_CHARS: usize = 4_000;
+const LARGE_CONTEXT_SUMMARY_INPUT_MAX_CHARS: usize = 120_000;
+const LARGE_CONTEXT_SUMMARY_INPUT_HEAD_CHARS: usize = 72_000;
+const LARGE_CONTEXT_SUMMARY_INPUT_TAIL_CHARS: usize = 36_000;
+const LARGE_CONTEXT_SUMMARY_MAX_TOKENS: u32 = 2_048;
+const LARGE_CONTEXT_WINDOW_TOKENS: u32 = 500_000;
+
+/// Size limits applied when building and requesting a compaction summary.
+#[derive(Debug, Clone, Copy)]
+struct SummaryInputLimits {
+    /// Max chars kept from each text block fed to the summarizer.
+    text_snippet_chars: usize,
+    /// Max chars kept from each tool-result block fed to the summarizer.
+    tool_result_snippet_chars: usize,
+    /// Formatted conversation length above which only a head and a tail are kept.
+    input_max_chars: usize,
+    /// Chars kept from the start of an over-long conversation.
+    input_head_chars: usize,
+    /// Chars kept from the end of an over-long conversation.
+    input_tail_chars: usize,
+    /// `max_tokens` sent with the summary request.
+    max_tokens: u32,
+    /// Word budget quoted to the model in the summary prompt.
+    word_limit: usize,
+}
+
+/// Picks summary limits for `model`, expanding them when its context window is large.
+fn summary_input_limits_for_model(model: &str) -> SummaryInputLimits {
+    let default_limits = SummaryInputLimits {
+        text_snippet_chars: SUMMARY_TEXT_SNIPPET_CHARS,
+        tool_result_snippet_chars: SUMMARY_TOOL_RESULT_SNIPPET_CHARS,
+        input_max_chars: SUMMARY_INPUT_MAX_CHARS,
+        input_head_chars: SUMMARY_INPUT_HEAD_CHARS,
+        input_tail_chars: SUMMARY_INPUT_TAIL_CHARS,
+        max_tokens: 1_024,
+        word_limit: 500,
+    };
+    let large_limits = SummaryInputLimits {
+        text_snippet_chars: LARGE_CONTEXT_SUMMARY_TEXT_SNIPPET_CHARS,
+        tool_result_snippet_chars: LARGE_CONTEXT_SUMMARY_TOOL_RESULT_SNIPPET_CHARS,
+        input_max_chars: LARGE_CONTEXT_SUMMARY_INPUT_MAX_CHARS,
+        input_head_chars: LARGE_CONTEXT_SUMMARY_INPUT_HEAD_CHARS,
+        input_tail_chars: LARGE_CONTEXT_SUMMARY_INPUT_TAIL_CHARS,
+        max_tokens: LARGE_CONTEXT_SUMMARY_MAX_TOKENS,
+        word_limit: 900,
+    };
+    match context_window_for_model(model) {
+        Some(window) if window >= LARGE_CONTEXT_WINDOW_TOKENS => large_limits,
+        _ => default_limits,
+    }
+}

 #[derive(Debug, Clone, Default)]
 struct CompactionPlan {
@@ -757,6 +802,7 @@ async fn create_summary(
    messages: &[Message],
    model: &str,
 ) -> Result<String> {
+    let limits = summary_input_limits_for_model(model);
    // Format messages for summarization
    let mut conversation_text = String::new();
    for msg in messages {
@@ -768,14 +814,14 @@ async fn create_summary(
        for block in &msg.content {
            match block {
                ContentBlock::Text { text, .. } => {
-                    let snippet = truncate_chars(text, SUMMARY_TEXT_SNIPPET_CHARS);
+                    let snippet = truncate_chars(text, limits.text_snippet_chars);
                    let _ = write!(conversation_text, "{role}: {snippet}\n\n");
                }
                ContentBlock::ToolUse { name, .. } => {
                    let _ = write!(conversation_text, "{role}: [Used tool: {name}]\n\n");
                }
                ContentBlock::ToolResult { content, .. } => {
-                    let snippet = truncate_chars(content, SUMMARY_TOOL_RESULT_SNIPPET_CHARS);
+                    let snippet = truncate_chars(content, limits.tool_result_snippet_chars);
                    let _ = write!(conversation_text, "Tool result: {}\n\n", snippet);
                }
                ContentBlock::Thinking { .. } => {
@@ -789,9 +835,9 @@ async fn create_summary(
    }

    let conversation_chars = conversation_text.chars().count();
-    if conversation_chars > SUMMARY_INPUT_MAX_CHARS {
-        let head = truncate_chars(&conversation_text, SUMMARY_INPUT_HEAD_CHARS).to_string();
-        let tail = tail_chars(&conversation_text, SUMMARY_INPUT_TAIL_CHARS);
+    if conversation_chars > limits.input_max_chars {
+        let head = truncate_chars(&conversation_text, limits.input_head_chars).to_string();
+        let tail = tail_chars(&conversation_text, limits.input_tail_chars);
        let omitted = conversation_chars
            .saturating_sub(head.chars().count())
            .saturating_sub(tail.chars().count());
@@ -806,14 +852,16 @@ async fn create_summary(
            content: vec![ContentBlock::Text {
                text: format!(
                    "Summarize the following conversation in a concise but comprehensive way. \
-                     Preserve key information, decisions made, and any important context. \
-                     Tool outputs may be abbreviated. \
-                     Keep it under 500 words.\n\n---\n\n{conversation_text}"
+                     Preserve key information, decisions made, exact file paths, commands, \
+                     errors, and tool-result facts needed to continue the work. \
+                     Tool outputs may be abbreviated only when they are repetitive. \
+                     Keep it under {} words.\n\n---\n\n{conversation_text}",
+                    limits.word_limit
                ),
                cache_control: None,
            }],
        }],
-        max_tokens: 1024,
+        max_tokens: limits.max_tokens,
        system: Some(SystemPrompt::Text(
            "You are a helpful assistant that creates concise conversation summaries.".to_string(),
        )),
@@ -1035,6 +1083,16 @@ mod tests {
        assert!(!is_transient_error(&validation_err));
    }

+    /// `deepseek-v4-pro` must receive strictly larger summary limits than
+    /// `deepseek-v3.2-128k` for input size, tool-result snippets, and output tokens.
+    #[test]
+    fn summary_limits_expand_for_v4_context() {
+        let legacy = summary_input_limits_for_model("deepseek-v3.2-128k");
+        let v4 = summary_input_limits_for_model("deepseek-v4-pro");
+
+        assert!(v4.input_max_chars > legacy.input_max_chars);
+        assert!(v4.tool_result_snippet_chars > legacy.tool_result_snippet_chars);
+        assert!(v4.max_tokens > legacy.max_tokens);
+    }
+
    #[test]
    fn estimate_tokens_empty_messages() {
        let messages: Vec<Message> = vec![];
@@ -357,6 +357,14 @@ const TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS: usize = 12_000;
 const TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS: usize = 2_000;
 /// Snippet length kept when compacting tool output for model context.
 const TOOL_RESULT_CONTEXT_SNIPPET_CHARS: usize = 900;
+/// Hard cap for tool output inserted into a large-context model.
+const LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS: usize = 180_000;
+/// Soft cap for known noisy tools inserted into a large-context model.
+const LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS: usize = 60_000;
+/// Snippet length kept when compacting large-context tool output.
+const LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS: usize = 40_000;
+/// Context window size at which tool output limits can be relaxed.
+const LARGE_CONTEXT_WINDOW_TOKENS: u32 = 500_000;
 /// Max chars to keep from metadata-provided output summaries.
 const TOOL_RESULT_METADATA_SUMMARY_CHARS: usize = 320;
 const COMPACTION_SUMMARY_MARKER: &str = "Conversation Summary (Auto-Generated)";
@@ -1052,6 +1060,30 @@ fn summarize_text(text: &str, limit: usize) -> String {
    out
 }

+/// Shortens `text` to a `limit`-char budget by keeping its head and tail around a
+/// truncation marker. Roughly two thirds of the non-marker budget goes to the head
+/// and the rest to the tail. Text that already fits is returned unchanged, and a
+/// `limit` that cannot hold the marker plus more than 20 chars falls back to
+/// `summarize_text`.
+fn summarize_text_head_tail(text: &str, limit: usize) -> String {
+    const MARKER: &str = "\n\n[... output truncated for context ...]\n\n";
+    let total = text.chars().count();
+    if total <= limit {
+        return text.to_string();
+    }
+    // The marker is ASCII, so its byte length equals its char count.
+    let Some(remaining) = limit.checked_sub(MARKER.len()).filter(|&r| r > 20) else {
+        return summarize_text(text, limit);
+    };
+
+    let head_len = remaining.saturating_mul(2) / 3;
+    let tail_len = remaining - head_len;
+    // Slice on char boundaries instead of collecting intermediate buffers.
+    let head_end = text.char_indices().nth(head_len).map_or(text.len(), |(i, _)| i);
+    let tail_start = text.char_indices().rev().nth(tail_len - 1).map_or(0, |(i, _)| i);
+    [&text[..head_end], MARKER, &text[tail_start..]].concat()
+}
+
 fn tool_result_is_noisy(tool_name: &str) -> bool {
    matches!(
        tool_name,
@@ -1076,20 +1108,51 @@ fn tool_result_metadata_summary(metadata: Option<&serde_json::Value>) -> Option<
    None
 }

-pub(crate) fn compact_tool_result_for_context(tool_name: &str, output: &ToolResult) -> String {
+/// Per-model thresholds used when deciding how much tool output enters model context.
+#[derive(Debug, Clone, Copy)]
+struct ToolResultContextLimits {
+    /// Output longer than this is compacted for any tool.
+    hard_limit_chars: usize,
+    /// Output from noisy tools is compacted once it exceeds this length.
+    noisy_soft_limit_chars: usize,
+    /// Character budget for the snippet kept when output is compacted.
+    snippet_chars: usize,
+}
+
+/// Chooses the tool-output context caps for `model`. Models whose known context
+/// window reaches `LARGE_CONTEXT_WINDOW_TOKENS` get the relaxed large-context caps;
+/// every other model gets the standard caps.
+fn tool_result_context_limits_for_model(model: &str) -> ToolResultContextLimits {
+    match context_window_for_model(model) {
+        Some(window) if window >= LARGE_CONTEXT_WINDOW_TOKENS => ToolResultContextLimits {
+            hard_limit_chars: LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS,
+            noisy_soft_limit_chars: LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS,
+            snippet_chars: LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS,
+        },
+        // Unknown or smaller windows fall through to the standard caps.
+        _ => ToolResultContextLimits {
+            hard_limit_chars: TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS,
+            noisy_soft_limit_chars: TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS,
+            snippet_chars: TOOL_RESULT_CONTEXT_SNIPPET_CHARS,
+        },
+    }
+}
+
+pub(crate) fn compact_tool_result_for_context(
+    model: &str,
+    tool_name: &str,
+    output: &ToolResult,
+) -> String {
    let raw = output.content.trim();
    if raw.is_empty() {
        return String::new();
    }

+    let limits = tool_result_context_limits_for_model(model);
    let raw_chars = raw.chars().count();
-    let should_compact = raw_chars > TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS
-        || (tool_result_is_noisy(tool_name) && raw_chars > TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS);
+    let should_compact = raw_chars > limits.hard_limit_chars
+        || (tool_result_is_noisy(tool_name) && raw_chars > limits.noisy_soft_limit_chars);
    if !should_compact {
        return raw.to_string();
    }

-    let snippet = summarize_text(raw, TOOL_RESULT_CONTEXT_SNIPPET_CHARS);
+    let snippet = summarize_text_head_tail(raw, limits.snippet_chars);
    let omitted = raw_chars.saturating_sub(snippet.chars().count());
    let summary = tool_result_metadata_summary(output.metadata.as_ref());

@@ -3369,8 +3432,11 @@ impl Engine {
                            "tool_name": outcome.name.clone(),
                            "success": output.success,
                        }));
-                        let output_for_context =
-                            compact_tool_result_for_context(&outcome.name, &output);
+                        let output_for_context = compact_tool_result_for_context(
+                            &self.session.model,
+                            &outcome.name,
+                            &output,
+                        );
                        let output_content = output.content;

                        tool_call.set_result(output_content.clone(), duration);
@@ -3767,7 +3833,15 @@ impl Engine {
        let compaction_paths = self.session.working_set.top_paths(24);

        let mut refreshed = false;
-        if let Some(client) = client {
+        let should_run_summary_compaction = self.config.compaction.enabled
+            && should_compact(
+                &self.session.messages,
+                &self.config.compaction,
+                Some(&self.session.workspace),
+                Some(&compaction_pins),
+                Some(&compaction_paths),
+            );
+        if should_run_summary_compaction && let Some(client) = client {
            match compact_messages_safe(
                client,
                &self.session.messages,
@@ -3799,8 +3873,10 @@ impl Engine {
        if !refreshed {
            let target_budget = context_input_budget(&self.session.model, TURN_MAX_OUTPUT_TOKENS)
                .unwrap_or(self.config.compaction.token_threshold.max(1));
-            let trimmed = self.trim_oldest_messages_to_budget(target_budget);
-            refreshed = trimmed > 0;
+            if self.estimated_input_tokens() > target_budget {
+                let trimmed = self.trim_oldest_messages_to_budget(target_budget);
+                refreshed = trimmed > 0;
+            }
        }

        if !refreshed {
@@ -227,6 +227,20 @@ fn context_budget_reserves_output_and_headroom() {
    assert_eq!(budget, expected);
 }

+/// The same tool output must pass through untouched for `deepseek-v4-pro` while
+/// still being compacted for `deepseek-v3.2-128k`.
+#[test]
+fn v4_tool_outputs_keep_large_file_reads_in_context() {
+    // 17 chars x 2_000 = 34_000 chars: under the large-context soft limit (60_000)
+    // but over the standard hard limit (12_000), so only the legacy model compacts it.
+    let content = "0123456789abcdef\n".repeat(2_000);
+    let output = ToolResult::success(content.clone());
+
+    let v4_context = compact_tool_result_for_context("deepseek-v4-pro", "exec_shell", &output);
+    // The function trims surrounding whitespace before returning the raw output.
+    assert_eq!(v4_context, content.trim());
+
+    let legacy_context =
+        compact_tool_result_for_context("deepseek-v3.2-128k", "exec_shell", &output);
+    assert!(legacy_context.contains("output compacted to protect context"));
+    assert!(legacy_context.len() < v4_context.len());
+}
+
 #[test]
 fn refresh_system_prompt_places_working_set_after_stable_prefix() {
    let tmp = tempdir().expect("tempdir");
@@ -295,6 +309,49 @@ fn compaction_summary_stays_before_volatile_working_set() {
    assert_eq!(working_set_index, blocks.len() - 1);
 }

+/// With a small transcript on `deepseek-v4-pro`, the capacity pre-request checkpoint
+/// must report that nothing was applied and must leave the messages untouched.
+#[tokio::test]
+async fn pre_request_refresh_skips_compaction_below_normal_threshold() {
+    let capacity = CapacityControllerConfig {
+        enabled: true,
+        low_risk_max: 0.0,
+        medium_risk_max: 1.0,
+        min_turns_before_guardrail: 0,
+        ..Default::default()
+    };
+
+    let mut engine = build_engine_with_capacity(capacity.clone());
+    engine.config.capacity = capacity.clone();
+    engine.capacity_controller = CapacityController::new(capacity);
+    engine.turn_counter = 5;
+    engine
+        .capacity_controller
+        .mark_turn_start(engine.turn_counter);
+    engine.session.model = "deepseek-v4-pro".to_string();
+    engine.config.model = "deepseek-v4-pro".to_string();
+
+    // Twenty short user messages: a deliberately tiny transcript.
+    for i in 0..20 {
+        engine.session.messages.push(Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: format!("small message {i}"),
+                cache_control: None,
+            }],
+        });
+    }
+
+    let before = engine.estimated_input_tokens();
+    let before_len = engine.session.messages.len();
+    let turn = TurnContext::new(10);
+    // No client is supplied (`None`) for this checkpoint call.
+    let applied = engine
+        .run_capacity_pre_request_checkpoint(&turn, None, AppMode::Agent)
+        .await;
+    let after = engine.estimated_input_tokens();
+
+    assert!(!applied);
+    assert_eq!(after, before);
+    assert_eq!(engine.session.messages.len(), before_len);
+}
+
 #[tokio::test]
 async fn pre_request_refresh_invoked_when_medium_risk() {
    let capacity = CapacityControllerConfig {
@@ -112,8 +112,9 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> {
    let mut stdout = io::stdout();
    if use_alt_screen {
        execute!(stdout, EnterAlternateScreen)?;
+        execute!(stdout, EnableMouseCapture)?;
    }
-    execute!(stdout, EnableBracketedPaste, EnableMouseCapture)?;
+    execute!(stdout, EnableBracketedPaste)?;
    let backend = CrosstermBackend::new(stdout);
    let mut terminal = Terminal::new(backend)?;
    let event_broker = EventBroker::new();
@@ -268,11 +269,10 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> {
    if use_alt_screen {
        execute!(terminal.backend_mut(), LeaveAlternateScreen)?;
    }
-    execute!(
-        terminal.backend_mut(),
-        DisableBracketedPaste,
-        DisableMouseCapture
-    )?;
+    if use_alt_screen {
+        execute!(terminal.backend_mut(), DisableMouseCapture)?;
+    }
+    execute!(terminal.backend_mut(), DisableBracketedPaste)?;
    terminal.show_cursor()?;

    result
@@ -466,7 +466,7 @@ async fn run_event_loop(
                        let tool_content = match &result {
                            Ok(output) => sanitize_stream_chunk(
                                &crate::core::engine::compact_tool_result_for_context(
-                                    &name, output,
+                                    &app.model, &name, output,
                                ),
                            ),
                            Err(err) => sanitize_stream_chunk(&format!("Error: {err}")),
@@ -1857,6 +1857,7 @@ async fn dispatch_user_message(
    app.add_message(HistoryCell::User {
        content: message.display.clone(),
    });
+    app.scroll_to_bottom();
    app.api_messages.push(Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
@@ -3144,12 +3145,9 @@ fn resume_terminal(
    enable_raw_mode()?;
    if use_alt_screen {
        execute!(terminal.backend_mut(), EnterAlternateScreen)?;
+        execute!(terminal.backend_mut(), EnableMouseCapture)?;
    }
-    execute!(
-        terminal.backend_mut(),
-        EnableMouseCapture,
-        EnableBracketedPaste
-    )?;
+    execute!(terminal.backend_mut(), EnableBracketedPaste)?;
    terminal.clear()?;
    Ok(())
 }
@@ -18,7 +18,10 @@ use ratatui::{
    prelude::Stylize,
    style::{Color, Modifier, Style},
    text::{Line, Span},
-    widgets::{Block, Borders, Clear, Padding, Paragraph, Widget, Wrap},
+    widgets::{
+        Block, Borders, Clear, Padding, Paragraph, Scrollbar, ScrollbarOrientation, ScrollbarState,
+        StatefulWidget, Widget, Wrap,
+    },
 };
 use unicode_segmentation::UnicodeSegmentation;
 use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
@@ -29,6 +32,14 @@ const COMPOSER_PANEL_HEIGHT: u16 = 2;
 pub struct ChatWidget {
    content_area: Rect,
    lines: Vec<Line<'static>>,
+    scrollbar: Option<TranscriptScrollbar>,
+}
+
+/// Scroll snapshot used to draw the transcript scrollbar.
+#[derive(Debug, Clone, Copy)]
+struct TranscriptScrollbar {
+    /// Scroll offset handed to the scrollbar state as its position.
+    top: usize,
+    /// Viewport length in lines (`visible_lines` at construction).
+    visible: usize,
+    /// Total transcript length in lines (`total_lines` at construction).
+    total: usize,
 }

 impl ChatWidget {
@@ -47,6 +58,7 @@ impl ChatWidget {
            return Self {
                content_area,
                lines,
+                scrollbar: None,
            };
        }

@@ -105,9 +117,18 @@ impl ChatWidget {
            pad_lines_to_bottom(&mut lines, visible_lines);
        }

+        let scrollbar = (total_lines > visible_lines && content_area.width > 1).then_some(
+            TranscriptScrollbar {
+                top,
+                visible: visible_lines,
+                total: total_lines,
+            },
+        );
+
        Self {
            content_area,
            lines,
+            scrollbar,
        }
    }
 }
@@ -116,6 +137,20 @@ impl Renderable for ChatWidget {
    fn render(&self, _area: Rect, buf: &mut Buffer) {
        let paragraph = Paragraph::new(self.lines.clone());
        paragraph.render(self.content_area, buf);
+
+        // Draw the scrollbar over the right edge of the content area; `scrollbar`
+        // is only `Some` when the transcript is taller than the viewport.
+        if let Some(scrollbar) = self.scrollbar {
+            // NOTE(review): recent ratatui versions appear to let `position` range over
+            // `0..content_length`; if `top` tops out at `total - visible`, the thumb may
+            // stop short of the track end. Confirm against the pinned ratatui version.
+            let mut state = ScrollbarState::new(scrollbar.total)
+                .position(scrollbar.top)
+                .viewport_content_length(scrollbar.visible);
+            Scrollbar::new(ScrollbarOrientation::VerticalRight)
+                .begin_symbol(None)
+                .end_symbol(None)
+                .track_symbol(Some("│"))
+                .track_style(Style::default().fg(palette::BORDER_COLOR))
+                .thumb_symbol("┃")
+                .thumb_style(Style::default().fg(palette::DEEPSEEK_SKY))
+                .render(self.content_area, buf, &mut state);
+        }
    }

    fn desired_height(&self, _width: u16) -> u16 {
@@ -1,7 +1,7 @@
 {
  "name": "deepseek-tui",
-  "version": "0.4.0",
-  "deepseekBinaryVersion": "0.4.0",
+  "version": "0.4.1",
+  "deepseekBinaryVersion": "0.4.1",
  "description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.",
  "author": "Hmbown",
  "license": "MIT",