From 921949e35f6160c238c4e897998bdf97137793d0 Mon Sep 17 00:00:00 2001
From: Hu Qiantao <huqiantao@HudeMacBook-Air.local>
Date: Tue, 2 Jun 2026 00:16:58 +0800
Subject: [PATCH 1/4] feat(client): add cross-session prompt base section disk
 cache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add prompt_persist.rs module that caches the immutable base section of
the system prompt on disk for cross-session reuse. The base section
(mode prompt, project context, skills, context management, compaction
template) is stable across sessions for the same workspace. By caching
this section and reusing it when the SHA-256 matches, we can skip the
entire base-section assembly on session start and immediately provide
byte-identical bytes to the API.

This is especially valuable for DeepSeek's service-side prefix cache:
when the base section bytes are identical across sessions, the server
can reuse its cached KV states for the entire base section, giving ~90%
discount on cached tokens.

Cache layout:
  ~/.codewhale/prompt_cache/<system_hash>.bin   — the base section text
  ~/.codewhale/prompt_cache/<system_hash>.meta  — JSON metadata

The cache key is the SHA-256 of the base section text. The metadata
includes the workspace path and its mtime for invalidation on workspace
changes. Stale entries are evicted lazily based on age and workspace
mtime consistency.

The module exposes three public functions:
- load_cached_base_section(): try to load a cached base section
- save_cached_base_section(): save a base section to disk
- evict_stale_entries(): clean up old cache entries

This is the infrastructure layer only. Wiring it into the prompt
assembly pipeline (splitting base_section() + volatile_section()) will
be done in a follow-up change.
---
 crates/tui/src/main.rs           |   1 +
 crates/tui/src/prompt_persist.rs | 258 +++++++++++++++++++++++++++++++
 2 files changed, 259 insertions(+)
 create mode 100644 crates/tui/src/prompt_persist.rs

diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs
index df867d98..2362fd95 100644
--- a/crates/tui/src/main.rs
+++ b/crates/tui/src/main.rs
@@ -52,6 +52,7 @@ mod prefix_cache;
 mod pricing;
 mod project_context;
 mod project_doc;
+mod prompt_persist;
 mod prompt_zones;
 mod prompts;
 mod purge;
diff --git a/crates/tui/src/prompt_persist.rs b/crates/tui/src/prompt_persist.rs
new file mode 100644
index 00000000..ff2f6450
--- /dev/null
+++ b/crates/tui/src/prompt_persist.rs
@@ -0,0 +1,258 @@
+//! Cross-session persistence for the immutable base section of the system
+//! prompt.
+//!
+//! ## Why
+//!
+//! DeepSeek's KV prefix cache matches byte sequences from the start of the
+//! system prompt. The base section (mode prompt, project context, skills,
+//! context management, compaction template) is stable across sessions for
+//! the same workspace. By caching this section on disk and reusing it when
+//! the SHA-256 matches, we can skip the entire base-section assembly on
+//! session start and immediately provide byte-identical bytes to the API.
+//!
+//! This is especially valuable for the DeepSeek service-side prefix cache:
+//! when the base section bytes are identical across sessions, the server
+//! can reuse its cached KV states for the entire base section, giving
+//! ~90% discount on cached tokens.
+//!
+//! ## Cache layout
+//!
+//! ```text
+//! ~/.codewhale/prompt_cache/
+//!   <system_hash>.bin   — the serialized base section text
+//!   <system_hash>.meta  — JSON metadata (workspace path, mtime, timestamp)
+//! ```
+//!
+//! The cache key is the SHA-256 of the base section text, computed by
+//! `PrefixFingerprint::compute`. The metadata file includes the workspace
+//! path and its mtime so that workspace changes invalidate the cache even
+//! if the base section hash happens to collide (extremely unlikely with
+//! SHA-256, but cheap to guard against).
+
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::time::SystemTime;
+
+use serde::{Deserialize, Serialize};
+
+use crate::logging;
+
+/// Metadata stored alongside a cached base section.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[allow(dead_code)]
+struct CacheMetadata {
+    /// Absolute path to the workspace that produced this base section.
+    workspace: PathBuf,
+    /// Modification time of the workspace directory at cache-write time.
+    /// Used as a secondary invalidation signal: if the workspace mtime
+    /// changed, the cache is stale even if the base section hash matches
+    /// (which would require a hash collision).
+    workspace_mtime_secs: u64,
+    /// Unix timestamp when the cache was written.
+    cached_at_secs: u64,
+}
+
+/// Return the directory where prompt caches are stored.
+///
+/// Creates the directory if it doesn't exist.
+#[allow(dead_code)]
+fn cache_dir() -> Option<PathBuf> {
+    let home = dirs::home_dir()?;
+    let dir = home.join(".codewhale").join("prompt_cache");
+    if let Err(err) = fs::create_dir_all(&dir) {
+        logging::warn(format!("Failed to create prompt cache dir: {err}"));
+        return None;
+    }
+    Some(dir)
+}
+
+/// Get the modification time of a directory as seconds since epoch.
+#[allow(dead_code)]
+fn dir_mtime_secs(path: &Path) -> u64 {
+    fs::metadata(path)
+        .and_then(|m| m.modified())
+        .ok()
+        .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
+        .map(|d| d.as_secs())
+        .unwrap_or(0)
+}
+
+/// Try to load a cached base section from disk.
+///
+/// Returns `Some(text)` if a valid cache entry exists for the given hash
+/// and workspace, or `None` if the cache is missing, stale, or corrupt.
+#[allow(dead_code)]
+pub fn load_cached_base_section(base_hash: &str, workspace: &Path) -> Option<String> {
+    let dir = cache_dir()?;
+    let bin_path = dir.join(format!("{base_hash}.bin"));
+    let meta_path = dir.join(format!("{base_hash}.meta"));
+
+    // Check that both files exist.
+    if !bin_path.exists() || !meta_path.exists() {
+        return None;
+    }
+
+    // Read and validate metadata.
+    let meta_bytes = fs::read(&meta_path).ok()?;
+    let meta: CacheMetadata = serde_json::from_slice(&meta_bytes).ok()?;
+
+    // Verify workspace path matches.
+    if meta.workspace != workspace {
+        return None;
+    }
+
+    // Verify workspace mtime hasn't changed (guards against hash collisions).
+    let current_mtime = dir_mtime_secs(workspace);
+    if current_mtime != meta.workspace_mtime_secs {
+        logging::info(format!(
+            "Prompt cache stale: workspace mtime changed ({meta_mtime} → {current_mtime})",
+            meta_mtime = meta.workspace_mtime_secs
+        ));
+        return None;
+    }
+
+    // Read the cached base section.
+    let text = fs::read_to_string(&bin_path).ok()?;
+    logging::info(format!(
+        "Prompt cache hit: {base_hash} ({} bytes)",
+        text.len()
+    ));
+    Some(text)
+}
+
+/// Save a base section to disk for cross-session reuse.
+///
+/// The cache key is `base_hash` (SHA-256 of the base section text). The
+/// metadata includes the workspace path and its mtime for invalidation.
+#[allow(dead_code)]
+pub fn save_cached_base_section(base_hash: &str, base_text: &str, workspace: &Path) {
+    let dir = match cache_dir() {
+        Some(d) => d,
+        None => return,
+    };
+
+    let bin_path = dir.join(format!("{base_hash}.bin"));
+    let meta_path = dir.join(format!("{base_hash}.meta"));
+
+    // Write the base section text.
+    if let Err(err) = fs::write(&bin_path, base_text) {
+        logging::warn(format!("Failed to write prompt cache bin: {err}"));
+        return;
+    }
+
+    // Write the metadata.
+    let meta = CacheMetadata {
+        workspace: workspace.to_path_buf(),
+        workspace_mtime_secs: dir_mtime_secs(workspace),
+        cached_at_secs: SystemTime::now()
+            .duration_since(SystemTime::UNIX_EPOCH)
+            .map(|d| d.as_secs())
+            .unwrap_or(0),
+    };
+    if let Err(err) = fs::write(&meta_path, serde_json::to_vec(&meta).unwrap_or_default()) {
+        logging::warn(format!("Failed to write prompt cache meta: {err}"));
+    }
+
+    logging::info(format!("Prompt cache saved: {base_hash}"));
+}
+
+/// Evict stale cache entries.
+///
+/// Removes cache entries older than `max_age_secs` or whose workspace
+/// mtime no longer matches. This is a best-effort cleanup; it runs
+/// lazily when the cache is accessed.
+#[allow(dead_code)]
+pub fn evict_stale_entries(max_age_secs: u64) {
+    let dir = match cache_dir() {
+        Some(d) => d,
+        None => return,
+    };
+
+    let now = SystemTime::now()
+        .duration_since(SystemTime::UNIX_EPOCH)
+        .map(|d| d.as_secs())
+        .unwrap_or(0);
+
+    let entries = match fs::read_dir(&dir) {
+        Ok(e) => e,
+        Err(_) => return,
+    };
+
+    for entry in entries.flatten() {
+        let path = entry.path();
+        if path.extension().is_some_and(|e| e == "meta")
+            && let Ok(bytes) = fs::read(&path)
+            && let Ok(meta) = serde_json::from_slice::<CacheMetadata>(&bytes)
+        {
+            let stale = now.saturating_sub(meta.cached_at_secs) > max_age_secs;
+            let workspace_gone = !meta.workspace.exists();
+            let mtime_changed =
+                workspace_gone || dir_mtime_secs(&meta.workspace) != meta.workspace_mtime_secs;
+
+            if stale || workspace_gone || mtime_changed {
+                let hash = path
+                    .file_stem()
+                    .and_then(|s| s.to_str())
+                    .unwrap_or("unknown");
+                let _ = fs::remove_file(&path);
+                let _ = fs::remove_file(path.with_extension("bin"));
+                logging::info(format!("Evicted prompt cache: {hash}"));
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    #[test]
+    fn save_and_load_round_trip() {
+        let tmp = tempdir().expect("tempdir");
+        let workspace = tmp.path();
+        let hash = "abc123";
+        let text = "Hello, world!";
+
+        save_cached_base_section(hash, text, workspace);
+        let loaded = load_cached_base_section(hash, workspace);
+        assert_eq!(loaded.as_deref(), Some(text));
+    }
+
+    #[test]
+    fn load_returns_none_for_missing_cache() {
+        let tmp = tempdir().expect("tempdir");
+        assert!(load_cached_base_section("nonexistent", tmp.path()).is_none());
+    }
+
+    #[test]
+    fn load_returns_none_for_wrong_workspace() {
+        let tmp1 = tempdir().expect("tempdir");
+        let tmp2 = tempdir().expect("tempdir");
+        let hash = "def456";
+        let text = "cached content";
+
+        save_cached_base_section(hash, text, tmp1.path());
+        assert!(load_cached_base_section(hash, tmp2.path()).is_none());
+    }
+
+    #[test]
+    fn evict_preserves_fresh_entries() {
+        let tmp = tempdir().expect("tempdir");
+        let workspace = tmp.path();
+        let hash = "fresh_entry";
+        let text = "fresh content";
+
+        save_cached_base_section(hash, text, workspace);
+
+        // Evict entries older than 3600 seconds (1 hour). Fresh entries
+        // should survive.
+        evict_stale_entries(3600);
+
+        // The entry should still be there since it was just saved.
+        assert_eq!(
+            load_cached_base_section(hash, workspace).as_deref(),
+            Some(text)
+        );
+    }
+}

From 3468b25cf3c63ad10d3d8fa7cbf262e4c2edf666 Mon Sep 17 00:00:00 2001
From: huqiantao <huqiantao@users.noreply.github.com>
Date: Sun, 7 Jun 2026 18:58:17 +0800
Subject: [PATCH 2/4] fix: critical bugs in tools, client, and commands

1. Fix UTF-8 boundary panic in clean_pdf_text (tools/file.rs:295)
   - rfind returns byte index of char start, i+1 may not be char boundary
   - Use char-aware byte offset calculation instead

2. Fix integer overflow in context_lines (tools/search.rs:103)
   - Clamp model-provided context_lines to 1000 to prevent massive allocations
   - On 32-bit, usize::try_from(u64::MAX) falls back to usize::MAX causing overflow

3. Fix u64->usize truncating cast (tools/file.rs:117,127)
   - Use usize::try_from() with proper error instead of silent truncation
   - Prevents reading from wrong line on 32-bit platforms

4. Fix ContentBlockStop wrong index in SSE stream cleanup (client/chat.rs:435)
   - saturating_sub(1) on 0u32 wraps to u32::MAX when stream breaks during thinking
   - Merge thinking/text close into single guard to avoid duplicate stops

5. Fix missing providers in provider_accepts_reasoning_content (client/chat.rs:1966)
   - Add SiliconflowCn and Volcengine which are in apply_reasoning_effort
   - Without this, non-DeepSeek reasoning models on these providers lose thinking traces

6. Fix TOCTOU double-call in run_skill_by_name (commands/mod.rs:671)
   - Replace is_some()+unwrap() with if-let-Some pattern
   - Prevents potential panic if state changes between calls

7. Fix incomplete hex decoding in from_api_tool_name (client.rs:54-76)
   - Require exactly 6 hex digits to match encoder output
   - Short sequences from malformed model output now pass through as-is

8. Fix token count u64->u32 truncation (client.rs:1298-1299)
   - Use .min(u32::MAX) saturating cast consistent with sanitizer at line 1807
   - Prevents silent wraparound for extremely large token counts

9. Fix HTTP response body read errors silently swallowed (client/chat.rs:170, client.rs:750)
   - Replace unwrap_or_default() with .context()? propagation
   - Connection drops mid-body now surface as clear error instead of JSON parse failure
---
 crates/tui/src/client.rs       | 14 ++++++++++----
 crates/tui/src/client/chat.rs  | 20 +++++++++++++-------
 crates/tui/src/commands/mod.rs |  4 ++--
 crates/tui/src/tools/file.rs   | 17 ++++++++++++++---
 crates/tui/src/tools/search.rs |  5 +++--
 5 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs
index 25c92188..9efc2b13 100644
--- a/crates/tui/src/client.rs
+++ b/crates/tui/src/client.rs
@@ -61,7 +61,10 @@ pub(super) fn from_api_tool_name(name: &str) -> String {
                     break;
                 }
             }
-            if let Ok(code) = u32::from_str_radix(&hex, 16)
+            // Only decode if we got exactly 6 hex digits (matching encoder output).
+            // Fewer digits means a truncated/malformed sequence — pass through as-is.
+            if hex.len() == 6
+                && let Ok(code) = u32::from_str_radix(&hex, 16)
                 && let Some(decoded) = std::char::from_u32(code)
             {
                 if let Some('-') = iter.peek().copied() {
@@ -747,7 +750,10 @@ impl DeepSeekClient {
             );
             anyhow::bail!("Failed to list models: HTTP {status}: {error_text}");
         }
-        let response_text = response.text().await.unwrap_or_default();
+        let response_text = response
+            .text()
+            .await
+            .context("Failed to read models response body")?;
 
         parse_models_response(&response_text)
     }
@@ -1295,8 +1301,8 @@ pub(super) fn parse_usage(usage: Option<&Value>) -> Usage {
     });
 
     Usage {
-        input_tokens: input_tokens as u32,
-        output_tokens: output_tokens as u32,
+        input_tokens: input_tokens.min(u64::from(u32::MAX)) as u32,
+        output_tokens: output_tokens.min(u64::from(u32::MAX)) as u32,
         prompt_cache_hit_tokens,
         prompt_cache_miss_tokens,
         reasoning_tokens,
diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs
index e8fb42c4..751698a7 100644
--- a/crates/tui/src/client/chat.rs
+++ b/crates/tui/src/client/chat.rs
@@ -167,7 +167,10 @@ impl DeepSeekClient {
             anyhow::bail!("Failed to call DeepSeek Chat API: HTTP {status}: {error_text}");
         }
 
-        let response_text = response.text().await.unwrap_or_default();
+        let response_text = response
+            .text()
+            .await
+            .context("Failed to read Chat API response body")?;
         let value: Value =
             serde_json::from_str(&response_text).context("Failed to parse Chat API JSON")?;
         parse_chat_message(&value)
@@ -431,12 +434,13 @@ impl DeepSeekClient {
                 }
             }
 
-            // Close any open blocks
-            if thinking_started {
-                yield Ok(StreamEvent::ContentBlockStop { index: content_index.saturating_sub(1) });
-            }
-            if text_started {
-                yield Ok(StreamEvent::ContentBlockStop { index: content_index.saturating_sub(1) });
+            // Close any open blocks — use the current content_index
+            // (which points to the next unused slot, so -1 gives the
+            // last-opened block) but guard against underflow when no
+            // content block was ever opened.
+            if thinking_started || text_started {
+                let idx = content_index.saturating_sub(1);
+                yield Ok(StreamEvent::ContentBlockStop { index: idx });
             }
 
             release_stream_buffer(byte_buf);
@@ -1974,6 +1978,8 @@ fn provider_accepts_reasoning_content(provider: ApiProvider) -> bool {
             | ApiProvider::Novita
             | ApiProvider::Fireworks
             | ApiProvider::Siliconflow
+            | ApiProvider::SiliconflowCn
+            | ApiProvider::Volcengine
             | ApiProvider::Arcee
             | ApiProvider::Sglang
     )
diff --git a/crates/tui/src/commands/mod.rs b/crates/tui/src/commands/mod.rs
index dd10da10..54dc6280 100644
--- a/crates/tui/src/commands/mod.rs
+++ b/crates/tui/src/commands/mod.rs
@@ -668,8 +668,8 @@ pub fn execute(cmd: &str, app: &mut App) -> CommandResult {
         _ => {
             // Third source: skills (lowest precedence after native and user-config).
             // Try to run a skill whose name matches the command.
-            if skills::run_skill_by_name(app, command, arg).is_some() {
-                return skills::run_skill_by_name(app, command, arg).unwrap();
+            if let Some(result) = skills::run_skill_by_name(app, command, arg) {
+                return result;
             }
             let suggestions = suggest_command_names(command, 3);
             if suggestions.is_empty() {
diff --git a/crates/tui/src/tools/file.rs b/crates/tui/src/tools/file.rs
index 671f1366..3600c933 100644
--- a/crates/tui/src/tools/file.rs
+++ b/crates/tui/src/tools/file.rs
@@ -114,7 +114,9 @@ impl ToolSpec for ReadFileTool {
                     "start_line must be 1-based and greater than 0".to_string(),
                 ));
             }
-            Some(v) => v as usize,
+            Some(v) => usize::try_from(v).map_err(|_| {
+                ToolError::invalid_input("start_line exceeds platform addressable range".to_string())
+            })?,
             None => 1,
         };
 
@@ -124,7 +126,14 @@ impl ToolSpec for ReadFileTool {
                     "max_lines must be greater than 0".to_string(),
                 ));
             }
-            Some(v) => std::cmp::min(v as usize, HARD_MAX_READ_LINES),
+            Some(v) => {
+                let converted = usize::try_from(v).map_err(|_| {
+                    ToolError::invalid_input(
+                        "max_lines exceeds platform addressable range".to_string(),
+                    )
+                })?;
+                std::cmp::min(converted, HARD_MAX_READ_LINES)
+            }
             None => DEFAULT_READ_LINES,
         };
 
@@ -292,7 +301,9 @@ fn clean_pdf_text(raw: &str) -> String {
     if any_content {
         let start = out.find(|c: char| c != '\n').unwrap_or(0);
         // Walk back from end to find the last non-newline character.
-        let end = out.rfind(|c: char| c != '\n').map_or(out.len(), |i| i + 1);
+        let end = out
+            .rfind(|c: char| c != '\n')
+            .map_or(out.len(), |i| i + out[i..].chars().next().map_or(1, |c| c.len_utf8()));
         out[start..end].to_string()
     } else {
         String::new()
diff --git a/crates/tui/src/tools/search.rs b/crates/tui/src/tools/search.rs
index 221d760b..0174011a 100644
--- a/crates/tui/src/tools/search.rs
+++ b/crates/tui/src/tools/search.rs
@@ -100,8 +100,9 @@ impl ToolSpec for GrepFilesTool {
     async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
         let pattern_str = required_str(&input, "pattern")?;
         let path_str = optional_str(&input, "path").unwrap_or(".");
-        let context_lines =
-            usize::try_from(optional_u64(&input, "context_lines", 2)).unwrap_or(usize::MAX);
+        let context_lines = usize::try_from(optional_u64(&input, "context_lines", 2))
+            .unwrap_or(usize::MAX)
+            .min(1000);
         let case_insensitive = optional_bool(&input, "case_insensitive", false);
         let max_results = usize::try_from(optional_u64(&input, "max_results", MAX_RESULTS as u64))
             .unwrap_or(MAX_RESULTS);

From 4d1ffa4b88424d27cfe97603edd85443c13b73ec Mon Sep 17 00:00:00 2001
From: huqiantao <huqiantao@users.noreply.github.com>
Date: Sun, 7 Jun 2026 19:32:58 +0800
Subject: [PATCH 3/4] fix: use content_index directly for ContentBlockStop
 cleanup

The content_index is only incremented AFTER a block is closed, not
when opened. Using saturating_sub(1) would close the wrong block.
The reviewer correctly identified this logic error.
---
 crates/tui/src/client/chat.rs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs
index 751698a7..4488506b 100644
--- a/crates/tui/src/client/chat.rs
+++ b/crates/tui/src/client/chat.rs
@@ -434,13 +434,11 @@ impl DeepSeekClient {
                 }
             }
 
-            // Close any open blocks — use the current content_index
-            // (which points to the next unused slot, so -1 gives the
-            // last-opened block) but guard against underflow when no
-            // content block was ever opened.
+            // Close any open blocks — content_index points to the
+            // currently active open block (it is only incremented
+            // *after* a block is closed, not when opened).
             if thinking_started || text_started {
-                let idx = content_index.saturating_sub(1);
-                yield Ok(StreamEvent::ContentBlockStop { index: idx });
+                yield Ok(StreamEvent::ContentBlockStop { index: content_index });
             }
 
             release_stream_buffer(byte_buf);

From 186b5b463f6acb86d2c8cb788d7df87a6850ed0d Mon Sep 17 00:00:00 2001
From: huqiantao <huqiantao@users.noreply.github.com>
Date: Sun, 7 Jun 2026 19:44:25 +0800
Subject: [PATCH 4/4] style: apply cargo fmt formatting to tools/file.rs

---
 crates/tui/src/tools/file.rs | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/crates/tui/src/tools/file.rs b/crates/tui/src/tools/file.rs
index 3600c933..27dd2741 100644
--- a/crates/tui/src/tools/file.rs
+++ b/crates/tui/src/tools/file.rs
@@ -115,7 +115,9 @@ impl ToolSpec for ReadFileTool {
                 ));
             }
             Some(v) => usize::try_from(v).map_err(|_| {
-                ToolError::invalid_input("start_line exceeds platform addressable range".to_string())
+                ToolError::invalid_input(
+                    "start_line exceeds platform addressable range".to_string(),
+                )
             })?,
             None => 1,
         };
@@ -301,9 +303,9 @@ fn clean_pdf_text(raw: &str) -> String {
     if any_content {
         let start = out.find(|c: char| c != '\n').unwrap_or(0);
         // Walk back from end to find the last non-newline character.
-        let end = out
-            .rfind(|c: char| c != '\n')
-            .map_or(out.len(), |i| i + out[i..].chars().next().map_or(1, |c| c.len_utf8()));
+        let end = out.rfind(|c: char| c != '\n').map_or(out.len(), |i| {
+            i + out[i..].chars().next().map_or(1, |c| c.len_utf8())
+        });
         out[start..end].to_string()
     } else {
         String::new()