From 5e647e99465d51dc900e64f1af75a5d059c38c24 Mon Sep 17 00:00:00 2001 From: Friende <35026241+pengyou200902@users.noreply.github.com> Date: Thu, 7 May 2026 04:21:11 -0400 Subject: [PATCH] fix(memory): truncate at UTF-8 char boundary Summary: - Truncate oversized memory prompt content at the previous valid UTF-8 character boundary. - Preserve the existing memory cap and truncation marker behavior. - Add regression coverage for accented and emoji content crossing the byte cutoff. Maintainer verification on current origin/main: - cargo test -p deepseek-tui memory --locked - cargo fmt --all -- --check - git diff --check origin/main...HEAD --- crates/tui/src/memory.rs | 54 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/crates/tui/src/memory.rs b/crates/tui/src/memory.rs index 8a61b6ef..fa7da78d 100644 --- a/crates/tui/src/memory.rs +++ b/crates/tui/src/memory.rs @@ -56,7 +56,8 @@ pub fn as_system_block(content: &str, source: &Path) -> Option { let display = source.display(); let payload = if content.len() > MAX_MEMORY_SIZE { - let mut head = content[..MAX_MEMORY_SIZE].to_string(); + let cutoff = previous_char_boundary(content, MAX_MEMORY_SIZE); + let mut head = content[..cutoff].to_string(); head.push_str("\n…(truncated, raise [memory].max_size or trim memory.md)"); head } else { @@ -68,6 +69,13 @@ pub fn as_system_block(content: &str, source: &Path) -> Option { )) } +fn previous_char_boundary(value: &str, mut index: usize) -> usize { + while !value.is_char_boundary(index) { + index -= 1; + } + index +} + /// Compose the `` block for the system prompt, honouring the /// opt-in toggle. Returns `None` when the feature is disabled or the file /// is missing / empty so the caller doesn't have to check both conditions. @@ -161,6 +169,50 @@ mod tests { assert!(block.contains("(truncated")); } + #[test] + fn as_system_block_truncates_non_ascii_at_char_boundary() { + let mut content = "x".repeat(MAX_MEMORY_SIZE - 1); + content.push('é'); + content.push_str("tail"); + + let block = as_system_block(&content, Path::new("/tmp/m.md")).unwrap(); + let payload = block + .strip_prefix("\n") + .unwrap() + .strip_suffix("\n") + .unwrap(); + let (head, marker) = payload + .split_once("\n…(truncated, raise [memory].max_size or trim memory.md)") + .unwrap(); + + assert_eq!(head.len(), MAX_MEMORY_SIZE - 1); + assert!(head.bytes().all(|byte| byte == b'x')); + assert_eq!(marker, ""); + } + + #[test] + fn as_system_block_truncates_emoji_at_char_boundary() { + let mut content = "x".repeat(MAX_MEMORY_SIZE - 1); + content.push('😀'); + content.push_str("tail"); + + let block = as_system_block(&content, Path::new("/tmp/m.md")).unwrap(); + assert!(block.contains("…(truncated, raise [memory].max_size or trim memory.md)")); + + let payload = block + .strip_prefix("\n") + .unwrap() + .strip_suffix("\n") + .unwrap(); + let head = payload + .strip_suffix("\n…(truncated, raise [memory].max_size or trim memory.md)") + .unwrap(); + + assert!(head.len() <= MAX_MEMORY_SIZE); + assert_eq!(head.len(), MAX_MEMORY_SIZE - 1); + assert!(head.bytes().all(|byte| byte == b'x')); + } + #[test] fn append_entry_creates_file_and_writes_one_bullet() { let tmp = tempdir().unwrap();