fix(memory): truncate at UTF-8 char boundary

Summary:
- Truncate oversized memory prompt content at the previous valid UTF-8 character boundary.
- Preserve the existing memory cap and truncation marker behavior.
- Add regression coverage for accented and emoji content crossing the byte cutoff.

Maintainer verification on current origin/main:
- cargo test -p deepseek-tui memory --locked
- cargo fmt --all -- --check
- git diff --check origin/main...HEAD
This commit is contained in:
Friende
2026-05-07 04:21:11 -04:00
committed by GitHub
parent 82b9e46254
commit 5e647e9946
+53 -1
View File
@@ -56,7 +56,8 @@ pub fn as_system_block(content: &str, source: &Path) -> Option<String> {
let display = source.display();
let payload = if content.len() > MAX_MEMORY_SIZE {
let mut head = content[..MAX_MEMORY_SIZE].to_string();
let cutoff = previous_char_boundary(content, MAX_MEMORY_SIZE);
let mut head = content[..cutoff].to_string();
head.push_str("\n…(truncated, raise [memory].max_size or trim memory.md)");
head
} else {
@@ -68,6 +69,13 @@ pub fn as_system_block(content: &str, source: &Path) -> Option<String> {
))
}
/// Return the largest byte offset that is `<= index` and lies on a UTF-8
/// character boundary of `value`.
///
/// Slicing `value[..offset]` with the returned offset never panics: offset
/// `0` and `value.len()` are always boundaries, so the search is guaranteed
/// to terminate without underflow. An `index` past the end of the string
/// yields `value.len()`.
fn previous_char_boundary(value: &str, index: usize) -> usize {
    // Clamp first: `is_char_boundary` is false for every offset beyond the
    // end, so an unclamped oversized index would be walked back one byte at a
    // time. After clamping, at most three steps are needed because a UTF-8
    // scalar is at most four bytes long.
    let mut cutoff = index.min(value.len());
    while !value.is_char_boundary(cutoff) {
        cutoff -= 1;
    }
    cutoff
}
/// Compose the `<user_memory>` block for the system prompt, honouring the
/// opt-in toggle. Returns `None` when the feature is disabled or the file
/// is missing / empty so the caller doesn't have to check both conditions.
@@ -161,6 +169,50 @@ mod tests {
assert!(block.contains("(truncated"));
}
/// Regression test: a two-byte character (`é`) that straddles the
/// `MAX_MEMORY_SIZE` byte cutoff must be dropped whole, leaving only the
/// ASCII prefix followed by the truncation marker.
#[test]
fn as_system_block_truncates_non_ascii_at_char_boundary() {
    let opening = "<user_memory source=\"/tmp/m.md\">\n";
    let closing = "\n</user_memory>";
    let truncation_note = "\n…(truncated, raise [memory].max_size or trim memory.md)";

    // `é` starts on the last byte below the cap and ends one byte past it.
    let ascii_len = MAX_MEMORY_SIZE - 1;
    let content = format!("{}{}{}", "x".repeat(ascii_len), 'é', "tail");

    let block = as_system_block(&content, Path::new("/tmp/m.md")).unwrap();
    let without_open = block.strip_prefix(opening).unwrap();
    let payload = without_open.strip_suffix(closing).unwrap();

    let (head, marker) = payload.split_once(truncation_note).unwrap();

    // Only the ASCII run survives; nothing may follow the marker.
    assert_eq!(head.len(), ascii_len);
    assert!(head.bytes().all(|byte| byte == b'x'));
    assert_eq!(marker, "");
}
/// Regression test: a four-byte emoji that straddles the `MAX_MEMORY_SIZE`
/// byte cutoff must be dropped whole, so the retained head is exactly the
/// ASCII prefix and the block ends with the truncation marker.
#[test]
fn as_system_block_truncates_emoji_at_char_boundary() {
    let opening = "<user_memory source=\"/tmp/m.md\">\n";
    let closing = "\n</user_memory>";
    let marker = "…(truncated, raise [memory].max_size or trim memory.md)";
    let marker_with_newline = "\n…(truncated, raise [memory].max_size or trim memory.md)";

    // The emoji begins on the last byte below the cap and extends past it.
    let ascii_len = MAX_MEMORY_SIZE - 1;
    let content = format!("{}{}{}", "x".repeat(ascii_len), '😀', "tail");

    let block = as_system_block(&content, Path::new("/tmp/m.md")).unwrap();
    assert!(block.contains(marker));

    let without_open = block.strip_prefix(opening).unwrap();
    let payload = without_open.strip_suffix(closing).unwrap();
    let head = payload.strip_suffix(marker_with_newline).unwrap();

    // The head stays within the cap and consists solely of the ASCII run.
    assert!(head.len() <= MAX_MEMORY_SIZE);
    assert_eq!(head.len(), ascii_len);
    assert!(head.bytes().all(|byte| byte == b'x'));
}
#[test]
fn append_entry_creates_file_and_writes_one_bullet() {
let tmp = tempdir().unwrap();