From 03d72840e672d5a269edb80208cd09248951f91d Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Mon, 4 May 2026 14:50:24 -0500 Subject: [PATCH] test(tui): pin Chinese / IME character input contract for the composer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two regression tests to crates/tui/src/tui/paste.rs::tests that nail down what is currently a working code path but was not previously covered by name: * `ime_chinese_chars_route_through_to_composer` — simulates the macOS/Windows IME commit pattern (one `KeyCode::Char(c)` event per Chinese codepoint with realistic ~50 ms gaps so the paste-burst heuristic doesn't false-positive). Asserts that "你好世界" lands in `app.input` verbatim and that `cursor_position` advances by one per codepoint, not per UTF-8 byte. The non-ASCII branch in `handle_paste_burst_key` (paste.rs:42) is the structural anchor; this test pins it so a future "filter to ASCII for the paste-burst detector" change would surface immediately. * `bracketed_paste_preserves_chinese_and_mixed_text` — pastes a mix of CJK and Latin text ("你好世界 hello 世界 café") through the bracketed-paste path (`insert_paste_text` → `normalize_paste_text` → `insert_str`) and confirms every codepoint survives plus the cursor tracks codepoints, not bytes. Why these tests, why now: a community report surfaced the question "can users input Chinese characters" without specifying the exact failure mode. Code review of the input data path turned up nothing broken, and these tests confirm the data path is correct end-to-end for both single-char IME commits and bulk bracketed paste. The tests serve as evidence (the data path is provably fine) and as a guard against future regressions to Chinese-input support. The tests cost nothing at runtime and build under `cfg(test)` only. If users are still seeing a Chinese-input failure after this lands, the candidates worth investigating in priority order are: (1) display layer — `wrap_input_lines` / `cursor_row_col` may be miscounting double-width CJK cells; (2) terminal-specific delivery — certain IMEs / terminals don't emit the events crossterm expects; (3) locale at launch — `LC_ALL=C` in non-interactive shells breaks UTF-8 input upstream of crossterm. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tui/src/tui/paste.rs | 51 +++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/crates/tui/src/tui/paste.rs b/crates/tui/src/tui/paste.rs index 4be88928..1a96f0f8 100644 --- a/crates/tui/src/tui/paste.rs +++ b/crates/tui/src/tui/paste.rs @@ -201,6 +201,57 @@ mod tests { assert_eq!(app.input, "?"); } + /// Pin the IME-input contract: macOS/Windows input methods commit + /// each Chinese character as a single `KeyCode::Char(c)` event + /// after the candidate popup closes. Each codepoint fits in a + /// `char` (no surrogate pair concerns for BMP chars), so a + /// straightforward sequence of plain-char events must land in + /// `app.input` verbatim — no ASCII filter, no byte-vs-char index + /// drift, no paste-burst false-positive that buffers the chars + /// indefinitely. + #[test] + fn ime_chinese_chars_route_through_to_composer() { + let mut app = test_app(); + let t0 = Instant::now(); + + // Type the four Chinese codepoints "你好世界" one event at a + // time, with realistic ~50ms gaps so the paste-burst heuristic + // doesn't classify them as a paste burst. + for (i, ch) in "你好世界".chars().enumerate() { + let now = t0 + Duration::from_millis(50 * i as u64); + let _ = handle_paste_burst_key(&mut app, &plain(ch), now); + } + + // Past the active-flush delay so any buffered burst commits. + let after = t0 + + Duration::from_millis(50 * 4) + + crate::tui::paste_burst::PasteBurst::recommended_active_flush_delay(); + let _ = app.flush_paste_burst_if_due(after); + + assert_eq!( + app.input, "你好世界", + "IME-typed Chinese characters must land in composer verbatim" + ); + assert_eq!( + app.cursor_position, 4, + "cursor advances by one per codepoint, not per UTF-8 byte" + ); + } + + /// Pin the bracketed-paste contract for CJK content: pasted + /// Chinese text (e.g. when a user copies a question from a + /// Chinese website and pastes into the composer) must preserve + /// every codepoint and not double-count multi-byte chars in the + /// cursor position. + #[test] + fn bracketed_paste_preserves_chinese_and_mixed_text() { + let mut app = test_app(); + app.insert_paste_text("你好世界 hello 世界 café"); + assert_eq!(app.input, "你好世界 hello 世界 café"); + // 4 + 1 + 5 + 1 + 2 + 1 + 4 = 18 codepoints (counting é as one). + assert_eq!(app.cursor_position, 18); + } + #[test] fn paste_burst_detection_can_be_disabled_without_disabling_bracketed_paste() { let mut app = test_app();