fix(tui): filter terminal control sequence fragments from composer

Prevent leaked OSC/Kitty terminal control fragments from landing in composer input while preserving legitimate overlapping user text.
This commit is contained in:
Hunter Bown
2026-05-23 13:18:00 -05:00
committed by GitHub
parent 6356a810c5
commit 96b903aa39
+273 -5
View File
@@ -406,10 +406,23 @@ fn sanitize_api_key_text(text: &str) -> String {
}
fn strip_raw_mouse_report_runs(input: &str, cursor: usize) -> Option<(String, usize)> {
let chars: Vec<char> = input.chars().collect();
let mut output = String::with_capacity(input.len());
// First pass: strip the well-defined control-sequence fragment
// shapes that crossterm sometimes hands us as `Char(c)` keystrokes
// when its event reader is interrupted mid-sequence during dense
// streaming output (#1915). This covers OSC 8 hyperlink fragments
// (`]8;;URL`, including the closing `]8;;`) and Kitty keyboard
// protocol fragments (`[?…u`, `[>…u`, `[?u`).
let (after_fragments, after_fragments_cursor, fragments_changed) =
strip_control_sequence_fragments(input, cursor);
// Second pass: the existing run-based filter handles SGR mouse
// reports (`[<35;44;18M`) and the multi-terminator burst shape
// (`5;46;18M;48;18M`) introduced in e63a4ba4a. It operates on a
// narrow char set so it can't be confused with user-typed text.
let chars: Vec<char> = after_fragments.chars().collect();
let mut output = String::with_capacity(after_fragments.len());
let mut new_cursor = 0usize;
let mut changed = false;
let mut changed = fragments_changed;
let mut index = 0usize;
while index < chars.len() {
@@ -433,7 +446,7 @@ fn strip_raw_mouse_report_runs(input: &str, cursor: usize) -> Option<(String, us
continue;
}
for (offset, ch) in run.iter().copied().enumerate() {
if start + offset < cursor {
if start + offset < after_fragments_cursor {
new_cursor += 1;
}
output.push(ch);
@@ -441,7 +454,7 @@ fn strip_raw_mouse_report_runs(input: &str, cursor: usize) -> Option<(String, us
continue;
}
if index < cursor {
if index < after_fragments_cursor {
new_cursor += 1;
}
output.push(chars[index]);
@@ -566,6 +579,155 @@ fn looks_like_raw_mouse_report_fragment(run: &[char]) -> bool {
&& run.iter().any(|ch| matches!(ch, 'M' | 'm'))
}
/// Scan `input` for control-sequence fragment shapes (#1915) — OSC 8
/// hyperlinks and Kitty keyboard protocol responses — and excise each
/// match. Returns `(output, new_cursor, changed)`. Cursor positions
/// inside an excised fragment are moved to the fragment's start.
///
/// The match shapes are deliberately narrow so legitimate text like
/// `[is this ok?]` or a typed URL survives untouched:
///
/// - **OSC 8**: `(\x1b?)] 8 ; ...` consuming everything up to the
/// first BEL (`\x07`), `\x1b\\`, lone `\\`, or the next `\x1b]8;`
/// block — terminator characters are optional because crossterm may
/// have already consumed them.
/// - **Kitty CSI**: `(\x1b?) [ (? | > | =) ... u` — the `?`/`>`/`=`
/// private-parameter prefix is what distinguishes a Kitty response
/// from a user-typed `[…u` (which is exceedingly rare and would
/// need an explicit private-parameter byte to be a real CSI).
fn strip_control_sequence_fragments(input: &str, cursor: usize) -> (String, usize, bool) {
let chars: Vec<char> = input.chars().collect();
let mut output = String::with_capacity(input.len());
let mut new_cursor = 0usize;
let mut changed = false;
let mut index = 0usize;
while index < chars.len() {
if let Some(end) = match_osc8_fragment(&chars, index) {
// The excised span contributes nothing to `output`, so
// `new_cursor` simply doesn't tick for any of those
// characters. A cursor that was inside the span ends up at
// the fragment's start position in the rewritten input,
// which matches the existing run-stripper's behavior.
index = end;
changed = true;
continue;
}
if let Some(end) = match_kitty_csi_fragment(&chars, index) {
index = end;
changed = true;
continue;
}
if index < cursor {
new_cursor += 1;
}
output.push(chars[index]);
index += 1;
}
let cursor = new_cursor.min(char_count(&output));
(output, cursor, changed)
}
/// If an OSC 8 hyperlink fragment starts at `chars[start]`, return its
/// end index (exclusive). The leading `ESC` is optional because
/// crossterm's event parser often consumes it before reclassifying the
/// tail as keystrokes.
fn match_osc8_fragment(chars: &[char], start: usize) -> Option<usize> {
let body_start = if chars.get(start) == Some(&'\x1b')
&& chars.get(start + 1) == Some(&']')
&& chars.get(start + 2) == Some(&'8')
&& chars.get(start + 3) == Some(&';')
{
start + 4
} else if chars.get(start) == Some(&']')
&& chars.get(start + 1) == Some(&'8')
&& chars.get(start + 2) == Some(&';')
{
start + 3
} else {
return None;
};
// After `]8;` we expect the OSC 8 payload: an optional second `;`
// (params separator), then the URL (or empty for the closing
// wrapper), then a terminator. We deliberately stop at the first
// ASCII whitespace so a typed `]8;` followed by real prose can't
// swallow the user's words — real OSC 8 URLs don't contain spaces.
let mut end = body_start;
while end < chars.len() {
let ch = chars[end];
// BEL terminator.
if ch == '\x07' {
return Some(end + 1);
}
// `ESC \\` string terminator (ST).
if ch == '\x1b' && chars.get(end + 1) == Some(&'\\') {
return Some(end + 2);
}
// Lone `\\` — crossterm sometimes delivers ST with the leading
// ESC already consumed, leaving just `\\` as a Char keystroke.
if ch == '\\' {
return Some(end + 1);
}
// Start of the next OSC 8 wrapper (closing `]8;;` glued to the
// body) — close the current fragment here so the next iteration
// matches that one separately.
if ch == '\x1b' && chars.get(end + 1) == Some(&']') {
return Some(end);
}
if ch == ']' && chars.get(end + 1) == Some(&'8') && chars.get(end + 2) == Some(&';') {
return Some(end);
}
if ch.is_whitespace() {
// We never crossed a terminator, so this isn't a real
// fragment — give up rather than eat user prose.
return None;
}
end += 1;
}
// Reached end of input without a terminator or whitespace. Treat as
// a fragment in flight (its tail will arrive on a later keystroke
// and get filtered then).
Some(end)
}
/// If a Kitty keyboard protocol CSI fragment starts at `chars[start]`,
/// return its end index (exclusive). Shape: `(ESC)? [ (? | > | =)
/// [0-9;:]* u`. The private-parameter byte (`?`, `>`, `=`) is what
/// keeps this distinct from text the user might plausibly type.
fn match_kitty_csi_fragment(chars: &[char], start: usize) -> Option<usize> {
let after_csi = if chars.get(start) == Some(&'\x1b') && chars.get(start + 1) == Some(&'[') {
start + 2
} else if chars.get(start) == Some(&'[') {
start + 1
} else {
return None;
};
let priv_byte = chars.get(after_csi)?;
if !matches!(priv_byte, '?' | '>' | '=') {
return None;
}
let mut end = after_csi + 1;
while end < chars.len() {
let ch = chars[end];
if ch == 'u' {
return Some(end + 1);
}
if ch.is_ascii_digit() || ch == ';' || ch == ':' {
end += 1;
continue;
}
return None;
}
None
}
const MAX_SUBMITTED_INPUT_CHARS: usize = 16_000;
const MAX_DRAFT_HISTORY: usize = 50;
@@ -4679,6 +4841,112 @@ mod tests {
assert_eq!(app.input, "Size 12;34M");
}
// === Bug #1915: broader terminal control-sequence fragments leaking
// into the composer during dense streaming output. The narrow SGR
// mouse-report filter installed in e63a4ba4a covers `[<…M` style
// bursts, but not OSC 8 hyperlink fragments (`]8;;http…`) or Kitty
// keyboard protocol responses (`[?u`, `[>1u`). These can arrive when
// crossterm's event reader is mid-sequence and the unparsed tail is
// delivered as individual Char(c) keystrokes that land in the input.
#[test]
fn composer_strips_osc8_hyperlink_fragment() {
let mut app = App::new(test_options(false), &Config::default());
app.use_mouse_capture = true;
app.insert_str("draft ");
// OSC 8 prefix with URL body but no terminator delivered yet —
// exactly what crossterm hands us if its event reader is
// interrupted mid-sequence and the leading ESC is consumed by the
// parser before the rest gets reclassified as Char(c).
app.insert_str("]8;;https://example.com");
assert_eq!(app.input, "draft ");
assert_eq!(app.cursor_position, "draft ".chars().count());
}
#[test]
fn composer_strips_closing_osc8_fragment() {
let mut app = App::new(test_options(false), &Config::default());
app.use_mouse_capture = true;
app.insert_str("hello ");
// The closing wrapper `]8;;` (with a stray ST `\\` from a
// chopped escape) can arrive on its own when the parser ate
// the start of the sequence in a previous read but caught the
// tail as keystrokes.
app.insert_str("]8;;\\");
assert_eq!(app.input, "hello ");
assert_eq!(app.cursor_position, "hello ".chars().count());
}
#[test]
fn composer_strips_kitty_keyboard_protocol_fragment() {
let mut app = App::new(test_options(false), &Config::default());
app.use_mouse_capture = true;
app.insert_str("ready ");
// Kitty keyboard protocol responses look like `\x1b[?1u`,
// `\x1b[>1u`, or `\x1b[?u`. With the ESC consumed, the tail
// shape is `[?…u` or `[>…u`.
app.insert_str("[?1u[>1u[?u");
assert_eq!(app.input, "ready ");
assert_eq!(app.cursor_position, "ready ".chars().count());
}
#[test]
fn composer_strips_mixed_control_sequence_burst() {
let mut app = App::new(test_options(false), &Config::default());
app.use_mouse_capture = true;
app.insert_str("hi");
// Mixed dense burst combining all three fragment families
// described in #1915.
app.insert_str("[<35;44;18M]8;;https://example.com[?1u");
assert_eq!(app.input, "hi");
assert_eq!(app.cursor_position, 2);
}
#[test]
fn composer_keeps_legitimate_url_text_with_mouse_capture_enabled() {
let mut app = App::new(test_options(false), &Config::default());
app.use_mouse_capture = true;
// URLs typed by the user must survive the filter — only
// recognized control-sequence shapes are stripped.
app.insert_str("see https://example.com/path?a=1&b=2 for info");
assert_eq!(app.input, "see https://example.com/path?a=1&b=2 for info");
}
#[test]
fn composer_keeps_legitimate_bracket_question_text() {
let mut app = App::new(test_options(false), &Config::default());
app.use_mouse_capture = true;
// Text that uses brackets, question marks, and lowercase `u` —
// shapes that overlap Kitty fragments — must not be eaten.
app.insert_str("[is this ok?] sure");
assert_eq!(app.input, "[is this ok?] sure");
}
#[test]
fn composer_keeps_legitimate_closing_bracket_digit_text() {
let mut app = App::new(test_options(false), &Config::default());
app.use_mouse_capture = true;
// Plain `]8` followed by spaces and words must survive — only
// the OSC 8 shape `]8;` (with the mandatory `;` separator)
// should be treated as a fragment.
app.insert_str("array[]8 elements");
assert_eq!(app.input, "array[]8 elements");
}
// initial_onboarding_state tests
// These pin the logic that decides whether the TUI shows the
// onboarding flow (Welcome → Language → ApiKey → …) or goes