fix(tui): filter terminal control sequence fragments from composer
Prevent leaked OSC/Kitty terminal control fragments from landing in composer input while preserving legitimate overlapping user text.
This commit is contained in:
+273
-5
@@ -406,10 +406,23 @@ fn sanitize_api_key_text(text: &str) -> String {
|
||||
}
|
||||
|
||||
fn strip_raw_mouse_report_runs(input: &str, cursor: usize) -> Option<(String, usize)> {
|
||||
let chars: Vec<char> = input.chars().collect();
|
||||
let mut output = String::with_capacity(input.len());
|
||||
// First pass: strip the well-defined control-sequence fragment
|
||||
// shapes that crossterm sometimes hands us as `Char(c)` keystrokes
|
||||
// when its event reader is interrupted mid-sequence during dense
|
||||
// streaming output (#1915). This covers OSC 8 hyperlink fragments
|
||||
// (`]8;;URL`, including the closing `]8;;`) and Kitty keyboard
|
||||
// protocol fragments (`[?…u`, `[>…u`, `[?u`).
|
||||
let (after_fragments, after_fragments_cursor, fragments_changed) =
|
||||
strip_control_sequence_fragments(input, cursor);
|
||||
|
||||
// Second pass: the existing run-based filter handles SGR mouse
|
||||
// reports (`[<35;44;18M`) and the multi-terminator burst shape
|
||||
// (`5;46;18M;48;18M`) introduced in e63a4ba4a. It operates on a
|
||||
// narrow char set so it can't be confused with user-typed text.
|
||||
let chars: Vec<char> = after_fragments.chars().collect();
|
||||
let mut output = String::with_capacity(after_fragments.len());
|
||||
let mut new_cursor = 0usize;
|
||||
let mut changed = false;
|
||||
let mut changed = fragments_changed;
|
||||
let mut index = 0usize;
|
||||
|
||||
while index < chars.len() {
|
||||
@@ -433,7 +446,7 @@ fn strip_raw_mouse_report_runs(input: &str, cursor: usize) -> Option<(String, us
|
||||
continue;
|
||||
}
|
||||
for (offset, ch) in run.iter().copied().enumerate() {
|
||||
if start + offset < cursor {
|
||||
if start + offset < after_fragments_cursor {
|
||||
new_cursor += 1;
|
||||
}
|
||||
output.push(ch);
|
||||
@@ -441,7 +454,7 @@ fn strip_raw_mouse_report_runs(input: &str, cursor: usize) -> Option<(String, us
|
||||
continue;
|
||||
}
|
||||
|
||||
if index < cursor {
|
||||
if index < after_fragments_cursor {
|
||||
new_cursor += 1;
|
||||
}
|
||||
output.push(chars[index]);
|
||||
@@ -566,6 +579,155 @@ fn looks_like_raw_mouse_report_fragment(run: &[char]) -> bool {
|
||||
&& run.iter().any(|ch| matches!(ch, 'M' | 'm'))
|
||||
}
|
||||
|
||||
/// Scan `input` for control-sequence fragment shapes (#1915) — OSC 8
|
||||
/// hyperlinks and Kitty keyboard protocol responses — and excise each
|
||||
/// match. Returns `(output, new_cursor, changed)`. Cursor positions
|
||||
/// inside an excised fragment are moved to the fragment's start.
|
||||
///
|
||||
/// The match shapes are deliberately narrow so legitimate text like
|
||||
/// `[is this ok?]` or a typed URL survives untouched:
|
||||
///
|
||||
/// - **OSC 8**: `(\x1b?)] 8 ; ...` consuming everything up to the
|
||||
/// first BEL (`\x07`), `\x1b\\`, lone `\\`, or the next `\x1b]8;`
|
||||
/// block — terminator characters are optional because crossterm may
|
||||
/// have already consumed them.
|
||||
/// - **Kitty CSI**: `(\x1b?) [ (? | > | =) ... u` — the `?`/`>`/`=`
|
||||
/// private-parameter prefix is what distinguishes a Kitty response
|
||||
/// from a user-typed `[…u` (which is exceedingly rare and would
|
||||
/// need an explicit private-parameter byte to be a real CSI).
|
||||
fn strip_control_sequence_fragments(input: &str, cursor: usize) -> (String, usize, bool) {
|
||||
let chars: Vec<char> = input.chars().collect();
|
||||
let mut output = String::with_capacity(input.len());
|
||||
let mut new_cursor = 0usize;
|
||||
let mut changed = false;
|
||||
let mut index = 0usize;
|
||||
|
||||
while index < chars.len() {
|
||||
if let Some(end) = match_osc8_fragment(&chars, index) {
|
||||
// The excised span contributes nothing to `output`, so
|
||||
// `new_cursor` simply doesn't tick for any of those
|
||||
// characters. A cursor that was inside the span ends up at
|
||||
// the fragment's start position in the rewritten input,
|
||||
// which matches the existing run-stripper's behavior.
|
||||
index = end;
|
||||
changed = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(end) = match_kitty_csi_fragment(&chars, index) {
|
||||
index = end;
|
||||
changed = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if index < cursor {
|
||||
new_cursor += 1;
|
||||
}
|
||||
output.push(chars[index]);
|
||||
index += 1;
|
||||
}
|
||||
|
||||
let cursor = new_cursor.min(char_count(&output));
|
||||
(output, cursor, changed)
|
||||
}
|
||||
|
||||
/// If an OSC 8 hyperlink fragment starts at `chars[start]`, return its
|
||||
/// end index (exclusive). The leading `ESC` is optional because
|
||||
/// crossterm's event parser often consumes it before reclassifying the
|
||||
/// tail as keystrokes.
|
||||
fn match_osc8_fragment(chars: &[char], start: usize) -> Option<usize> {
|
||||
let body_start = if chars.get(start) == Some(&'\x1b')
|
||||
&& chars.get(start + 1) == Some(&']')
|
||||
&& chars.get(start + 2) == Some(&'8')
|
||||
&& chars.get(start + 3) == Some(&';')
|
||||
{
|
||||
start + 4
|
||||
} else if chars.get(start) == Some(&']')
|
||||
&& chars.get(start + 1) == Some(&'8')
|
||||
&& chars.get(start + 2) == Some(&';')
|
||||
{
|
||||
start + 3
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
// After `]8;` we expect the OSC 8 payload: an optional second `;`
|
||||
// (params separator), then the URL (or empty for the closing
|
||||
// wrapper), then a terminator. We deliberately stop at the first
|
||||
// ASCII whitespace so a typed `]8;` followed by real prose can't
|
||||
// swallow the user's words — real OSC 8 URLs don't contain spaces.
|
||||
let mut end = body_start;
|
||||
while end < chars.len() {
|
||||
let ch = chars[end];
|
||||
// BEL terminator.
|
||||
if ch == '\x07' {
|
||||
return Some(end + 1);
|
||||
}
|
||||
// `ESC \\` string terminator (ST).
|
||||
if ch == '\x1b' && chars.get(end + 1) == Some(&'\\') {
|
||||
return Some(end + 2);
|
||||
}
|
||||
// Lone `\\` — crossterm sometimes delivers ST with the leading
|
||||
// ESC already consumed, leaving just `\\` as a Char keystroke.
|
||||
if ch == '\\' {
|
||||
return Some(end + 1);
|
||||
}
|
||||
// Start of the next OSC 8 wrapper (closing `]8;;` glued to the
|
||||
// body) — close the current fragment here so the next iteration
|
||||
// matches that one separately.
|
||||
if ch == '\x1b' && chars.get(end + 1) == Some(&']') {
|
||||
return Some(end);
|
||||
}
|
||||
if ch == ']' && chars.get(end + 1) == Some(&'8') && chars.get(end + 2) == Some(&';') {
|
||||
return Some(end);
|
||||
}
|
||||
if ch.is_whitespace() {
|
||||
// We never crossed a terminator, so this isn't a real
|
||||
// fragment — give up rather than eat user prose.
|
||||
return None;
|
||||
}
|
||||
end += 1;
|
||||
}
|
||||
|
||||
// Reached end of input without a terminator or whitespace. Treat as
|
||||
// a fragment in flight (its tail will arrive on a later keystroke
|
||||
// and get filtered then).
|
||||
Some(end)
|
||||
}
|
||||
|
||||
/// If a Kitty keyboard protocol CSI fragment starts at `chars[start]`,
|
||||
/// return its end index (exclusive). Shape: `(ESC)? [ (? | > | =)
|
||||
/// [0-9;:]* u`. The private-parameter byte (`?`, `>`, `=`) is what
|
||||
/// keeps this distinct from text the user might plausibly type.
|
||||
fn match_kitty_csi_fragment(chars: &[char], start: usize) -> Option<usize> {
|
||||
let after_csi = if chars.get(start) == Some(&'\x1b') && chars.get(start + 1) == Some(&'[') {
|
||||
start + 2
|
||||
} else if chars.get(start) == Some(&'[') {
|
||||
start + 1
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
|
||||
let priv_byte = chars.get(after_csi)?;
|
||||
if !matches!(priv_byte, '?' | '>' | '=') {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut end = after_csi + 1;
|
||||
while end < chars.len() {
|
||||
let ch = chars[end];
|
||||
if ch == 'u' {
|
||||
return Some(end + 1);
|
||||
}
|
||||
if ch.is_ascii_digit() || ch == ';' || ch == ':' {
|
||||
end += 1;
|
||||
continue;
|
||||
}
|
||||
return None;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
const MAX_SUBMITTED_INPUT_CHARS: usize = 16_000;
|
||||
const MAX_DRAFT_HISTORY: usize = 50;
|
||||
|
||||
@@ -4679,6 +4841,112 @@ mod tests {
|
||||
assert_eq!(app.input, "Size 12;34M");
|
||||
}
|
||||
|
||||
// === Bug #1915: broader terminal control-sequence fragments leaking
|
||||
// into the composer during dense streaming output. The narrow SGR
|
||||
// mouse-report filter installed in e63a4ba4a covers `[<…M` style
|
||||
// bursts, but not OSC 8 hyperlink fragments (`]8;;http…`) or Kitty
|
||||
// keyboard protocol responses (`[?u`, `[>1u`). These can arrive when
|
||||
// crossterm's event reader is mid-sequence and the unparsed tail is
|
||||
// delivered as individual Char(c) keystrokes that land in the input.
|
||||
|
||||
#[test]
|
||||
fn composer_strips_osc8_hyperlink_fragment() {
|
||||
let mut app = App::new(test_options(false), &Config::default());
|
||||
app.use_mouse_capture = true;
|
||||
app.insert_str("draft ");
|
||||
|
||||
// OSC 8 prefix with URL body but no terminator delivered yet —
|
||||
// exactly what crossterm hands us if its event reader is
|
||||
// interrupted mid-sequence and the leading ESC is consumed by the
|
||||
// parser before the rest gets reclassified as Char(c).
|
||||
app.insert_str("]8;;https://example.com");
|
||||
|
||||
assert_eq!(app.input, "draft ");
|
||||
assert_eq!(app.cursor_position, "draft ".chars().count());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn composer_strips_closing_osc8_fragment() {
|
||||
let mut app = App::new(test_options(false), &Config::default());
|
||||
app.use_mouse_capture = true;
|
||||
app.insert_str("hello ");
|
||||
|
||||
// The closing wrapper `]8;;` (with a stray ST `\\` from a
|
||||
// chopped escape) can arrive on its own when the parser ate
|
||||
// the start of the sequence in a previous read but caught the
|
||||
// tail as keystrokes.
|
||||
app.insert_str("]8;;\\");
|
||||
|
||||
assert_eq!(app.input, "hello ");
|
||||
assert_eq!(app.cursor_position, "hello ".chars().count());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn composer_strips_kitty_keyboard_protocol_fragment() {
|
||||
let mut app = App::new(test_options(false), &Config::default());
|
||||
app.use_mouse_capture = true;
|
||||
app.insert_str("ready ");
|
||||
|
||||
// Kitty keyboard protocol responses look like `\x1b[?1u`,
|
||||
// `\x1b[>1u`, or `\x1b[?u`. With the ESC consumed, the tail
|
||||
// shape is `[?…u` or `[>…u`.
|
||||
app.insert_str("[?1u[>1u[?u");
|
||||
|
||||
assert_eq!(app.input, "ready ");
|
||||
assert_eq!(app.cursor_position, "ready ".chars().count());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn composer_strips_mixed_control_sequence_burst() {
|
||||
let mut app = App::new(test_options(false), &Config::default());
|
||||
app.use_mouse_capture = true;
|
||||
app.insert_str("hi");
|
||||
|
||||
// Mixed dense burst combining all three fragment families
|
||||
// described in #1915.
|
||||
app.insert_str("[<35;44;18M]8;;https://example.com[?1u");
|
||||
|
||||
assert_eq!(app.input, "hi");
|
||||
assert_eq!(app.cursor_position, 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn composer_keeps_legitimate_url_text_with_mouse_capture_enabled() {
|
||||
let mut app = App::new(test_options(false), &Config::default());
|
||||
app.use_mouse_capture = true;
|
||||
|
||||
// URLs typed by the user must survive the filter — only
|
||||
// recognized control-sequence shapes are stripped.
|
||||
app.insert_str("see https://example.com/path?a=1&b=2 for info");
|
||||
|
||||
assert_eq!(app.input, "see https://example.com/path?a=1&b=2 for info");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn composer_keeps_legitimate_bracket_question_text() {
|
||||
let mut app = App::new(test_options(false), &Config::default());
|
||||
app.use_mouse_capture = true;
|
||||
|
||||
// Text that uses brackets, question marks, and lowercase `u` —
|
||||
// shapes that overlap Kitty fragments — must not be eaten.
|
||||
app.insert_str("[is this ok?] sure");
|
||||
|
||||
assert_eq!(app.input, "[is this ok?] sure");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn composer_keeps_legitimate_closing_bracket_digit_text() {
|
||||
let mut app = App::new(test_options(false), &Config::default());
|
||||
app.use_mouse_capture = true;
|
||||
|
||||
// Plain `]8` followed by spaces and words must survive — only
|
||||
// the OSC 8 shape `]8;` (with the mandatory `;` separator)
|
||||
// should be treated as a fragment.
|
||||
app.insert_str("array[]8 elements");
|
||||
|
||||
assert_eq!(app.input, "array[]8 elements");
|
||||
}
|
||||
|
||||
// initial_onboarding_state tests
|
||||
// These pin the logic that decides whether the TUI shows the
|
||||
// onboarding flow (Welcome → Language → ApiKey → …) or goes
|
||||
|
||||
Reference in New Issue
Block a user