From d90031f06fbc14d6614359027016a730fad5c432 Mon Sep 17 00:00:00 2001 From: Paulo Aboim Pinto Date: Sun, 7 Jun 2026 16:12:12 +0200 Subject: [PATCH] Add Gherkin acceptance E2E harness example --- Cargo.lock | 364 +++++++++- crates/tui/Cargo.toml | 1 + .../tui/tests/directory_listing_acceptance.rs | 182 +++++ crates/tui/tests/eval_harness.rs | 95 ++- .../features/list_dir_happy_path.feature | 10 + .../tool_lifecycle_happy_path.feature | 31 + crates/tui/tests/tool_lifecycle_acceptance.rs | 630 ++++++++++++++++++ 7 files changed, 1270 insertions(+), 43 deletions(-) create mode 100644 crates/tui/tests/directory_listing_acceptance.rs create mode 100644 crates/tui/tests/features/list_dir_happy_path.feature create mode 100644 crates/tui/tests/features/tool_lifecycle_happy_path.feature create mode 100644 crates/tui/tests/tool_lifecycle_acceptance.rs diff --git a/Cargo.lock b/Cargo.lock index f3b55c66..c1011ba2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -160,7 +160,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -171,7 +171,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -724,6 +724,7 @@ dependencies = [ "anstyle", "clap_lex", "strsim 0.11.1", + "terminal_size", ] [[package]] @@ -973,6 +974,7 @@ dependencies = [ "codewhale-tools", "colored", "crossterm 0.28.1", + "cucumber", "dirs", "dotenvy", "fd-lock", @@ -1106,6 +1108,18 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "console" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" +dependencies = [ + "encode_unicode", + "libc", + "unicode-width 0.2.2", + 
"windows-sys 0.61.2", +] + [[package]] name = "convert_case" version = "0.6.0" @@ -1299,6 +1313,63 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "cucumber" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96a87e18d925b19ebe0fd47ea45316abd216d81ec0879c2448c3f9a0e9da62be" +dependencies = [ + "anyhow", + "clap", + "console", + "cucumber-codegen", + "cucumber-expressions", + "derive_more 2.1.1", + "either", + "futures", + "gherkin", + "globwalk", + "humantime", + "inventory", + "itertools 0.14.0", + "linked-hash-map", + "pin-project", + "ref-cast", + "regex", + "sealed", + "smart-default", +] + +[[package]] +name = "cucumber-codegen" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2fc8a8bbb73af3230db699e8690c5c786655f75eb89e5f18d76055fa1a9a4d" +dependencies = [ + "cucumber-expressions", + "inflections", + "itertools 0.14.0", + "proc-macro2", + "quote", + "regex", + "syn 2.0.117", + "synthez", +] + +[[package]] +name = "cucumber-expressions" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6401038de3af44fe74e6fccdb8a5b7db7ba418f480c8e9ad584c6f65c05a27a6" +dependencies = [ + "derive_more 2.1.1", + "either", + "nom 8.0.0", + "nom_locate", + "regex", + "regex-syntax 0.8.8", +] + [[package]] name = "darling" version = "0.23.0" @@ -1465,6 +1536,7 @@ dependencies = [ "quote", "rustc_version", "syn 2.0.117", + "unicode-xid", ] [[package]] @@ -1522,7 +1594,7 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1638,6 +1710,12 @@ dependencies = [ "log", ] +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -1702,7 +1780,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1923,9 +2001,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -1938,9 +2016,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -1948,15 +2026,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -1965,9 +2043,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = 
"cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-lite" @@ -1984,9 +2062,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", @@ -1995,21 +2073,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -2019,7 +2097,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -2077,6 +2154,23 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "gherkin" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e2c0d8c632f8a251ce9a8198079b1022adc586ff4e3d33e18debd40eb463b31" +dependencies = [ + "heck", + "peg", + "quote", + "serde", + "serde_json", + "syn 2.0.117", + "textwrap 0.16.2", + "thiserror 2.0.18", + "typed-builder", +] + [[package]] 
name = "globset" version = "0.4.18" @@ -2090,6 +2184,17 @@ dependencies = [ "regex-syntax 0.8.8", ] +[[package]] +name = "globwalk" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" +dependencies = [ + "bitflags 2.12.1", + "ignore", + "walkdir", +] + [[package]] name = "h2" version = "0.4.13" @@ -2240,6 +2345,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "humantime" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" + [[package]] name = "hybrid-array" version = "0.4.11" @@ -2520,6 +2631,12 @@ dependencies = [ "rustversion", ] +[[package]] +name = "inflections" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a257582fdcde896fd96463bf2d40eefea0580021c0712a0e2b028b60b47a837a" + [[package]] name = "inout" version = "0.1.4" @@ -2576,7 +2693,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2794,6 +2911,12 @@ dependencies = [ "bitflags 2.12.1", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-keyutils" version = "0.2.5" @@ -2878,7 +3001,7 @@ dependencies = [ "itoa", "log", "md-5", - "nom", + "nom 7.1.3", "rangemap", "time", "weezl", @@ -3096,13 +3219,33 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + +[[package]] +name = "nom_locate" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b577e2d69827c4740cba2b52efaad1c4cc7c73042860b199710b3575c68438d" +dependencies = [ + "bytecount", + "memchr", + "nom 8.0.0", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -3392,6 +3535,33 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "peg" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f76678828272f177ac33b7e2ac2e3e73cc6c1cd1e3e387928aa69562fa51367" +dependencies = [ + "peg-macros", + "peg-runtime", +] + +[[package]] +name = "peg-macros" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "636d60acf97633e48d266d7415a9355d4389cea327a193f87df395d88cd2b14d" +dependencies = [ + "peg-runtime", + "proc-macro2", + "quote", +] + +[[package]] +name = "peg-runtime" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555b1514d2d99d78150d3c799d4c357a3e2c2a8062cd108e93a06d9057629c5" + [[package]] name = "percent-encoding" version = "2.3.2" @@ -3503,6 +3673,26 @@ dependencies = [ "siphasher", ] +[[package]] +name = "pin-project" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2466b2336ed02bcdca6b294417127b90ec92038d1d5c4fbeac971a922e0e0924" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c96395f0a926bc13b1c17622aaddda1ecb55d49c8f1bf9777e4d877800a43f8b" 
+dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -4038,7 +4228,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4094,7 +4284,7 @@ dependencies = [ "security-framework 3.5.1", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4262,6 +4452,17 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sealed" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22f968c5ea23d555e670b449c1c5e7b2fc399fdaec1d304a17cd48e288abc107" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "secret-service" version = "4.0.0" @@ -4566,6 +4767,23 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "smart-default" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eb01866308440fc64d6c44d9e86c5cc17adfe33c4d6eed55da9145044d0ffc1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "smawk" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" + [[package]] name = "socket2" version = "0.6.1" @@ -4618,7 +4836,7 @@ dependencies = [ "starlark_syntax", "static_assertions", "strsim 0.10.0", - "textwrap", + "textwrap 0.11.0", "thiserror 1.0.69", ] @@ -4771,6 +4989,39 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "synthez" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6d8a928f38f1bc873f28e0d2ba8298ad65374a6ac2241dabd297271531a736cd" +dependencies = [ + "syn 2.0.117", + "synthez-codegen", + "synthez-core", +] + +[[package]] +name = "synthez-codegen" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fb83b8df4238e11746984dfb3819b155cd270de0e25847f45abad56b3671047" +dependencies = [ + "syn 2.0.117", + "synthez-core", +] + +[[package]] +name = "synthez-core" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "906fba967105d822e7c7ed60477b5e76116724d33de68a585681fb253fc30d5c" +dependencies = [ + "proc-macro2", + "quote", + "sealed", + "syn 2.0.117", +] + [[package]] name = "tar" version = "0.4.46" @@ -4792,7 +5043,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix 1.1.3", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4806,6 +5057,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "terminal_size" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" +dependencies = [ + "rustix 1.1.3", + "windows-sys 0.61.2", +] + [[package]] name = "terminfo" version = "0.9.0" @@ -4813,7 +5074,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4ea810f0692f9f51b382fff5893887bb4580f5fa246fde546e0b13e7fcee662" dependencies = [ "fnv", - "nom", + "nom 7.1.3", "phf", "phf_codegen", ] @@ -4878,6 +5139,17 @@ dependencies = [ "unicode-width 0.1.14", ] +[[package]] +name = "textwrap" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" +dependencies = [ + "smawk", + "unicode-linebreak", + "unicode-width 0.2.2", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -5286,6 +5558,26 @@ dependencies = [ "pom", ] +[[package]] +name = "typed-builder" +version = "0.23.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "31aa81521b70f94402501d848ccc0ecaa8f93c8eb6999eb9747e72287757ffda" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "076a02dc54dd46795c2e9c8282ed40bcfb1e22747e955de9389a1de28190fb26" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "typenum" version = "1.20.0" @@ -5306,7 +5598,7 @@ checksum = "f2f6fb2847f6742cd76af783a2a2c49e9375d0a111c7bef6f71cd9e738c72d6e" dependencies = [ "memoffset 0.9.1", "tempfile", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -5327,6 +5619,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "unicode-linebreak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" + [[package]] name = "unicode-normalization" version = "0.1.25" @@ -5723,7 +6021,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml index c4efe10e..c0bb09a2 100644 --- a/crates/tui/Cargo.toml +++ b/crates/tui/Cargo.toml @@ -76,6 +76,7 @@ flate2 = "1.1" sha2 = "0.10" [dev-dependencies] +cucumber = "0.23.0" wiremock = "0.6" pretty_assertions = "1.4" vt100 = "0.15" diff --git a/crates/tui/tests/directory_listing_acceptance.rs b/crates/tui/tests/directory_listing_acceptance.rs new file mode 100644 index 00000000..c998dcf6 --- /dev/null +++ b/crates/tui/tests/directory_listing_acceptance.rs @@ -0,0 +1,182 @@ +//! 
Cucumber acceptance test for directory listing. + +use std::path::{Path, PathBuf}; +use std::process::Command; + +use cucumber::{World as _, gherkin::Step, given, then, when, writer::Stats as _}; +use tempfile::TempDir; + +const FEATURE_NAME: &str = "Directory listing acceptance"; +const FEATURE_PATH: &str = concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/features/list_dir_happy_path.feature" +); +const HAPPY_PATH_SCENARIO: &str = "Happy path lists a workspace directory"; + +#[derive(Debug, Default, cucumber::World)] +struct DirectoryListingWorld { + record_dir: Option<TempDir>, + report: Option<serde_json::Value>, + fixture_records: Vec<serde_json::Value>, +} + +#[given("an offline CodeWhale evaluation workspace")] +fn offline_codewhale_evaluation_workspace(world: &mut DirectoryListingWorld) { + world.record_dir = Some(TempDir::new().expect("record tempdir")); +} + +#[when(regex = r#"^the user asks "([^"]+)"$"#)] +fn user_asks(world: &mut DirectoryListingWorld, prompt: String) { + assert_eq!(prompt, "list the current directory"); + + let record_dir = world + .record_dir + .as_ref() + .expect("offline evaluation workspace should be initialized"); + let output = Command::new(codewhale_tui_binary()) + .args(["eval", "--json", "--shell-command", "echo eval-harness"]) + .arg("--record") + .arg(record_dir.path()) + .output() + .expect("run codewhale-tui eval"); + + assert!( + output.status.success(), + "eval command failed\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + world.report = Some( + serde_json::from_slice(&output.stdout) + .expect("eval --json should emit a serializable report"), + ); + world.fixture_records = read_jsonl_records(&record_dir.path().join("offline-tool-loop.jsonl")); +} + +#[then(regex = r#"^the simulated LLM should call the "([^"]+)" tool$"#)] +fn simulated_llm_should_call_tool(world: &mut DirectoryListingWorld, expected_tool: String) { + let first_step = first_report_step(world); + + assert_eq!( +
first_step.get("kind").and_then(|value| value.as_str()), + Some("List") + ); + assert_eq!( + first_step.get("tool_name").and_then(|value| value.as_str()), + Some(expected_tool.as_str()) + ); + assert_eq!( + first_step.get("success").and_then(|value| value.as_bool()), + Some(true) + ); + + let first_record = world + .fixture_records + .first() + .expect("recorded list_dir fixture"); + assert_eq!( + first_record + .get("request") + .and_then(|request| request.get("step")) + .and_then(|step| step.as_str()), + Some(expected_tool.as_str()) + ); +} + +#[then("the tool output should include:")] +fn tool_output_should_include(world: &mut DirectoryListingWorld, step: &Step) { + let first_step = first_report_step(world); + let list_output = first_step + .get("output") + .and_then(|value| value.as_str()) + .expect("list_dir output"); + + for expected_entry in data_table_column(step, "entry") { + assert!( + list_output.contains(&expected_entry), + "list_dir output should include {expected_entry}: {list_output}" + ); + } +} + +#[tokio::test(flavor = "current_thread")] +async fn happy_path_lists_a_workspace_directory() { + run_scenario(HAPPY_PATH_SCENARIO).await; +} + +async fn run_scenario(name: &'static str) { + let writer = DirectoryListingWorld::cucumber() + .fail_on_skipped() + .with_default_cli() + .filter_run(FEATURE_PATH, move |feature, _, scenario| { + feature.name == FEATURE_NAME && scenario.name == name + }) + .await; + assert_eq!(writer.failed_steps(), 0, "scenario failed: {name}"); + assert_eq!(writer.skipped_steps(), 0, "scenario skipped steps: {name}"); + assert_eq!(writer.passed_steps(), 4, "scenario did not run: {name}"); +} + +fn first_report_step(world: &DirectoryListingWorld) -> &serde_json::Value { + world + .report + .as_ref() + .expect("evaluation report should exist") + .get("steps") + .and_then(|value| value.as_array()) + .and_then(|steps| steps.first()) + .expect("report should include at least one step") +} + +fn data_table_column(step: &Step, header: 
&str) -> Vec<String> { + let table = step + .table + .as_ref() + .expect("step should include a data table"); + let mut rows = table.rows.iter(); + let header_row = rows.next().expect("data table should include a header"); + let column_index = header_row + .iter() + .position(|value| value == header) + .expect("data table should include expected header"); + + let values: Vec<String> = rows + .map(|row| { + row.get(column_index) + .unwrap_or_else(|| panic!("data table row missing {header} value")) + .clone() + }) + .collect(); + assert!( + !values.is_empty(), + "data table should include at least one {header} value" + ); + values +} + +fn read_jsonl_records(path: &Path) -> Vec<serde_json::Value> { + std::fs::read_to_string(path) + .expect("read fixture records") + .lines() + .filter(|line| !line.trim().is_empty()) + .map(|line| serde_json::from_str(line).expect("fixture line should parse")) + .collect() +} + +fn codewhale_tui_binary() -> PathBuf { + if let Some(path) = option_env!("CARGO_BIN_EXE_codewhale-tui") { + return PathBuf::from(path); + } + if let Ok(path) = std::env::var("CARGO_BIN_EXE_codewhale-tui") { + return PathBuf::from(path); + } + + let mut path = std::env::current_exe().expect("current test executable path"); + path.pop(); + if path.ends_with("deps") { + path.pop(); + } + path.push(format!("codewhale-tui{}", std::env::consts::EXE_SUFFIX)); + path +} diff --git a/crates/tui/tests/eval_harness.rs b/crates/tui/tests/eval_harness.rs index 00a5d26b..f44d8cd9 100644 --- a/crates/tui/tests/eval_harness.rs +++ b/crates/tui/tests/eval_harness.rs @@ -2,13 +2,23 @@ use std::fs; +use tempfile::tempdir; + #[path = "../src/eval.rs"] mod eval; #[path = "../src/shell_dispatcher.rs"] mod shell_dispatcher; -use eval::{EvalHarness, EvalHarnessConfig, ScenarioStepKind}; -use tempfile::tempdir; +use eval::{EvalHarness, EvalHarnessConfig, FixtureRecord, ScenarioStepKind}; + +const HAPPY_PATH_TOOL_LOOP: [ScenarioStepKind; 6] = [ + ScenarioStepKind::List, + ScenarioStepKind::Read, +
ScenarioStepKind::Search, + ScenarioStepKind::Edit, + ScenarioStepKind::ApplyPatch, + ScenarioStepKind::ExecShell, +]; #[test] fn runs_offline_tool_loop_successfully() { @@ -26,14 +36,7 @@ fn runs_offline_tool_loop_successfully() { assert!(!run.scenario_name.is_empty()); assert!(run.workspace_summary.file_count >= 3); - for kind in [ - ScenarioStepKind::List, - ScenarioStepKind::Read, - ScenarioStepKind::Search, - ScenarioStepKind::Edit, - ScenarioStepKind::ApplyPatch, - ScenarioStepKind::ExecShell, - ] { + for kind in HAPPY_PATH_TOOL_LOOP { let stats = run .metrics .per_tool @@ -53,6 +56,78 @@ fn runs_offline_tool_loop_successfully() { assert_eq!(report.metrics.success, run.metrics.success); } +#[test] +fn acceptance_happy_path_records_simulated_llm_tool_plan() { + let record_dir = tempdir().expect("tempdir"); + let scenario_name = "issue-2791-happy-path-tool-loop"; + let config = EvalHarnessConfig { + scenario_name: scenario_name.to_string(), + record_dir: Some(record_dir.path().to_path_buf()), + ..EvalHarnessConfig::default() + }; + let harness = EvalHarness::new(config); + + let run = harness.run().expect("happy-path acceptance run"); + + assert!(run.metrics.success, "expected success metrics: {run:#?}"); + assert_eq!(run.metrics.tool_errors, 0); + assert_eq!(run.metrics.steps, HAPPY_PATH_TOOL_LOOP.len()); + + let actual_tool_names: Vec<&str> = run.steps.iter().map(|step| step.tool_name).collect(); + let expected_tool_names: Vec<&str> = HAPPY_PATH_TOOL_LOOP + .iter() + .map(|kind| kind.tool_name()) + .collect(); + assert_eq!(actual_tool_names, expected_tool_names); + + let scenario_file = record_dir.path().join(format!("{scenario_name}.jsonl")); + let records = read_fixture_records(&scenario_file); + assert_eq!(records.len(), HAPPY_PATH_TOOL_LOOP.len()); + + for (record, kind) in records.iter().zip(HAPPY_PATH_TOOL_LOOP) { + assert_eq!( + record.request.get("step").and_then(|value| value.as_str()), + Some(kind.tool_name()) + ); + + let expected_kind = 
format!("{kind:?}"); + assert_eq!( + record.request.get("kind").and_then(|value| value.as_str()), + Some(expected_kind.as_str()) + ); + + let event = record + .response_events + .first() + .expect("simulated LLM fixture should include a response event"); + assert_eq!( + event.get("type").and_then(|value| value.as_str()), + Some("ok") + ); + assert!( + event + .get("output") + .and_then(|value| value.as_str()) + .is_some_and(|output| !output.is_empty()), + "fixture event should include non-empty tool output" + ); + } + + let notes_path = run.workspace_root().join("notes.txt"); + let notes = fs::read_to_string(&notes_path).expect("notes.txt should exist"); + assert!(notes.contains("edited = true")); + assert!(notes.contains("todo: offline metrics (patched)")); +} + +fn read_fixture_records(path: &std::path::Path) -> Vec<FixtureRecord> { + fs::read_to_string(path) + .expect("read fixture records") + .lines() + .filter(|line| !line.trim().is_empty()) + .map(|line| serde_json::from_str(line).expect("fixture line should parse")) + .collect() +} + #[test] fn records_tool_errors_when_step_fails() { let config = EvalHarnessConfig { diff --git a/crates/tui/tests/features/list_dir_happy_path.feature b/crates/tui/tests/features/list_dir_happy_path.feature new file mode 100644 index 00000000..c677ff82 --- /dev/null +++ b/crates/tui/tests/features/list_dir_happy_path.feature @@ -0,0 +1,10 @@ +Feature: Directory listing acceptance + Scenario: Happy path lists a workspace directory + Given an offline CodeWhale evaluation workspace + When the user asks "list the current directory" + Then the simulated LLM should call the "list_dir" tool + And the tool output should include: + | entry | + | README.md | + | notes.txt | + | src | diff --git a/crates/tui/tests/features/tool_lifecycle_happy_path.feature b/crates/tui/tests/features/tool_lifecycle_happy_path.feature new file mode 100644 index 00000000..43c13f1c --- /dev/null +++ b/crates/tui/tests/features/tool_lifecycle_happy_path.feature @@ -0,0 +1,31 @@
+Feature: Tool call lifecycle + Scenario: Happy path lists the current directory through a tool + # This executable slice asserts the public exec stream and mocked LLM border. + # The PTY screen slice should also assert Statusline state and BlueWhale activity: + # running while the tool is executing, stopped or completed when the turn finishes. + Given an offline CodeWhale workspace containing: + | path | kind | + | README.md | file | + | notes.txt | file | + | src | folder | + And the mocked LLM will request the "list_dir" tool with: + | path | + | . | + And the mocked LLM will answer after the tool result: + | content | + | The directory contains README.md, notes.txt, and src/. | + When the user asks "list the current directory" + Then CodeWhale should send the user request to the mocked LLM + And the public tool lifecycle should show a running tool: + | status | marker | tool | input | + | running | [~] | list_dir | . | + And the public tool result should return directory entries: + | entry | kind | + | README.md | file | + | notes.txt | file | + | src | folder | + And CodeWhale should send the tool result back to the mocked LLM + And the public tool lifecycle should show a completed tool: + | status | marker | tool | input | + | completed | ✓ | list_dir | . | + And the public output should include "The directory contains README.md, notes.txt, and src/." diff --git a/crates/tui/tests/tool_lifecycle_acceptance.rs b/crates/tui/tests/tool_lifecycle_acceptance.rs new file mode 100644 index 00000000..21ecd28c --- /dev/null +++ b/crates/tui/tests/tool_lifecycle_acceptance.rs @@ -0,0 +1,630 @@ +//! Cucumber acceptance test for the public LLM/tool lifecycle. 
+ +use std::io::Read; +use std::path::PathBuf; +use std::process::{Command, Stdio}; +use std::time::Duration; + +use cucumber::{World as _, gherkin::Step, given, then, when, writer::Stats as _}; +use serde_json::{Value, json}; +use tempfile::TempDir; +use wait_timeout::ChildExt; +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, Request, ResponseTemplate}; + +const FEATURE_NAME: &str = "Tool call lifecycle"; +const FEATURE_PATH: &str = concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/features/tool_lifecycle_happy_path.feature" +); +const HAPPY_PATH_SCENARIO: &str = "Happy path lists the current directory through a tool"; +const TOOL_CALL_ID: &str = "call_list_dir"; +const TEST_MODEL: &str = "acceptance-model"; + +#[derive(Debug, Default, cucumber::World)] +struct ToolLifecycleWorld { + workspace: Option<TempDir>, + home: Option<TempDir>, + llm_server: Option<MockServer>, + tool_name: Option<String>, + tool_input: Option<Value>, + final_answer: Option<String>, + stdout: String, + stderr: String, + events: Vec<Value>, + requests: Vec<Value>, +} + +#[given("an offline CodeWhale workspace containing:")] +fn offline_codewhale_workspace_containing(world: &mut ToolLifecycleWorld, step: &Step) { + let workspace = TempDir::new().expect("workspace tempdir"); + let home = TempDir::new().expect("home tempdir"); + + for row in data_table_rows(step) { + let relative_path = row_value(&row, "path"); + let kind = row_value(&row, "kind"); + let path = workspace.path().join(relative_path); + match kind.as_str() { + "file" => std::fs::write(&path, "").expect("write workspace file"), + "folder" => std::fs::create_dir_all(&path).expect("create workspace folder"), + other => panic!("unsupported workspace entry kind: {other}"), + } + } + + world.workspace = Some(workspace); + world.home = Some(home); +} + +#[given(regex = r#"^the mocked LLM will request the "([^"]+)" tool with:$"#)] +fn mocked_llm_will_request_tool(world: &mut ToolLifecycleWorld, tool_name: String, step: &Step) { + let rows = data_table_rows(step); +
assert_eq!(rows.len(), 1, "tool input table should contain one row"); + let input = Value::Object( + rows[0] + .iter() + .map(|(key, value)| (key.clone(), Value::String(value.clone()))) + .collect(), + ); + + world.tool_name = Some(tool_name); + world.tool_input = Some(input); +} + +#[given("the mocked LLM will answer after the tool result:")] +fn mocked_llm_will_answer_after_tool_result(world: &mut ToolLifecycleWorld, step: &Step) { + let rows = data_table_rows(step); + assert_eq!(rows.len(), 1, "final answer table should contain one row"); + world.final_answer = Some(row_value(&rows[0], "content")); +} + +#[when(regex = r#"^the user asks "([^"]+)"$"#)] +async fn user_asks(world: &mut ToolLifecycleWorld, prompt: String) { + let server = start_mock_llm(world).await; + let output = run_codewhale_exec(world, &server, &prompt); + + world.stdout = String::from_utf8_lossy(&output.stdout).into_owned(); + world.stderr = String::from_utf8_lossy(&output.stderr).into_owned(); + assert!( + output.status.success(), + "codewhale-tui exec failed\nstdout:\n{}\nstderr:\n{}", + world.stdout, + world.stderr + ); + + world.events = parse_stream_events(&world.stdout); + world.requests = server + .received_requests() + .await + .expect("mock server should record requests") + .into_iter() + .filter(|request| request.url.path().ends_with("/chat/completions")) + .map(|request| { + request + .body_json() + .expect("chat request body should be JSON") + }) + .collect(); + world.llm_server = Some(server); +} + +#[then("CodeWhale should send the user request to the mocked LLM")] +fn codewhale_should_send_user_request_to_mocked_llm(world: &mut ToolLifecycleWorld) { + let first_request = world + .requests + .first() + .expect("expected an initial chat request"); + + assert!( + request_contains_user_text(first_request, "list the current directory"), + "initial request should include the user prompt:\n{first_request:#}" + ); + assert!( + !request_contains_tool_result(first_request), + "initial 
request should not include a tool result:\n{first_request:#}" + ); +} + +#[then("the public tool lifecycle should show a running tool:")] +fn public_tool_lifecycle_should_show_running_tool(world: &mut ToolLifecycleWorld, step: &Step) { + let expected = one_table_row(step); + assert_eq!(row_value(&expected, "status"), "running"); + assert_eq!(row_value(&expected, "marker"), "[~]"); + + let event = tool_use_event(world, &row_value(&expected, "tool")); + assert_eq!( + event.get("input").and_then(|input| input.get("path")), + Some(&json!(row_value(&expected, "input"))) + ); +} + +#[then("the public tool result should return directory entries:")] +fn public_tool_result_should_return_directory_entries(world: &mut ToolLifecycleWorld, step: &Step) { + let output = tool_result_output(world); + let entries: Vec = + serde_json::from_str(output).expect("list_dir result should be JSON entries"); + + for row in data_table_rows(step) { + let expected_name = row_value(&row, "entry"); + let expected_is_dir = match row_value(&row, "kind").as_str() { + "file" => false, + "folder" => true, + other => panic!("unsupported expected entry kind: {other}"), + }; + assert!( + entries.iter().any(|entry| { + entry.get("name").and_then(Value::as_str) == Some(expected_name.as_str()) + && entry.get("is_dir").and_then(Value::as_bool) == Some(expected_is_dir) + }), + "missing {expected_name} in list_dir result:\n{output}" + ); + } +} + +#[then("CodeWhale should send the tool result back to the mocked LLM")] +fn codewhale_should_send_tool_result_back_to_mocked_llm(world: &mut ToolLifecycleWorld) { + let request = world + .requests + .iter() + .find(|request| request_contains_tool_result(request)) + .expect("expected a follow-up chat request containing the tool result"); + let tool_result = tool_result_message(request).expect("tool result message"); + assert_eq!( + tool_result + .get("tool_call_id") + .and_then(serde_json::Value::as_str), + Some(TOOL_CALL_ID) + ); + + let content = tool_result + 
.get("content") + .and_then(serde_json::Value::as_str) + .expect("tool result content"); + for entry in ["README.md", "notes.txt", "src"] { + assert!( + content.contains(entry), + "tool result sent to LLM should include {entry}:\n{content}" + ); + } +} + +#[then("the public tool lifecycle should show a completed tool:")] +fn public_tool_lifecycle_should_show_completed_tool(world: &mut ToolLifecycleWorld, step: &Step) { + let expected = one_table_row(step); + assert_eq!(row_value(&expected, "status"), "completed"); + assert_eq!(row_value(&expected, "marker"), "✓"); + + let event = tool_result_event(world); + assert_eq!(event.get("status").and_then(Value::as_str), Some("success")); + + let tool_use = tool_use_event(world, &row_value(&expected, "tool")); + assert_eq!( + tool_use.get("input").and_then(|input| input.get("path")), + Some(&json!(row_value(&expected, "input"))) + ); +} + +#[then(regex = r#"^the public output should include "([^"]+)"$"#)] +fn public_output_should_include(world: &mut ToolLifecycleWorld, expected: String) { + let content = world + .events + .iter() + .filter(|event| event.get("type").and_then(Value::as_str) == Some("content")) + .filter_map(|event| event.get("content").and_then(Value::as_str)) + .collect::(); + assert!( + content.contains(&expected), + "public content output should include {expected:?}:\nstdout:\n{}\nstderr:\n{}", + world.stdout, + world.stderr + ); +} + +#[tokio::test(flavor = "current_thread")] +async fn happy_path_lists_current_directory_through_tool() { + run_scenario(HAPPY_PATH_SCENARIO).await; +} + +async fn run_scenario(name: &'static str) { + let writer = ToolLifecycleWorld::cucumber() + .fail_on_skipped() + .with_default_cli() + .filter_run(FEATURE_PATH, move |feature, _, scenario| { + feature.name == FEATURE_NAME && scenario.name == name + }) + .await; + assert_eq!(writer.failed_steps(), 0, "scenario failed: {name}"); + assert_eq!(writer.skipped_steps(), 0, "scenario skipped steps: {name}"); + 
assert_eq!(writer.passed_steps(), 10, "scenario did not run: {name}"); +} + +async fn start_mock_llm(world: &ToolLifecycleWorld) -> MockServer { + let server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/v1/models")) + .respond_with(json_response(json!({ + "object": "list", + "data": [{ "id": TEST_MODEL, "object": "model" }] + }))) + .mount(&server) + .await; + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .and(request_has_tool_result) + .respond_with(sse_response(&final_answer_sse( + world.final_answer.as_ref().expect("final LLM answer"), + ))) + .mount(&server) + .await; + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .and(request_has_no_tool_result) + .respond_with(sse_response(&tool_call_sse( + world.tool_name.as_ref().expect("tool name"), + world.tool_input.as_ref().expect("tool input"), + ))) + .mount(&server) + .await; + + server +} + +fn run_codewhale_exec( + world: &ToolLifecycleWorld, + server: &MockServer, + prompt: &str, +) -> std::process::Output { + let workspace = world + .workspace + .as_ref() + .expect("workspace") + .path() + .to_path_buf(); + let home = world.home.as_ref().expect("home").path().to_path_buf(); + + let mut command = Command::new(codewhale_tui_binary()); + preserve_host_env(&mut command); + command + .current_dir(&workspace) + .arg("--workspace") + .arg(&workspace) + .arg("--no-project-config") + .arg("exec") + .arg("--auto") + .arg("--model") + .arg(TEST_MODEL) + .arg("--output-format") + .arg("stream-json") + .arg(prompt) + .env("HOME", &home) + .env("USERPROFILE", &home) + .env("XDG_CONFIG_HOME", home.join(".config")) + .env("XDG_DATA_HOME", home.join(".local").join("share")) + .env("XDG_CACHE_HOME", home.join(".cache")) + .env( + "CODEWHALE_CONFIG_PATH", + home.join(".codewhale").join("config.toml"), + ) + .env( + "DEEPSEEK_CONFIG_PATH", + home.join(".deepseek").join("config.toml"), + ) + .env("DEEPSEEK_API_KEY", "ci-test-key-not-real") + 
.env("DEEPSEEK_BASE_URL", server.uri()) + .env("CODEWHALE_BASE_URL", server.uri()) + .env("DEEPSEEK_MODEL", TEST_MODEL) + .env("CODEWHALE_MODEL", TEST_MODEL) + .env("RUST_LOG", "warn") + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + std::fs::create_dir_all(home.join(".codewhale")).expect("create codewhale home config dir"); + std::fs::create_dir_all(home.join(".deepseek")).expect("create deepseek home config dir"); + + run_with_timeout(command, Duration::from_secs(45)) +} + +fn run_with_timeout(mut command: Command, timeout: Duration) -> std::process::Output { + let mut child = command.spawn().expect("spawn codewhale-tui exec"); + let status = match child.wait_timeout(timeout).expect("wait for codewhale-tui") { + Some(status) => status, + None => { + let _ = child.kill(); + let _ = child.wait(); + panic!("codewhale-tui exec timed out after {timeout:?}"); + } + }; + + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + child + .stdout + .take() + .expect("stdout pipe") + .read_to_end(&mut stdout) + .expect("read stdout"); + child + .stderr + .take() + .expect("stderr pipe") + .read_to_end(&mut stderr) + .expect("read stderr"); + + std::process::Output { + status, + stdout, + stderr, + } +} + +fn preserve_host_env(command: &mut Command) { + command.env_clear(); + for key in [ + "PATH", + "PATHEXT", + "SystemRoot", + "SystemDrive", + "WINDIR", + "COMSPEC", + "TEMP", + "TMP", + ] { + if let Some(value) = std::env::var_os(key) { + command.env(key, value); + } + } +} + +fn tool_call_sse(tool_name: &str, tool_input: &Value) -> String { + let arguments = serde_json::to_string(tool_input).expect("tool input arguments"); + [ + sse_chunk(json!({ + "id": "chatcmpl-tool", + "object": "chat.completion.chunk", + "model": TEST_MODEL, + "choices": [{ + "index": 0, + "delta": { + "tool_calls": [{ + "index": 0, + "id": TOOL_CALL_ID, + "type": "function", + "function": { + "name": tool_name, + "arguments": arguments + } + }] + }, + "finish_reason": null + }] + })), 
+ sse_chunk(json!({ + "id": "chatcmpl-tool", + "object": "chat.completion.chunk", + "model": TEST_MODEL, + "choices": [{ + "index": 0, + "delta": {}, + "finish_reason": "tool_calls" + }], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 2, + "total_tokens": 12 + } + })), + "data: [DONE]\n\n".to_string(), + ] + .join("") +} + +fn final_answer_sse(answer: &str) -> String { + [ + sse_chunk(json!({ + "id": "chatcmpl-final", + "object": "chat.completion.chunk", + "model": TEST_MODEL, + "choices": [{ + "index": 0, + "delta": { "content": answer }, + "finish_reason": null + }] + })), + sse_chunk(json!({ + "id": "chatcmpl-final", + "object": "chat.completion.chunk", + "model": TEST_MODEL, + "choices": [{ + "index": 0, + "delta": {}, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 20, + "completion_tokens": 8, + "total_tokens": 28 + } + })), + "data: [DONE]\n\n".to_string(), + ] + .join("") +} + +fn sse_chunk(value: Value) -> String { + format!( + "data: {}\n\n", + serde_json::to_string(&value).expect("SSE JSON") + ) +} + +fn sse_response(body: &str) -> ResponseTemplate { + ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .insert_header("cache-control", "no-cache") + .set_body_string(body.to_string()) +} + +fn json_response(value: Value) -> ResponseTemplate { + ResponseTemplate::new(200) + .insert_header("content-type", "application/json") + .set_body_json(value) +} + +fn request_has_tool_result(request: &Request) -> bool { + request + .body_json::() + .is_ok_and(|body| request_contains_tool_result(&body)) +} + +fn request_has_no_tool_result(request: &Request) -> bool { + !request_has_tool_result(request) +} + +fn request_contains_tool_result(request: &Value) -> bool { + tool_result_message(request).is_some() +} + +fn tool_result_message(request: &Value) -> Option<&Value> { + request + .get("messages") + .and_then(Value::as_array)? 
+ .iter() + .find(|message| message.get("role").and_then(Value::as_str) == Some("tool")) +} + +fn request_contains_user_text(request: &Value, expected: &str) -> bool { + request + .get("messages") + .and_then(Value::as_array) + .into_iter() + .flatten() + .any(|message| { + message.get("role").and_then(Value::as_str) == Some("user") + && message + .get("content") + .is_some_and(|content| value_contains_text(content, expected)) + }) +} + +fn value_contains_text(value: &Value, expected: &str) -> bool { + match value { + Value::String(text) => text.contains(expected), + Value::Array(values) => values + .iter() + .any(|value| value_contains_text(value, expected)), + Value::Object(values) => values + .values() + .any(|value| value_contains_text(value, expected)), + _ => false, + } +} + +fn parse_stream_events(stdout: &str) -> Vec { + stdout + .lines() + .filter(|line| !line.trim().is_empty()) + .filter_map(|line| { + let json_start = line.find('{')?; + let json_line = &line[json_start..]; + Some(serde_json::from_str(json_line).unwrap_or_else(|err| { + panic!( + "stream-json line should parse: {err}\nline: {line}\njson: {json_line}\nstdout:\n{stdout}" + ) + })) + }) + .collect() +} + +fn tool_use_event<'a>(world: &'a ToolLifecycleWorld, expected_tool: &str) -> &'a Value { + world + .events + .iter() + .find(|event| { + event.get("type").and_then(Value::as_str) == Some("tool_use") + && event.get("name").and_then(Value::as_str) == Some(expected_tool) + }) + .unwrap_or_else(|| { + panic!( + "expected tool_use event for {expected_tool}\nstdout:\n{}\nstderr:\n{}", + world.stdout, world.stderr + ) + }) +} + +fn tool_result_event(world: &ToolLifecycleWorld) -> &Value { + world + .events + .iter() + .find(|event| event.get("type").and_then(Value::as_str) == Some("tool_result")) + .unwrap_or_else(|| { + panic!( + "expected tool_result event\nstdout:\n{}\nstderr:\n{}", + world.stdout, world.stderr + ) + }) +} + +fn tool_result_output(world: &ToolLifecycleWorld) -> &str { + 
tool_result_event(world) + .get("output") + .and_then(Value::as_str) + .expect("tool_result output") +} + +fn one_table_row(step: &Step) -> Vec<(String, String)> { + let rows = data_table_rows(step); + assert_eq!(rows.len(), 1, "expected exactly one data table row"); + rows.into_iter().next().expect("one row") +} + +fn data_table_rows(step: &Step) -> Vec> { + let table = step + .table + .as_ref() + .expect("step should include a data table"); + let mut rows = table.rows.iter(); + let headers = rows + .next() + .expect("data table should include a header") + .clone(); + + let values: Vec> = rows + .map(|row| { + headers + .iter() + .zip(row.iter()) + .map(|(header, value)| (header.clone(), value.clone())) + .collect() + }) + .collect(); + assert!( + !values.is_empty(), + "data table should include at least one row" + ); + values +} + +fn row_value(row: &[(String, String)], header: &str) -> String { + row.iter() + .find_map(|(key, value)| (key == header).then(|| value.clone())) + .unwrap_or_else(|| panic!("data table row missing {header} value")) +} + +fn codewhale_tui_binary() -> PathBuf { + if let Some(path) = option_env!("CARGO_BIN_EXE_codewhale-tui") { + return PathBuf::from(path); + } + if let Ok(path) = std::env::var("CARGO_BIN_EXE_codewhale-tui") { + return PathBuf::from(path); + } + + let mut path = std::env::current_exe().expect("current test executable path"); + path.pop(); + if path.ends_with("deps") { + path.pop(); + } + path.push(format!("codewhale-tui{}", std::env::consts::EXE_SUFFIX)); + path +}