feat(tui): FauxStep::Factory for live request-shape assertions

Closes #2074.

Adds FauxStep::Factory(Box<dyn Fn(&MessageRequest) -> CannedTurn + Send + Sync>) to MockLlmClient. When a Factory step is dequeued, its closure runs against the real outgoing MessageRequest before the response stream is built, so any assert! panic surfaces directly from the client call instead of later in stream polling.

Internal storage moves from VecDeque<CannedTurn> to VecDeque<FauxStep>, but every existing public method keeps working:
- MockLlmClient::new(Vec<CannedTurn>) wraps each turn in FauxStep::Canned.
- push_turn(CannedTurn) appends as FauxStep::Canned.

Adds push_factory(closure) for tests that want the Factory branch. The doc comment on the Factory variant captures the DeepSeek V4 thinking-mode tool-call invariant (the v0.4.9-v0.5.1 reasoning_content drop that produced HTTP 400 on follow-up turns).

Adds:
- crates/tui/tests/reasoning_content_replayed_after_tool_call.rs — a regression test whose factory asserts that the assistant tool-call turn carries a Thinking content block after a thinking + tool-call round.
- An additional unit test in mock.rs, create_message_synthesizes_from_factory_turn.

All 20 tests in the new file pass, and the existing integration_mock_llm suite (27 tests) is unchanged.
2026-05-25 00:38:47 -07:00
parent a83fa59594
commit de3a1f7773
2 changed files with 189 additions and 6 deletions
@@ -63,6 +63,21 @@ use super::{LlmClient, StreamEventBox};
 /// the mock does not require `MessageStart` to be present.
 pub type CannedTurn = Vec<StreamEvent>;

+/// A queued mock response step.
+///
+/// A step is either a fixed [`CannedTurn`] or a closure that builds one from
+/// the outgoing request.
+pub enum FauxStep {
+    /// A pre-built turn.
+    Canned(CannedTurn),
+    /// Build a canned turn from the live outgoing request.
+    ///
+    /// Tests can assert DeepSeek V4's thinking-mode tool-call invariant here:
+    /// the next outgoing request must still carry `reasoning_content`
+    /// (represented in this model as a [`ContentBlock::Thinking`] block) on
+    /// the assistant turn that produced the previous tool call. If it is
+    /// missing, DeepSeek V4 returns HTTP 400 on the follow-up turn. This guards the
+    /// [v0.4.9-v0.5.1 regression range](https://github.com/Hmbown/CodeWhale/compare/v0.4.9...v0.5.1)
+    /// where that content was dropped.
+    Factory(Box<dyn Fn(&MessageRequest) -> CannedTurn + Send + Sync>),
+}
+
 /// A queue-driven mock LLM client.
 ///
 /// The mock holds a FIFO queue of canned response turns. Each call to
@@ -75,7 +90,7 @@ pub type CannedTurn = Vec<StreamEvent>;
 /// can assert on the outgoing payload (e.g. that prior `reasoning_content` is
 /// preserved across turns).
 pub struct MockLlmClient {
-    canned: Mutex<VecDeque<CannedTurn>>,
+    canned: Mutex<VecDeque<FauxStep>>,
    captured_requests: Mutex<Vec<MessageRequest>>,
    calls: AtomicUsize,
    provider_name: &'static str,
@@ -91,7 +106,7 @@ impl MockLlmClient {
    #[must_use]
    pub fn new(canned: Vec<CannedTurn>) -> Self {
        Self {
-            canned: Mutex::new(canned.into()),
+            canned: Mutex::new(canned.into_iter().map(FauxStep::Canned).collect()),
            captured_requests: Mutex::new(Vec::new()),
            calls: AtomicUsize::new(0),
            provider_name: "mock",
@@ -119,7 +134,22 @@ impl MockLlmClient {
        self.canned
            .lock()
            .expect("MockLlmClient.canned mutex poisoned")
-            .push_back(turn);
+            .push_back(FauxStep::Canned(turn));
+    }
+
+    /// Enqueue a [`FauxStep::Factory`] step at the back of the queue.
+    ///
+    /// `factory` is handed the live outgoing [`MessageRequest`] before the
+    /// response stream is built, so a failed assertion inside it panics
+    /// directly from the client call rather than later while polling the
+    /// returned stream.
+    pub fn push_factory<F>(&self, factory: F)
+    where
+        F: Fn(&MessageRequest) -> CannedTurn + Send + Sync + 'static,
+    {
+        let step = FauxStep::Factory(Box::new(factory));
+        let mut queue = self
+            .canned
+            .lock()
+            .expect("MockLlmClient.canned mutex poisoned");
+        queue.push_back(step);
    }

    /// Push a canned non-streaming `MessageResponse`. Consumed by
@@ -175,13 +205,20 @@ impl MockLlmClient {
        self.calls.fetch_add(1, Ordering::SeqCst);
    }

-    fn pop_turn(&self) -> Option<CannedTurn> {
+    fn pop_step(&self) -> Option<FauxStep> {
        self.canned
            .lock()
            .expect("MockLlmClient.canned mutex poisoned")
            .pop_front()
    }

+    /// Resolve a dequeued step into the turn to replay: a factory step is
+    /// invoked with `request`, a canned step yields its stored turn unchanged.
+    fn turn_from_step(&self, step: FauxStep, request: &MessageRequest) -> CannedTurn {
+        match step {
+            FauxStep::Factory(build) => build(request),
+            FauxStep::Canned(events) => events,
+        }
+    }
+
    fn pop_message(&self) -> Option<MessageResponse> {
        self.canned_messages
            .lock()
@@ -207,26 +244,28 @@ impl LlmClient for MockLlmClient {
        }

        // Fallback: synthesize a MessageResponse from the next streaming turn.
-        let Some(turn) = self.pop_turn() else {
+        let Some(step) = self.pop_step() else {
            return Err(anyhow!(
                "MockLlmClient: create_message called but no canned response queued (request #{})",
                self.calls.load(Ordering::SeqCst)
            ));
        };

+        let turn = self.turn_from_step(step, &request);
        Ok(synthesize_message_response(turn, &self.model))
    }

    async fn create_message_stream(&self, request: MessageRequest) -> Result<StreamEventBox> {
        self.record_request(&request);

-        let Some(turn) = self.pop_turn() else {
+        let Some(step) = self.pop_step() else {
            return Err(anyhow!(
                "MockLlmClient: create_message_stream called but no canned turn queued (call #{})",
                self.calls.load(Ordering::SeqCst)
            ));
        };

+        let turn = self.turn_from_step(step, &request);
        Ok(stream_from_canned(turn))
    }

@@ -561,6 +600,22 @@ mod tests {
        assert_eq!(resp.stop_reason.as_deref(), Some("end_turn"));
    }

+    /// A queued factory step also serves `create_message`: the closure sees
+    /// the request, and the turn it returns becomes the response content.
+    #[tokio::test]
+    async fn create_message_synthesizes_from_factory_turn() {
+        let mock = MockLlmClient::new(Vec::new());
+        mock.push_factory(|req| {
+            assert_eq!(req.model, "mock-model");
+            canned::simple_text_turn("from factory")
+        });
+
+        let resp = mock.create_message(empty_request()).await.unwrap();
+        let ContentBlock::Text { text, .. } = &resp.content[0] else {
+            panic!("expected text");
+        };
+        assert_eq!(text.as_str(), "from factory");
+    }
+
    #[tokio::test]
    async fn provider_and_model_are_overridable() {
        let mock = MockLlmClient::new(vec![canned::simple_text_turn("x")])
@@ -0,0 +1,128 @@
+use futures_util::StreamExt;
+
+#[path = "../src/models.rs"]
+#[allow(dead_code)]
+mod models;
+
+#[path = "support/llm_client.rs"]
+mod llm_client;
+
+use crate::llm_client::LlmClient;
+use crate::llm_client::mock::{MockLlmClient, canned};
+use crate::models::{ContentBlock, Message, MessageRequest};
+
+/// Build a `user`-role message holding a single text block.
+fn user_message(text: &str) -> Message {
+    let block = ContentBlock::Text {
+        text: text.to_owned(),
+        cache_control: None,
+    };
+    Message {
+        role: String::from("user"),
+        content: vec![block],
+    }
+}
+
+/// Build an `assistant`-role message made of a thinking block followed by a
+/// tool-use block with the given `id`, `name` and `input`.
+fn assistant_thinking_tool_call(
+    thinking: &str,
+    id: &str,
+    name: &str,
+    input: serde_json::Value,
+) -> Message {
+    let reasoning = ContentBlock::Thinking {
+        thinking: thinking.to_owned(),
+    };
+    let tool_call = ContentBlock::ToolUse {
+        id: id.to_owned(),
+        name: name.to_owned(),
+        input,
+        caller: None,
+    };
+    Message {
+        role: String::from("assistant"),
+        content: vec![reasoning, tool_call],
+    }
+}
+
+/// Build a `user`-role message carrying one tool result for `tool_use_id`.
+fn tool_result_message(tool_use_id: &str, content: &str) -> Message {
+    let result = ContentBlock::ToolResult {
+        tool_use_id: tool_use_id.to_owned(),
+        content: content.to_owned(),
+        is_error: None,
+        content_blocks: None,
+    };
+    Message {
+        role: String::from("user"),
+        content: vec![result],
+    }
+}
+
+/// Wrap `messages` in a streaming `deepseek-v4-pro` request with
+/// `max_tokens` 4096 and `reasoning_effort` `"high"`; all remaining fields
+/// are `None`.
+fn make_request(messages: Vec<Message>) -> MessageRequest {
+    let model = String::from("deepseek-v4-pro");
+    let reasoning_effort = Some(String::from("high"));
+    MessageRequest {
+        model,
+        messages,
+        max_tokens: 4096,
+        stream: Some(true),
+        reasoning_effort,
+        system: None,
+        tools: None,
+        tool_choice: None,
+        metadata: None,
+        thinking: None,
+        temperature: None,
+        top_p: None,
+    }
+}
+
+/// Regression guard for the dropped-`reasoning_content` bug: after a
+/// thinking + tool-call round, the assistant turn in the follow-up request
+/// must still contain a [`ContentBlock::Thinking`] block.
+///
+/// The follow-up request is assembled by hand below; the queued factory step
+/// inspects it when the second stream is opened.
+#[tokio::test]
+async fn reasoning_content_is_replayed_after_thinking_tool_call() {
+    let mock = MockLlmClient::new(Vec::new());
+
+    // Round 1: the model thinks, then calls `list_dir` on /tmp.
+    let tool_call_round = vec![
+        canned::message_start("r1"),
+        canned::thinking_delta(0, "I should inspect /tmp before answering."),
+        canned::tool_use_block_start(1, "call_a", "list_dir"),
+        canned::tool_input_delta(1, r#"{"path":"/tmp"}"#),
+        canned::block_stop(1),
+        canned::message_delta("tool_use", None),
+        canned::message_stop(),
+    ];
+    mock.push_turn(tool_call_round);
+
+    // Round 2: check the live request before producing the final answer.
+    mock.push_factory(|outgoing| {
+        let last_assistant = outgoing
+            .messages
+            .iter()
+            .rfind(|msg| msg.role == "assistant")
+            .expect("follow-up request must include the prior assistant tool-call turn");
+
+        let has_thinking = last_assistant
+            .content
+            .iter()
+            .any(|block| matches!(block, ContentBlock::Thinking { .. }));
+        assert!(
+            has_thinking,
+            "DeepSeek V4 follow-up requests must replay reasoning_content on the assistant tool-call turn"
+        );
+
+        canned::simple_text_turn("I see the /tmp entries.")
+    });
+
+    let opening_request = make_request(vec![user_message("list /tmp")]);
+    let mut first = mock
+        .create_message_stream(opening_request)
+        .await
+        .expect("first stream opens");
+    // Drain the stream so the first round is fully consumed.
+    while first.next().await.is_some() {}
+
+    let follow_up_request = make_request(vec![
+        user_message("list /tmp"),
+        assistant_thinking_tool_call(
+            "I should inspect /tmp before answering.",
+            "call_a",
+            "list_dir",
+            serde_json::json!({ "path": "/tmp" }),
+        ),
+        tool_result_message("call_a", "/tmp/file1\n/tmp/file2"),
+    ]);
+    let mut second = mock
+        .create_message_stream(follow_up_request)
+        .await
+        .expect("second stream opens");
+    while second.next().await.is_some() {}
+}