feat(#30): debug-log reasoning_content replay size per request

The thinking-mode sanitizer now sums the byte size of every replayed
`reasoning_content` field in the outgoing chat-completions body and
emits an `info`-level log line:

  Reasoning-content replay: 7 assistant message(s), ~3.2K input tokens (12,884 chars) being re-sent in this request

This is visible under `RUST_LOG=deepseek_tui=info` (or higher). It's the
first step toward the footer/status-line indicator described in #30 —
the model's input-side reasoning replay is now observable per turn,
even before it gets a dedicated UI surface.

Tests cover both branches: bodies that already have reasoning_content
(count is summed across all assistant turns) and bodies where the
sanitizer had to inject the `(reasoning omitted)` placeholder (the
placeholder bytes are included in the count since they ship over the
wire).

Footer integration deferred — that needs a new event from client → engine
→ TUI to surface the count alongside `cache N%` / `$X.XX`. Part of #30
remains open.
This commit is contained in:
Hunter Bown
2026-04-25 13:28:44 -05:00
parent 0a394e1587
commit 017ac97d0d
+113
View File
@@ -1684,6 +1684,12 @@ fn map_tool_choice_for_chat(choice: &Value) -> Option<Value> {
/// carries `tool_calls`, when the model + effort combination requires it.
/// DeepSeek's thinking-mode API rejects such messages with a 400 error;
/// substituting a placeholder keeps the conversation chain intact.
///
/// Also tallies the size of all replayed `reasoning_content` and logs it, so
/// users on `RUST_LOG=deepseek_tui=debug` can see how much of their input
/// budget is being spent re-sending prior thinking traces (V4 §5.1.1
/// "Interleaved Thinking" requires the full trace to be replayed across user
/// message boundaries in tool-calling sessions).
fn sanitize_thinking_mode_messages(body: &mut Value, model: &str, effort: Option<&str>) {
if !should_replay_reasoning_content(model, effort) {
return;
@@ -1692,6 +1698,8 @@ fn sanitize_thinking_mode_messages(body: &mut Value, model: &str, effort: Option
return;
};
let mut substitutions: u32 = 0;
let mut replay_chars: u64 = 0;
let mut replay_messages: u32 = 0;
for (idx, msg) in messages.iter_mut().enumerate() {
if msg.get("role").and_then(Value::as_str) != Some("assistant") {
continue;
@@ -1707,12 +1715,43 @@ fn sanitize_thinking_mode_messages(body: &mut Value, model: &str, effort: Option
"Final sanitizer: forced reasoning_content placeholder on assistant[{idx}]",
));
}
if let Some(reasoning) = msg.get("reasoning_content").and_then(Value::as_str) {
let len = reasoning.len() as u64;
if len > 0 {
replay_chars = replay_chars.saturating_add(len);
replay_messages = replay_messages.saturating_add(1);
}
}
}
if substitutions > 0 {
logging::warn(format!(
"Final sanitizer: {substitutions} assistant message(s) needed reasoning_content placeholder",
));
}
if replay_messages > 0 {
// ~4 chars/token is the standard rough estimate; DeepSeek tokens skew
// a touch shorter on Chinese/code but this is order-of-magnitude info.
let approx_tokens = replay_chars / 4;
logging::info(format!(
"Reasoning-content replay: {replay_messages} assistant message(s), ~{approx_tokens} input tokens ({replay_chars} chars) being re-sent in this request",
));
}
}
/// Sums the byte length of `reasoning_content` across all assistant messages in
/// an outgoing chat-completions body. Used by tests; the production sanitizer
/// computes the same number inline and logs it.
#[cfg(test)]
fn count_reasoning_replay_chars(body: &Value) -> u64 {
let Some(messages) = body.get("messages").and_then(Value::as_array) else {
return 0;
};
messages
.iter()
.filter(|m| m.get("role").and_then(Value::as_str) == Some("assistant"))
.filter_map(|m| m.get("reasoning_content").and_then(Value::as_str))
.map(|s| s.len() as u64)
.sum()
}
/// Diagnostic logger fired when DeepSeek rejects the request despite the
@@ -3395,6 +3434,80 @@ mod tests {
assert_eq!(usage.reasoning_tokens, Some(12));
}
#[test]
fn sanitize_thinking_mode_counts_reasoning_replay_across_assistant_turns() {
// Multi-turn body that mimics two prior tool-calling rounds: each
// assistant message carries its `reasoning_content`. The sanitizer
// should keep all of them and the count helper should tally bytes
// across every assistant message.
let mut body = json!({
"model": "deepseek-v4-pro",
"messages": [
{ "role": "system", "content": "you are helpful" },
{ "role": "user", "content": "step 1" },
{
"role": "assistant",
"content": "",
"reasoning_content": "I need to call tool A first.",
"tool_calls": [{ "id": "1", "type": "function" }]
},
{ "role": "tool", "tool_call_id": "1", "content": "ok" },
{
"role": "assistant",
"content": "",
"reasoning_content": "Now I call tool B.",
"tool_calls": [{ "id": "2", "type": "function" }]
},
{ "role": "tool", "tool_call_id": "2", "content": "ok" },
{ "role": "user", "content": "step 2" }
]
});
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
let chars = count_reasoning_replay_chars(&body);
// "I need to call tool A first." (28) + "Now I call tool B." (18) = 46
assert_eq!(chars, 46);
// No assistant messages should have lost or had their reasoning_content blanked.
let messages = body["messages"].as_array().unwrap();
let assistant_with_reasoning: usize = messages
.iter()
.filter(|m| m["role"] == "assistant")
.filter(|m| {
m["reasoning_content"]
.as_str()
.is_some_and(|s| !s.is_empty())
})
.count();
assert_eq!(assistant_with_reasoning, 2);
}
#[test]
fn sanitize_thinking_mode_counts_substituted_placeholder() {
// An assistant tool-call message is missing reasoning_content; the
// sanitizer must inject the placeholder, and the count helper must
// include the placeholder in the total (since it's in the wire
// payload that ships to DeepSeek).
let mut body = json!({
"model": "deepseek-v4-pro",
"messages": [
{ "role": "user", "content": "hi" },
{
"role": "assistant",
"content": "",
"tool_calls": [{ "id": "1", "type": "function" }]
}
]
});
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
let chars = count_reasoning_replay_chars(&body);
// "(reasoning omitted)" is 19 bytes.
assert_eq!(chars, 19);
}
#[test]
fn token_bucket_enforces_delay_when_empty() {
let now = Instant::now();