feat(#30): debug-log reasoning_content replay size per request
The thinking-mode sanitizer now sums the byte size of every replayed `reasoning_content` field in the outgoing chat-completions body and emits an `info`-level log line: Reasoning-content replay: 7 assistant message(s), ~3.2K input tokens (12,884 chars) being re-sent in this request This is visible under `RUST_LOG=deepseek_tui=info` (or higher). It's the first step toward the footer/status-line indicator described in #30 — the model's input-side reasoning replay is now observable per turn, even before it gets a dedicated UI surface. Tests cover both branches: bodies that already have reasoning_content (count is summed across all assistant turns) and bodies where the sanitizer had to inject the `(reasoning omitted)` placeholder (the placeholder bytes are included in the count since they ship over the wire). Footer integration deferred — that needs a new event from client → engine → TUI to surface the count alongside `cache N%` / `$X.XX`. Part of #30 remains open.
This commit is contained in:
@@ -1684,6 +1684,12 @@ fn map_tool_choice_for_chat(choice: &Value) -> Option<Value> {
|
||||
/// carries `tool_calls`, when the model + effort combination requires it.
|
||||
/// DeepSeek's thinking-mode API rejects such messages with a 400 error;
|
||||
/// substituting a placeholder keeps the conversation chain intact.
|
||||
///
|
||||
/// Also tallies the size of all replayed `reasoning_content` and logs it, so
|
||||
/// users on `RUST_LOG=deepseek_tui=debug` can see how much of their input
|
||||
/// budget is being spent re-sending prior thinking traces (V4 §5.1.1
|
||||
/// "Interleaved Thinking" requires the full trace to be replayed across user
|
||||
/// message boundaries in tool-calling sessions).
|
||||
fn sanitize_thinking_mode_messages(body: &mut Value, model: &str, effort: Option<&str>) {
|
||||
if !should_replay_reasoning_content(model, effort) {
|
||||
return;
|
||||
@@ -1692,6 +1698,8 @@ fn sanitize_thinking_mode_messages(body: &mut Value, model: &str, effort: Option
|
||||
return;
|
||||
};
|
||||
let mut substitutions: u32 = 0;
|
||||
let mut replay_chars: u64 = 0;
|
||||
let mut replay_messages: u32 = 0;
|
||||
for (idx, msg) in messages.iter_mut().enumerate() {
|
||||
if msg.get("role").and_then(Value::as_str) != Some("assistant") {
|
||||
continue;
|
||||
@@ -1707,12 +1715,43 @@ fn sanitize_thinking_mode_messages(body: &mut Value, model: &str, effort: Option
|
||||
"Final sanitizer: forced reasoning_content placeholder on assistant[{idx}]",
|
||||
));
|
||||
}
|
||||
if let Some(reasoning) = msg.get("reasoning_content").and_then(Value::as_str) {
|
||||
let len = reasoning.len() as u64;
|
||||
if len > 0 {
|
||||
replay_chars = replay_chars.saturating_add(len);
|
||||
replay_messages = replay_messages.saturating_add(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if substitutions > 0 {
|
||||
logging::warn(format!(
|
||||
"Final sanitizer: {substitutions} assistant message(s) needed reasoning_content placeholder",
|
||||
));
|
||||
}
|
||||
if replay_messages > 0 {
|
||||
// ~4 chars/token is the standard rough estimate; DeepSeek tokens skew
|
||||
// a touch shorter on Chinese/code but this is order-of-magnitude info.
|
||||
let approx_tokens = replay_chars / 4;
|
||||
logging::info(format!(
|
||||
"Reasoning-content replay: {replay_messages} assistant message(s), ~{approx_tokens} input tokens ({replay_chars} chars) being re-sent in this request",
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
/// Sums the byte length of `reasoning_content` across all assistant messages in
|
||||
/// an outgoing chat-completions body. Used by tests; the production sanitizer
|
||||
/// computes the same number inline and logs it.
|
||||
#[cfg(test)]
|
||||
fn count_reasoning_replay_chars(body: &Value) -> u64 {
|
||||
let Some(messages) = body.get("messages").and_then(Value::as_array) else {
|
||||
return 0;
|
||||
};
|
||||
messages
|
||||
.iter()
|
||||
.filter(|m| m.get("role").and_then(Value::as_str) == Some("assistant"))
|
||||
.filter_map(|m| m.get("reasoning_content").and_then(Value::as_str))
|
||||
.map(|s| s.len() as u64)
|
||||
.sum()
|
||||
}
|
||||
|
||||
/// Diagnostic logger fired when DeepSeek rejects the request despite the
|
||||
@@ -3395,6 +3434,80 @@ mod tests {
|
||||
assert_eq!(usage.reasoning_tokens, Some(12));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sanitize_thinking_mode_counts_reasoning_replay_across_assistant_turns() {
|
||||
// Multi-turn body that mimics two prior tool-calling rounds: each
|
||||
// assistant message carries its `reasoning_content`. The sanitizer
|
||||
// should keep all of them and the count helper should tally bytes
|
||||
// across every assistant message.
|
||||
let mut body = json!({
|
||||
"model": "deepseek-v4-pro",
|
||||
"messages": [
|
||||
{ "role": "system", "content": "you are helpful" },
|
||||
{ "role": "user", "content": "step 1" },
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"reasoning_content": "I need to call tool A first.",
|
||||
"tool_calls": [{ "id": "1", "type": "function" }]
|
||||
},
|
||||
{ "role": "tool", "tool_call_id": "1", "content": "ok" },
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"reasoning_content": "Now I call tool B.",
|
||||
"tool_calls": [{ "id": "2", "type": "function" }]
|
||||
},
|
||||
{ "role": "tool", "tool_call_id": "2", "content": "ok" },
|
||||
{ "role": "user", "content": "step 2" }
|
||||
]
|
||||
});
|
||||
|
||||
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
|
||||
|
||||
let chars = count_reasoning_replay_chars(&body);
|
||||
// "I need to call tool A first." (28) + "Now I call tool B." (18) = 46
|
||||
assert_eq!(chars, 46);
|
||||
|
||||
// No assistant messages should have lost or had their reasoning_content blanked.
|
||||
let messages = body["messages"].as_array().unwrap();
|
||||
let assistant_with_reasoning: usize = messages
|
||||
.iter()
|
||||
.filter(|m| m["role"] == "assistant")
|
||||
.filter(|m| {
|
||||
m["reasoning_content"]
|
||||
.as_str()
|
||||
.is_some_and(|s| !s.is_empty())
|
||||
})
|
||||
.count();
|
||||
assert_eq!(assistant_with_reasoning, 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sanitize_thinking_mode_counts_substituted_placeholder() {
|
||||
// An assistant tool-call message is missing reasoning_content; the
|
||||
// sanitizer must inject the placeholder, and the count helper must
|
||||
// include the placeholder in the total (since it's in the wire
|
||||
// payload that ships to DeepSeek).
|
||||
let mut body = json!({
|
||||
"model": "deepseek-v4-pro",
|
||||
"messages": [
|
||||
{ "role": "user", "content": "hi" },
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [{ "id": "1", "type": "function" }]
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
|
||||
|
||||
let chars = count_reasoning_replay_chars(&body);
|
||||
// "(reasoning omitted)" is 19 bytes.
|
||||
assert_eq!(chars, 19);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn token_bucket_enforces_delay_when_empty() {
|
||||
let now = Instant::now();
|
||||
|
||||
Reference in New Issue
Block a user