refactor(engine): extract context helpers
This commit is contained in:
+11
-268
@@ -22,7 +22,7 @@ use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::client::DeepSeekClient;
|
||||
use crate::compaction::{
|
||||
CompactionConfig, compact_messages_safe, estimate_tokens, merge_system_prompts, should_compact,
|
||||
CompactionConfig, compact_messages_safe, merge_system_prompts, should_compact,
|
||||
};
|
||||
use crate::config::{Config, DEFAULT_MAX_SUBAGENTS, DEFAULT_TEXT_MODEL};
|
||||
use crate::cycle_manager::{
|
||||
@@ -35,7 +35,7 @@ use crate::llm_client::LlmClient;
|
||||
use crate::mcp::McpPool;
|
||||
use crate::models::{
|
||||
ContentBlock, ContentBlockStart, DEFAULT_CONTEXT_WINDOW_TOKENS, Delta, Message, MessageRequest,
|
||||
StreamEvent, SystemBlock, SystemPrompt, Tool, ToolCaller, Usage, context_window_for_model,
|
||||
StreamEvent, SystemPrompt, Tool, ToolCaller, Usage,
|
||||
};
|
||||
use crate::prompts;
|
||||
use crate::seam_manager::{SeamConfig, SeamManager};
|
||||
@@ -358,38 +358,6 @@ fn should_transparently_retry_stream(
|
||||
) -> bool {
|
||||
!any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled
|
||||
}
|
||||
/// Output-token ceiling requested for ordinary agent turns.
///
/// Deliberately generous: V4 thinking models can emit tens of thousands of
/// reasoning tokens on hard prompts before the visible reply, and DeepSeek V4
/// ships with a 1M context window. As of v0.7.5 this cap stays fixed rather
/// than silently lowering `max_tokens` near pressure; hard-cycle/preflight
/// checks reserve this budget plus safety headroom before the next request.
const TURN_MAX_OUTPUT_TOKENS: u32 = 262_144;
/// Number of most recent messages preserved when emergency trimming is required.
const MIN_RECENT_MESSAGES_TO_KEEP: usize = 4;
/// How many emergency recovery attempts are allowed before the turn fails.
const MAX_CONTEXT_RECOVERY_ATTEMPTS: u8 = 2;
/// Extra tokens held back so requests stay clear of provider hard limits.
const CONTEXT_HEADROOM_TOKENS: usize = 1024;
/// Hard character cap for any tool output inserted into model context.
const TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS: usize = 12_000;
/// Soft character cap applied to known noisy tools.
const TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS: usize = 2_000;
/// Snippet length retained when tool output is compacted for model context.
const TOOL_RESULT_CONTEXT_SNIPPET_CHARS: usize = 900;
/// Hard character cap for tool output sent to a large-context model.
const LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS: usize = 180_000;
/// Soft character cap for known noisy tools on a large-context model.
const LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS: usize = 60_000;
/// Snippet length retained when compacting large-context tool output.
const LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS: usize = 40_000;
/// Context window size (in tokens) from which the relaxed limits apply.
const LARGE_CONTEXT_WINDOW_TOKENS: u32 = 500_000;
/// Maximum characters kept from a metadata-provided output summary.
const TOOL_RESULT_METADATA_SUMMARY_CHARS: usize = 320;
/// Substring that identifies system-prompt text carrying a compaction summary.
const COMPACTION_SUMMARY_MARKER: &str = "Conversation Summary (Auto-Generated)";
/// Substring that identifies system-prompt blocks carrying the working-set summary.
const WORKING_SET_SUMMARY_MARKER: &str = "## Repo Working Set";
|
||||
|
||||
pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [
|
||||
"[TOOL_CALL]",
|
||||
"<deepseek:tool_call",
|
||||
@@ -565,240 +533,6 @@ fn format_tool_error(err: &ToolError, tool_name: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
/// Truncates `text` to at most `limit` characters, ending with `...` when cut.
///
/// Lengths are measured in `char`s (Unicode scalar values), not bytes, so the
/// cut never lands inside a multi-byte character.
///
/// * `text` - input to shorten; returned unchanged when it already fits.
/// * `limit` - maximum number of characters in the returned string.
///
/// When `limit` is smaller than the three-character ellipsis, the text is cut
/// to `limit` characters without an ellipsis so the result never exceeds
/// `limit`.
fn summarize_text(text: &str, limit: usize) -> String {
    const ELLIPSIS: &str = "...";
    const ELLIPSIS_CHARS: usize = 3;

    // A character at index `limit` exists only when the text is too long.
    // Probing for it stops at `limit` instead of counting the whole input.
    if text.char_indices().nth(limit).is_none() {
        return text.to_string();
    }

    // No room for the ellipsis: hard-cut instead of overshooting `limit`.
    if limit < ELLIPSIS_CHARS {
        return text.chars().take(limit).collect();
    }

    let keep = limit - ELLIPSIS_CHARS;
    let cut = text
        .char_indices()
        .nth(keep)
        .map_or(text.len(), |(offset, _)| offset);
    let mut out = String::with_capacity(cut + ELLIPSIS.len());
    out.push_str(&text[..cut]);
    out.push_str(ELLIPSIS);
    out
}
|
||||
|
||||
fn summarize_text_head_tail(text: &str, limit: usize) -> String {
|
||||
let total = text.chars().count();
|
||||
if total <= limit {
|
||||
return text.to_string();
|
||||
}
|
||||
if limit <= 20 {
|
||||
return summarize_text(text, limit);
|
||||
}
|
||||
|
||||
let marker = "\n\n[... output truncated for context ...]\n\n";
|
||||
let marker_len = marker.chars().count();
|
||||
if limit <= marker_len + 20 {
|
||||
return summarize_text(text, limit);
|
||||
}
|
||||
|
||||
let remaining = limit - marker_len;
|
||||
let head_len = remaining.saturating_mul(2) / 3;
|
||||
let tail_len = remaining.saturating_sub(head_len);
|
||||
let head: String = text.chars().take(head_len).collect();
|
||||
let tail_vec: Vec<char> = text.chars().rev().take(tail_len).collect();
|
||||
let tail: String = tail_vec.into_iter().rev().collect();
|
||||
format!("{head}{marker}{tail}")
|
||||
}
|
||||
|
||||
/// Reports whether `tool_name` is one of the tools whose output is subject to
/// the lower "noisy" soft limit. Matching is exact and case-sensitive.
fn tool_result_is_noisy(tool_name: &str) -> bool {
    const NOISY_TOOLS: [&str; 5] = [
        "exec_shell",
        "exec_shell_wait",
        "exec_shell_interact",
        "multi_tool_use.parallel",
        "web_search",
    ];
    NOISY_TOOLS.iter().any(|candidate| *candidate == tool_name)
}
|
||||
|
||||
fn tool_result_metadata_summary(metadata: Option<&serde_json::Value>) -> Option<String> {
|
||||
let obj = metadata?.as_object()?;
|
||||
for key in ["summary", "stdout_summary", "stderr_summary", "message"] {
|
||||
if let Some(text) = obj.get(key).and_then(serde_json::Value::as_str) {
|
||||
let trimmed = text.trim();
|
||||
if !trimmed.is_empty() {
|
||||
return Some(summarize_text(trimmed, TOOL_RESULT_METADATA_SUMMARY_CHARS));
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Character thresholds that decide when and how tool output is compacted
/// before it is inserted into model context.
#[derive(Clone, Copy, Debug)]
struct ToolResultContextLimits {
    /// Output longer than this is compacted regardless of the tool.
    hard_limit_chars: usize,
    /// Output from noisy tools longer than this is compacted.
    noisy_soft_limit_chars: usize,
    /// Character budget for the snippet kept in compacted output.
    snippet_chars: usize,
}
|
||||
|
||||
fn tool_result_context_limits_for_model(model: &str) -> ToolResultContextLimits {
|
||||
let is_large_context =
|
||||
context_window_for_model(model).is_some_and(|window| window >= LARGE_CONTEXT_WINDOW_TOKENS);
|
||||
|
||||
if is_large_context {
|
||||
ToolResultContextLimits {
|
||||
hard_limit_chars: LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS,
|
||||
noisy_soft_limit_chars: LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS,
|
||||
snippet_chars: LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS,
|
||||
}
|
||||
} else {
|
||||
ToolResultContextLimits {
|
||||
hard_limit_chars: TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS,
|
||||
noisy_soft_limit_chars: TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS,
|
||||
snippet_chars: TOOL_RESULT_CONTEXT_SNIPPET_CHARS,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn compact_tool_result_for_context(
|
||||
model: &str,
|
||||
tool_name: &str,
|
||||
output: &ToolResult,
|
||||
) -> String {
|
||||
let raw = output.content.trim();
|
||||
if raw.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
let limits = tool_result_context_limits_for_model(model);
|
||||
let raw_chars = raw.chars().count();
|
||||
let should_compact = raw_chars > limits.hard_limit_chars
|
||||
|| (tool_result_is_noisy(tool_name) && raw_chars > limits.noisy_soft_limit_chars);
|
||||
if !should_compact {
|
||||
return raw.to_string();
|
||||
}
|
||||
|
||||
let snippet = summarize_text_head_tail(raw, limits.snippet_chars);
|
||||
let omitted = raw_chars.saturating_sub(snippet.chars().count());
|
||||
let summary = tool_result_metadata_summary(output.metadata.as_ref());
|
||||
|
||||
if let Some(summary) = summary {
|
||||
format!(
|
||||
"[{tool_name} output compacted to protect context]\nSummary: {summary}\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)"
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"[{tool_name} output compacted to protect context]\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_compaction_summary_prompt(prompt: Option<SystemPrompt>) -> Option<SystemPrompt> {
|
||||
match prompt {
|
||||
Some(SystemPrompt::Blocks(blocks)) => {
|
||||
let summary_blocks: Vec<_> = blocks
|
||||
.into_iter()
|
||||
.filter(|block| block.text.contains(COMPACTION_SUMMARY_MARKER))
|
||||
.collect();
|
||||
if summary_blocks.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(SystemPrompt::Blocks(summary_blocks))
|
||||
}
|
||||
}
|
||||
Some(SystemPrompt::Text(text)) => {
|
||||
if text.contains(COMPACTION_SUMMARY_MARKER) {
|
||||
Some(SystemPrompt::Text(text))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn remove_working_set_summary(prompt: Option<&SystemPrompt>) -> Option<SystemPrompt> {
|
||||
match prompt {
|
||||
Some(SystemPrompt::Blocks(blocks)) => {
|
||||
let filtered: Vec<SystemBlock> = blocks
|
||||
.iter()
|
||||
.filter(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER))
|
||||
.cloned()
|
||||
.collect();
|
||||
if filtered.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(SystemPrompt::Blocks(filtered))
|
||||
}
|
||||
}
|
||||
Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Text(text.clone())),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn append_working_set_summary(
|
||||
prompt: Option<SystemPrompt>,
|
||||
working_set_summary: Option<&str>,
|
||||
) -> Option<SystemPrompt> {
|
||||
let Some(summary) = working_set_summary.map(str::trim).filter(|s| !s.is_empty()) else {
|
||||
return prompt;
|
||||
};
|
||||
let working_set_block = SystemBlock {
|
||||
block_type: "text".to_string(),
|
||||
text: summary.to_string(),
|
||||
cache_control: None,
|
||||
};
|
||||
|
||||
match prompt {
|
||||
Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Blocks(vec![
|
||||
SystemBlock {
|
||||
block_type: "text".to_string(),
|
||||
text,
|
||||
cache_control: None,
|
||||
},
|
||||
working_set_block,
|
||||
])),
|
||||
Some(SystemPrompt::Blocks(mut blocks)) => {
|
||||
blocks.retain(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER));
|
||||
blocks.push(working_set_block);
|
||||
Some(SystemPrompt::Blocks(blocks))
|
||||
}
|
||||
None => Some(SystemPrompt::Blocks(vec![working_set_block])),
|
||||
}
|
||||
}
|
||||
|
||||
/// Estimates the token count of `text` as one token per three characters,
/// rounded up. Characters are counted as `char`s, not bytes.
fn estimate_text_tokens_conservative(text: &str) -> usize {
    let chars = text.chars().count();
    let full_groups = chars / 3;
    // A partial trailing group still costs one token.
    full_groups + usize::from(chars % 3 != 0)
}
|
||||
|
||||
fn estimate_system_tokens_conservative(system: Option<&SystemPrompt>) -> usize {
|
||||
match system {
|
||||
Some(SystemPrompt::Text(text)) => estimate_text_tokens_conservative(text),
|
||||
Some(SystemPrompt::Blocks(blocks)) => blocks
|
||||
.iter()
|
||||
.map(|block| estimate_text_tokens_conservative(&block.text))
|
||||
.sum(),
|
||||
None => 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn estimate_input_tokens_conservative(
|
||||
messages: &[Message],
|
||||
system: Option<&SystemPrompt>,
|
||||
) -> usize {
|
||||
let message_tokens = estimate_tokens(messages).saturating_mul(3).div_ceil(2);
|
||||
let system_tokens = estimate_system_tokens_conservative(system);
|
||||
let framing_overhead = messages.len().saturating_mul(12).saturating_add(48);
|
||||
message_tokens
|
||||
.saturating_add(system_tokens)
|
||||
.saturating_add(framing_overhead)
|
||||
}
|
||||
|
||||
fn context_input_budget(model: &str, requested_output_tokens: u32) -> Option<usize> {
|
||||
let window = usize::try_from(context_window_for_model(model)?).ok()?;
|
||||
let output = usize::try_from(requested_output_tokens).ok()?;
|
||||
window
|
||||
.checked_sub(output)
|
||||
.and_then(|v| v.checked_sub(CONTEXT_HEADROOM_TOKENS))
|
||||
}
|
||||
|
||||
fn turn_response_headroom_tokens() -> u64 {
|
||||
u64::from(TURN_MAX_OUTPUT_TOKENS).saturating_add(CONTEXT_HEADROOM_TOKENS as u64)
|
||||
}
|
||||
|
||||
fn is_context_length_error_message(message: &str) -> bool {
|
||||
crate::error_taxonomy::classify_error_message(message) == ErrorCategory::InvalidInput
|
||||
}
|
||||
|
||||
fn emit_tool_audit(event: serde_json::Value) {
|
||||
let Some(path) = std::env::var_os("DEEPSEEK_TOOL_AUDIT_LOG") else {
|
||||
return;
|
||||
@@ -2404,6 +2138,15 @@ pub(crate) fn mock_engine_handle() -> MockEngineHandle {
|
||||
|
||||
mod approval;
|
||||
mod capacity_flow;
|
||||
mod context;
|
||||
pub(crate) use context::compact_tool_result_for_context;
|
||||
use context::{
|
||||
COMPACTION_SUMMARY_MARKER, MAX_CONTEXT_RECOVERY_ATTEMPTS, MIN_RECENT_MESSAGES_TO_KEEP,
|
||||
TURN_MAX_OUTPUT_TOKENS, append_working_set_summary, context_input_budget,
|
||||
estimate_input_tokens_conservative, extract_compaction_summary_prompt,
|
||||
is_context_length_error_message, remove_working_set_summary, summarize_text,
|
||||
turn_response_headroom_tokens,
|
||||
};
|
||||
mod dispatch;
|
||||
mod tool_catalog;
|
||||
mod tool_setup;
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
|
||||
use super::*;
|
||||
|
||||
use crate::models::context_window_for_model;
|
||||
|
||||
impl Engine {
|
||||
pub(super) async fn run_capacity_pre_request_checkpoint(
|
||||
&mut self,
|
||||
|
||||
@@ -0,0 +1,279 @@
|
||||
//! Context budgeting and prompt-shaping helpers for the engine.
|
||||
//!
|
||||
//! These functions are shared by the streaming turn loop, capacity flow, and
|
||||
//! engine session maintenance code. Keeping them here prevents the top-level
|
||||
//! engine module from accumulating unrelated context-policy details.
|
||||
|
||||
use crate::compaction::estimate_tokens;
|
||||
use crate::error_taxonomy::ErrorCategory;
|
||||
use crate::models::{Message, SystemBlock, SystemPrompt, context_window_for_model};
|
||||
use crate::tools::spec::ToolResult;
|
||||
|
||||
/// Max output tokens requested for normal agent turns. Generous on purpose:
|
||||
/// V4 thinking models can produce tens of thousands of reasoning tokens on
|
||||
/// hard prompts before the visible reply, and DeepSeek V4 ships with a 1M
|
||||
/// context window. v0.7.5 keeps this cap fixed instead of silently lowering
|
||||
/// `max_tokens` near pressure; hard-cycle/preflight checks reserve this budget
|
||||
/// plus safety headroom before sending the next request.
|
||||
pub(super) const TURN_MAX_OUTPUT_TOKENS: u32 = 262_144;
|
||||
/// Keep this many most recent messages when emergency trimming is required.
|
||||
pub(super) const MIN_RECENT_MESSAGES_TO_KEEP: usize = 4;
|
||||
/// Allow a few emergency recovery attempts before failing the turn.
|
||||
pub(super) const MAX_CONTEXT_RECOVERY_ATTEMPTS: u8 = 2;
|
||||
/// Reserve additional headroom to avoid hitting provider hard limits.
|
||||
const CONTEXT_HEADROOM_TOKENS: usize = 1024;
|
||||
/// Hard cap for any tool output inserted into model context.
|
||||
const TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS: usize = 12_000;
|
||||
/// Soft cap for known noisy tools inserted into model context.
|
||||
const TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS: usize = 2_000;
|
||||
/// Snippet length kept when compacting tool output for model context.
|
||||
const TOOL_RESULT_CONTEXT_SNIPPET_CHARS: usize = 900;
|
||||
/// Hard cap for tool output inserted into a large-context model.
|
||||
const LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS: usize = 180_000;
|
||||
/// Soft cap for known noisy tools inserted into a large-context model.
|
||||
const LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS: usize = 60_000;
|
||||
/// Snippet length kept when compacting large-context tool output.
|
||||
const LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS: usize = 40_000;
|
||||
/// Context window size at which tool output limits can be relaxed.
|
||||
const LARGE_CONTEXT_WINDOW_TOKENS: u32 = 500_000;
|
||||
/// Max chars to keep from metadata-provided output summaries.
|
||||
const TOOL_RESULT_METADATA_SUMMARY_CHARS: usize = 320;
|
||||
|
||||
pub(super) const COMPACTION_SUMMARY_MARKER: &str = "Conversation Summary (Auto-Generated)";
|
||||
pub(super) const WORKING_SET_SUMMARY_MARKER: &str = "## Repo Working Set";
|
||||
|
||||
/// Character thresholds that decide when and how tool output is compacted
/// before it is inserted into model context.
#[derive(Clone, Copy, Debug)]
struct ToolResultContextLimits {
    /// Output longer than this is compacted regardless of the tool.
    hard_limit_chars: usize,
    /// Output from noisy tools longer than this is compacted.
    noisy_soft_limit_chars: usize,
    /// Character budget for the snippet kept in compacted output.
    snippet_chars: usize,
}
|
||||
|
||||
pub(super) fn summarize_text(text: &str, limit: usize) -> String {
|
||||
if text.chars().count() <= limit {
|
||||
return text.to_string();
|
||||
}
|
||||
let take = limit.saturating_sub(3);
|
||||
let mut out: String = text.chars().take(take).collect();
|
||||
out.push_str("...");
|
||||
out
|
||||
}
|
||||
|
||||
fn summarize_text_head_tail(text: &str, limit: usize) -> String {
|
||||
let total = text.chars().count();
|
||||
if total <= limit {
|
||||
return text.to_string();
|
||||
}
|
||||
if limit <= 20 {
|
||||
return summarize_text(text, limit);
|
||||
}
|
||||
|
||||
let marker = "\n\n[... output truncated for context ...]\n\n";
|
||||
let marker_len = marker.chars().count();
|
||||
if limit <= marker_len + 20 {
|
||||
return summarize_text(text, limit);
|
||||
}
|
||||
|
||||
let remaining = limit - marker_len;
|
||||
let head_len = remaining.saturating_mul(2) / 3;
|
||||
let tail_len = remaining.saturating_sub(head_len);
|
||||
let head: String = text.chars().take(head_len).collect();
|
||||
let tail_vec: Vec<char> = text.chars().rev().take(tail_len).collect();
|
||||
let tail: String = tail_vec.into_iter().rev().collect();
|
||||
format!("{head}{marker}{tail}")
|
||||
}
|
||||
|
||||
/// Reports whether `tool_name` is one of the tools whose output is subject to
/// the lower "noisy" soft limit. Matching is exact and case-sensitive.
fn tool_result_is_noisy(tool_name: &str) -> bool {
    const NOISY_TOOLS: [&str; 5] = [
        "exec_shell",
        "exec_shell_wait",
        "exec_shell_interact",
        "multi_tool_use.parallel",
        "web_search",
    ];
    NOISY_TOOLS.iter().any(|candidate| *candidate == tool_name)
}
|
||||
|
||||
fn tool_result_metadata_summary(metadata: Option<&serde_json::Value>) -> Option<String> {
|
||||
let obj = metadata?.as_object()?;
|
||||
for key in ["summary", "stdout_summary", "stderr_summary", "message"] {
|
||||
if let Some(text) = obj.get(key).and_then(serde_json::Value::as_str) {
|
||||
let trimmed = text.trim();
|
||||
if !trimmed.is_empty() {
|
||||
return Some(summarize_text(trimmed, TOOL_RESULT_METADATA_SUMMARY_CHARS));
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn tool_result_context_limits_for_model(model: &str) -> ToolResultContextLimits {
|
||||
let is_large_context =
|
||||
context_window_for_model(model).is_some_and(|window| window >= LARGE_CONTEXT_WINDOW_TOKENS);
|
||||
|
||||
if is_large_context {
|
||||
ToolResultContextLimits {
|
||||
hard_limit_chars: LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS,
|
||||
noisy_soft_limit_chars: LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS,
|
||||
snippet_chars: LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS,
|
||||
}
|
||||
} else {
|
||||
ToolResultContextLimits {
|
||||
hard_limit_chars: TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS,
|
||||
noisy_soft_limit_chars: TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS,
|
||||
snippet_chars: TOOL_RESULT_CONTEXT_SNIPPET_CHARS,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn compact_tool_result_for_context(
|
||||
model: &str,
|
||||
tool_name: &str,
|
||||
output: &ToolResult,
|
||||
) -> String {
|
||||
let raw = output.content.trim();
|
||||
if raw.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
let limits = tool_result_context_limits_for_model(model);
|
||||
let raw_chars = raw.chars().count();
|
||||
let should_compact = raw_chars > limits.hard_limit_chars
|
||||
|| (tool_result_is_noisy(tool_name) && raw_chars > limits.noisy_soft_limit_chars);
|
||||
if !should_compact {
|
||||
return raw.to_string();
|
||||
}
|
||||
|
||||
let snippet = summarize_text_head_tail(raw, limits.snippet_chars);
|
||||
let omitted = raw_chars.saturating_sub(snippet.chars().count());
|
||||
let summary = tool_result_metadata_summary(output.metadata.as_ref());
|
||||
|
||||
if let Some(summary) = summary {
|
||||
format!(
|
||||
"[{tool_name} output compacted to protect context]\nSummary: {summary}\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)"
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"[{tool_name} output compacted to protect context]\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn extract_compaction_summary_prompt(
|
||||
prompt: Option<SystemPrompt>,
|
||||
) -> Option<SystemPrompt> {
|
||||
match prompt {
|
||||
Some(SystemPrompt::Blocks(blocks)) => {
|
||||
let summary_blocks: Vec<_> = blocks
|
||||
.into_iter()
|
||||
.filter(|block| block.text.contains(COMPACTION_SUMMARY_MARKER))
|
||||
.collect();
|
||||
if summary_blocks.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(SystemPrompt::Blocks(summary_blocks))
|
||||
}
|
||||
}
|
||||
Some(SystemPrompt::Text(text)) => {
|
||||
if text.contains(COMPACTION_SUMMARY_MARKER) {
|
||||
Some(SystemPrompt::Text(text))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn remove_working_set_summary(prompt: Option<&SystemPrompt>) -> Option<SystemPrompt> {
|
||||
match prompt {
|
||||
Some(SystemPrompt::Blocks(blocks)) => {
|
||||
let filtered: Vec<SystemBlock> = blocks
|
||||
.iter()
|
||||
.filter(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER))
|
||||
.cloned()
|
||||
.collect();
|
||||
if filtered.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(SystemPrompt::Blocks(filtered))
|
||||
}
|
||||
}
|
||||
Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Text(text.clone())),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn append_working_set_summary(
|
||||
prompt: Option<SystemPrompt>,
|
||||
working_set_summary: Option<&str>,
|
||||
) -> Option<SystemPrompt> {
|
||||
let Some(summary) = working_set_summary.map(str::trim).filter(|s| !s.is_empty()) else {
|
||||
return prompt;
|
||||
};
|
||||
let working_set_block = SystemBlock {
|
||||
block_type: "text".to_string(),
|
||||
text: summary.to_string(),
|
||||
cache_control: None,
|
||||
};
|
||||
|
||||
match prompt {
|
||||
Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Blocks(vec![
|
||||
SystemBlock {
|
||||
block_type: "text".to_string(),
|
||||
text,
|
||||
cache_control: None,
|
||||
},
|
||||
working_set_block,
|
||||
])),
|
||||
Some(SystemPrompt::Blocks(mut blocks)) => {
|
||||
blocks.retain(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER));
|
||||
blocks.push(working_set_block);
|
||||
Some(SystemPrompt::Blocks(blocks))
|
||||
}
|
||||
None => Some(SystemPrompt::Blocks(vec![working_set_block])),
|
||||
}
|
||||
}
|
||||
|
||||
/// Estimates the token count of `text` as one token per three characters,
/// rounded up. Characters are counted as `char`s, not bytes.
fn estimate_text_tokens_conservative(text: &str) -> usize {
    let chars = text.chars().count();
    let full_groups = chars / 3;
    // A partial trailing group still costs one token.
    full_groups + usize::from(chars % 3 != 0)
}
|
||||
|
||||
fn estimate_system_tokens_conservative(system: Option<&SystemPrompt>) -> usize {
|
||||
match system {
|
||||
Some(SystemPrompt::Text(text)) => estimate_text_tokens_conservative(text),
|
||||
Some(SystemPrompt::Blocks(blocks)) => blocks
|
||||
.iter()
|
||||
.map(|block| estimate_text_tokens_conservative(&block.text))
|
||||
.sum(),
|
||||
None => 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn estimate_input_tokens_conservative(
|
||||
messages: &[Message],
|
||||
system: Option<&SystemPrompt>,
|
||||
) -> usize {
|
||||
let message_tokens = estimate_tokens(messages).saturating_mul(3).div_ceil(2);
|
||||
let system_tokens = estimate_system_tokens_conservative(system);
|
||||
let framing_overhead = messages.len().saturating_mul(12).saturating_add(48);
|
||||
message_tokens
|
||||
.saturating_add(system_tokens)
|
||||
.saturating_add(framing_overhead)
|
||||
}
|
||||
|
||||
pub(super) fn context_input_budget(model: &str, requested_output_tokens: u32) -> Option<usize> {
|
||||
let window = usize::try_from(context_window_for_model(model)?).ok()?;
|
||||
let output = usize::try_from(requested_output_tokens).ok()?;
|
||||
window
|
||||
.checked_sub(output)
|
||||
.and_then(|v| v.checked_sub(CONTEXT_HEADROOM_TOKENS))
|
||||
}
|
||||
|
||||
pub(super) fn turn_response_headroom_tokens() -> u64 {
|
||||
u64::from(TURN_MAX_OUTPUT_TOKENS).saturating_add(CONTEXT_HEADROOM_TOKENS as u64)
|
||||
}
|
||||
|
||||
pub(super) fn is_context_length_error_message(message: &str) -> bool {
|
||||
crate::error_taxonomy::classify_error_message(message) == ErrorCategory::InvalidInput
|
||||
}
|
||||
@@ -1,5 +1,7 @@
|
||||
use super::*;
|
||||
|
||||
use super::context::WORKING_SET_SUMMARY_MARKER;
|
||||
use crate::models::SystemBlock;
|
||||
use serde_json::json;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
Reference in New Issue
Block a user