diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 149fb35f..49fd56f3 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -225,7 +225,7 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS } else { crate::models::context_window_for_model(resolved_model) - .unwrap_or(crate::models::DEFAULT_CONTEXT_WINDOW_TOKENS) + .unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS) }; // Max output tokens: DeepSeek V4 models allow 262K; others get 4096. @@ -4070,7 +4070,7 @@ model = "deepseek-v4-pro" let cap = provider_capability(ApiProvider::Deepseek, "deepseek-coder"); assert_eq!( cap.context_window, - crate::models::DEFAULT_CONTEXT_WINDOW_TOKENS + crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS ); assert_eq!(cap.max_output, 4096); assert!(!cap.thinking_supported); diff --git a/crates/tui/src/core/engine/capacity_flow.rs b/crates/tui/src/core/engine/capacity_flow.rs index f280e644..cee5fb76 100644 --- a/crates/tui/src/core/engine/capacity_flow.rs +++ b/crates/tui/src/core/engine/capacity_flow.rs @@ -160,9 +160,10 @@ impl Engine { let unique_reference_ids_recent_window = self.recent_unique_reference_count(message_window, turn); let context_window = usize::try_from( - context_window_for_model(&self.session.model).unwrap_or(DEFAULT_CONTEXT_WINDOW_TOKENS), + context_window_for_model(&self.session.model) + .unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS), ) - .unwrap_or(usize::try_from(DEFAULT_CONTEXT_WINDOW_TOKENS).unwrap_or(128_000)) + .unwrap_or(usize::try_from(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS).unwrap_or(128_000)) .max(1); let context_used_ratio = (self.estimated_input_tokens() as f64) / (context_window as f64); diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs index 8b1413ee..320d8305 100644 --- a/crates/tui/src/models.rs +++ b/crates/tui/src/models.rs @@ -2,15 +2,17 @@ use serde::{Deserialize, Serialize}; -pub const 
DEFAULT_CONTEXT_WINDOW_TOKENS: u32 = 128_000; +/// Context window used only for legacy DeepSeek model IDs that do not name a +/// newer V4 alias and do not carry an explicit `*k` suffix. +pub const LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS: u32 = 128_000; pub const DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS: u32 = 1_000_000; /// Last-resort compaction trigger when [`context_window_for_model`] returns /// `None` (an unrecognised model id). v0.8.11 raised this from `50_000` to -/// `102_400` (80% of [`DEFAULT_CONTEXT_WINDOW_TOKENS`]) so unknown models -/// inherit the same late-trigger discipline as V4 instead of paying the -/// prefix-cache hit at 5% of the V4 window. Known DeepSeek / Claude models -/// resolve to their own scaled value via [`compaction_threshold_for_model`] -/// (#664). +/// `102_400` (80% of [`LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS`]) so unknown +/// models inherit the same late-trigger discipline as V4 instead of paying +/// the prefix-cache hit at 5% of the V4 window. Known DeepSeek / Claude +/// models resolve to their own scaled value via +/// [`compaction_threshold_for_model`] (#664). pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 102_400; pub const DEFAULT_COMPACTION_MESSAGE_THRESHOLD: usize = 50; const COMPACTION_THRESHOLD_PERCENT: u32 = 80; @@ -212,8 +214,9 @@ pub struct Usage { #[must_use] pub fn context_window_for_model(model: &str) -> Option<u32> { let lower = model.to_lowercase(); - // Unknown DeepSeek model IDs default to 128k unless an explicit *k suffix is present. - // DeepSeek-V4 family and current legacy aliases ship with a 1M context window. + // Unknown legacy DeepSeek model IDs default to 128K unless an explicit + // *k suffix is present. DeepSeek-V4 family and current compatibility + // aliases ship with a 1M context window. 
if lower.contains("deepseek") { if let Some(explicit_window) = deepseek_context_window_hint(&lower) { return Some(explicit_window); @@ -221,7 +224,7 @@ pub fn context_window_for_model(model: &str) -> Option<u32> { if lower.contains("v4") || is_current_deepseek_v4_alias(&lower) { return Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS); } - return Some(DEFAULT_CONTEXT_WINDOW_TOKENS); + return Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS); } if lower.contains("claude") { return Some(200_000); @@ -411,14 +414,14 @@ mod tests { } #[test] - fn unknown_deepseek_models_map_to_128k_context_window() { + fn unknown_legacy_deepseek_models_map_to_128k_context_window() { assert_eq!( context_window_for_model("deepseek-coder"), - Some(DEFAULT_CONTEXT_WINDOW_TOKENS) + Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS) ); assert_eq!( context_window_for_model("deepseek-v3.2-0324"), - Some(DEFAULT_CONTEXT_WINDOW_TOKENS) + Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS) ); } @@ -447,7 +450,7 @@ mod tests { ); assert_eq!( context_window_for_model("deepseek-v3.2-2k-preview"), - Some(DEFAULT_CONTEXT_WINDOW_TOKENS) + Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS) ); } @@ -458,11 +461,11 @@ mod tests { 102_400 ); // v0.8.11 (#664): unknown-model fallback also resolves to 80% of - // `DEFAULT_CONTEXT_WINDOW_TOKENS` (128k) — same late-trigger - // discipline as the V4 path. Was `50_000` pre-v0.8.11; that - // hardcoded value compacted at ~5% of a 1M window when the model - // detection silently fell through, which is exactly the - // prefix-cache-burning behaviour we're getting away from. + // `LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS` (128K legacy DeepSeek + // fallback) — same late-trigger discipline as the V4 path. Was + // `50_000` pre-v0.8.11; that hardcoded value compacted at ~5% of a + // 1M window when model detection silently fell through, which is + // exactly the prefix-cache-burning behaviour we're getting away from. 
assert_eq!(compaction_threshold_for_model("unknown-model"), 102_400); } @@ -509,9 +512,9 @@ mod tests { 102_400 ); // v0.8.11 (#664): unknown-model fallback also lands on the - // 80%-of-128K floor instead of the legacy hardcoded 50K, so - // model-detection-fall-through doesn't quietly burn V4 prefix - // cache at 5%-of-window. + // 80%-of-128K legacy DeepSeek fallback instead of the legacy + // hardcoded 50K, so model-detection-fall-through doesn't quietly + // burn V4 prefix cache at 5%-of-window. assert_eq!( compaction_threshold_for_model_and_effort("unknown-model", Some("max")), 102_400 diff --git a/crates/tui/src/tui/context_inspector.rs b/crates/tui/src/tui/context_inspector.rs index 7f0e139b..12a6cca4 100644 --- a/crates/tui/src/tui/context_inspector.rs +++ b/crates/tui/src/tui/context_inspector.rs @@ -4,16 +4,17 @@ use std::collections::HashSet; use std::fmt::Write; use crate::compaction::estimate_input_tokens_conservative; -use crate::models::{DEFAULT_CONTEXT_WINDOW_TOKENS, SystemPrompt, context_window_for_model}; +use crate::models::{ + LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, SystemPrompt, context_window_for_model, +}; use crate::session_manager::SessionContextReference; use crate::tui::app::{App, ToolDetailRecord}; use crate::tui::file_mention::ContextReferenceSource; use crate::utils::estimate_message_chars; -/// Marker used by the engine's `append_working_set_summary` to tag the -/// volatile tail block in the system prompt. Replicated here so the -/// context inspector can distinguish stable prefix blocks from the -/// ephemeral working-set block without importing engine internals. +/// Marker used by per-turn working-set metadata. Replicated here so the +/// context inspector can distinguish stable prompt blocks from volatile +/// working-set context without importing engine internals. 
const WORKING_SET_MARKER: &str = "## Repo Working Set"; const CONTEXT_WARNING_THRESHOLD_PERCENT: f64 = 85.0; @@ -68,7 +69,7 @@ pub fn build_context_inspector_text(app: &App) -> String { } fn context_usage(app: &App) -> (usize, u32, f64) { - let max = context_window_for_model(&app.model).unwrap_or(DEFAULT_CONTEXT_WINDOW_TOKENS); + let max = context_window_for_model(&app.model).unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS); let estimated = estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref()); let total_chars = estimate_message_chars(&app.api_messages);