refactor(models): rename legacy DeepSeek context window

This commit is contained in:
Hunter Bown
2026-05-04 22:06:16 -05:00
parent a4dee56fcc
commit a14227edf8
4 changed files with 36 additions and 31 deletions
+2 -2
View File
@@ -225,7 +225,7 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
} else {
crate::models::context_window_for_model(resolved_model)
.unwrap_or(crate::models::DEFAULT_CONTEXT_WINDOW_TOKENS)
.unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
};
// Max output tokens: DeepSeek V4 models allow 262K; others get 4096.
@@ -4070,7 +4070,7 @@ model = "deepseek-v4-pro"
let cap = provider_capability(ApiProvider::Deepseek, "deepseek-coder");
assert_eq!(
cap.context_window,
crate::models::DEFAULT_CONTEXT_WINDOW_TOKENS
crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS
);
assert_eq!(cap.max_output, 4096);
assert!(!cap.thinking_supported);
+3 -2
View File
@@ -160,9 +160,10 @@ impl Engine {
let unique_reference_ids_recent_window =
self.recent_unique_reference_count(message_window, turn);
let context_window = usize::try_from(
context_window_for_model(&self.session.model).unwrap_or(DEFAULT_CONTEXT_WINDOW_TOKENS),
context_window_for_model(&self.session.model)
.unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS),
)
.unwrap_or(usize::try_from(DEFAULT_CONTEXT_WINDOW_TOKENS).unwrap_or(128_000))
.unwrap_or(usize::try_from(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS).unwrap_or(128_000))
.max(1);
let context_used_ratio = (self.estimated_input_tokens() as f64) / (context_window as f64);
+24 -21
View File
@@ -2,15 +2,17 @@
use serde::{Deserialize, Serialize};
pub const DEFAULT_CONTEXT_WINDOW_TOKENS: u32 = 128_000;
/// Context window used only for legacy DeepSeek model IDs that do not name a
/// newer V4 alias and do not carry an explicit `*k` suffix.
pub const LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS: u32 = 128_000;
pub const DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS: u32 = 1_000_000;
/// Last-resort compaction trigger when [`context_window_for_model`] returns
/// `None` (an unrecognised model id). v0.8.11 raised this from `50_000` to
/// `102_400` (80% of [`DEFAULT_CONTEXT_WINDOW_TOKENS`]) so unknown models
/// inherit the same late-trigger discipline as V4 instead of paying the
/// prefix-cache hit at 5% of the V4 window. Known DeepSeek / Claude models
/// resolve to their own scaled value via [`compaction_threshold_for_model`]
/// (#664).
/// `102_400` (80% of [`LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS`]) so unknown
/// models inherit the same late-trigger discipline as V4 instead of paying
/// the prefix-cache hit at 5% of the V4 window. Known DeepSeek / Claude
/// models resolve to their own scaled value via
/// [`compaction_threshold_for_model`] (#664).
pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 102_400;
pub const DEFAULT_COMPACTION_MESSAGE_THRESHOLD: usize = 50;
const COMPACTION_THRESHOLD_PERCENT: u32 = 80;
@@ -212,8 +214,9 @@ pub struct Usage {
#[must_use]
pub fn context_window_for_model(model: &str) -> Option<u32> {
let lower = model.to_lowercase();
// Unknown DeepSeek model IDs default to 128k unless an explicit *k suffix is present.
// DeepSeek-V4 family and current legacy aliases ship with a 1M context window.
// Unknown legacy DeepSeek model IDs default to 128K unless an explicit
// *k suffix is present. DeepSeek-V4 family and current compatibility
// aliases ship with a 1M context window.
if lower.contains("deepseek") {
if let Some(explicit_window) = deepseek_context_window_hint(&lower) {
return Some(explicit_window);
@@ -221,7 +224,7 @@ pub fn context_window_for_model(model: &str) -> Option<u32> {
if lower.contains("v4") || is_current_deepseek_v4_alias(&lower) {
return Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS);
}
return Some(DEFAULT_CONTEXT_WINDOW_TOKENS);
return Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
}
if lower.contains("claude") {
return Some(200_000);
@@ -411,14 +414,14 @@ mod tests {
}
#[test]
fn unknown_deepseek_models_map_to_128k_context_window() {
fn unknown_legacy_deepseek_models_map_to_128k_context_window() {
assert_eq!(
context_window_for_model("deepseek-coder"),
Some(DEFAULT_CONTEXT_WINDOW_TOKENS)
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
);
assert_eq!(
context_window_for_model("deepseek-v3.2-0324"),
Some(DEFAULT_CONTEXT_WINDOW_TOKENS)
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
);
}
@@ -447,7 +450,7 @@ mod tests {
);
assert_eq!(
context_window_for_model("deepseek-v3.2-2k-preview"),
Some(DEFAULT_CONTEXT_WINDOW_TOKENS)
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
);
}
@@ -458,11 +461,11 @@ mod tests {
102_400
);
// v0.8.11 (#664): unknown-model fallback also resolves to 80% of
// `DEFAULT_CONTEXT_WINDOW_TOKENS` (128k) — same late-trigger
// discipline as the V4 path. Was `50_000` pre-v0.8.11; that
// hardcoded value compacted at ~5% of a 1M window when the model
// detection silently fell through, which is exactly the
// prefix-cache-burning behaviour we're getting away from.
// `LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS` (128K legacy DeepSeek
// fallback) — same late-trigger discipline as the V4 path. Was
// `50_000` pre-v0.8.11; that hardcoded value compacted at ~5% of a
// 1M window when model detection silently fell through, which is
// exactly the prefix-cache-burning behaviour we're getting away from.
assert_eq!(compaction_threshold_for_model("unknown-model"), 102_400);
}
@@ -509,9 +512,9 @@ mod tests {
102_400
);
// v0.8.11 (#664): unknown-model fallback also lands on the
// 80%-of-128K floor instead of the legacy hardcoded 50K, so
// model-detection-fall-through doesn't quietly burn V4 prefix
// cache at 5%-of-window.
// 80%-of-128K legacy DeepSeek fallback instead of the pre-v0.8.11
// hardcoded 50K, so model-detection-fall-through doesn't quietly
// burn V4 prefix cache at 5%-of-window.
assert_eq!(
compaction_threshold_for_model_and_effort("unknown-model", Some("max")),
102_400
+7 -6
View File
@@ -4,16 +4,17 @@ use std::collections::HashSet;
use std::fmt::Write;
use crate::compaction::estimate_input_tokens_conservative;
use crate::models::{DEFAULT_CONTEXT_WINDOW_TOKENS, SystemPrompt, context_window_for_model};
use crate::models::{
LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, SystemPrompt, context_window_for_model,
};
use crate::session_manager::SessionContextReference;
use crate::tui::app::{App, ToolDetailRecord};
use crate::tui::file_mention::ContextReferenceSource;
use crate::utils::estimate_message_chars;
/// Marker used by the engine's `append_working_set_summary` to tag the
/// volatile tail block in the system prompt. Replicated here so the
/// context inspector can distinguish stable prefix blocks from the
/// ephemeral working-set block without importing engine internals.
/// Marker used by per-turn working-set metadata. Replicated here so the
/// context inspector can distinguish stable prompt blocks from volatile
/// working-set context without importing engine internals.
const WORKING_SET_MARKER: &str = "## Repo Working Set";
const CONTEXT_WARNING_THRESHOLD_PERCENT: f64 = 85.0;
@@ -68,7 +69,7 @@ pub fn build_context_inspector_text(app: &App) -> String {
}
fn context_usage(app: &App) -> (usize, u32, f64) {
let max = context_window_for_model(&app.model).unwrap_or(DEFAULT_CONTEXT_WINDOW_TOKENS);
let max = context_window_for_model(&app.model).unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
let estimated =
estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref());
let total_chars = estimate_message_chars(&app.api_messages);