refactor(models): rename legacy DeepSeek context window
This commit is contained in:
@@ -225,7 +225,7 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
|
||||
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
|
||||
} else {
|
||||
crate::models::context_window_for_model(resolved_model)
|
||||
.unwrap_or(crate::models::DEFAULT_CONTEXT_WINDOW_TOKENS)
|
||||
.unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
|
||||
};
|
||||
|
||||
// Max output tokens: DeepSeek V4 models allow 262K; others get 4096.
|
||||
@@ -4070,7 +4070,7 @@ model = "deepseek-v4-pro"
|
||||
let cap = provider_capability(ApiProvider::Deepseek, "deepseek-coder");
|
||||
assert_eq!(
|
||||
cap.context_window,
|
||||
crate::models::DEFAULT_CONTEXT_WINDOW_TOKENS
|
||||
crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS
|
||||
);
|
||||
assert_eq!(cap.max_output, 4096);
|
||||
assert!(!cap.thinking_supported);
|
||||
|
||||
@@ -160,9 +160,10 @@ impl Engine {
|
||||
let unique_reference_ids_recent_window =
|
||||
self.recent_unique_reference_count(message_window, turn);
|
||||
let context_window = usize::try_from(
|
||||
context_window_for_model(&self.session.model).unwrap_or(DEFAULT_CONTEXT_WINDOW_TOKENS),
|
||||
context_window_for_model(&self.session.model)
|
||||
.unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS),
|
||||
)
|
||||
.unwrap_or(usize::try_from(DEFAULT_CONTEXT_WINDOW_TOKENS).unwrap_or(128_000))
|
||||
.unwrap_or(usize::try_from(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS).unwrap_or(128_000))
|
||||
.max(1);
|
||||
let context_used_ratio = (self.estimated_input_tokens() as f64) / (context_window as f64);
|
||||
|
||||
|
||||
+24
-21
@@ -2,15 +2,17 @@
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub const DEFAULT_CONTEXT_WINDOW_TOKENS: u32 = 128_000;
|
||||
/// Context window used only for legacy DeepSeek model IDs that do not name a
|
||||
/// newer V4 alias and do not carry an explicit `*k` suffix.
|
||||
pub const LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS: u32 = 128_000;
|
||||
pub const DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS: u32 = 1_000_000;
|
||||
/// Last-resort compaction trigger when [`context_window_for_model`] returns
|
||||
/// `None` (an unrecognised model id). v0.8.11 raised this from `50_000` to
|
||||
/// `102_400` (80% of [`DEFAULT_CONTEXT_WINDOW_TOKENS`]) so unknown models
|
||||
/// inherit the same late-trigger discipline as V4 instead of paying the
|
||||
/// prefix-cache hit at 5% of the V4 window. Known DeepSeek / Claude models
|
||||
/// resolve to their own scaled value via [`compaction_threshold_for_model`]
|
||||
/// (#664).
|
||||
/// `102_400` (80% of [`LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS`]) so unknown
|
||||
/// models inherit the same late-trigger discipline as V4 instead of paying
|
||||
/// the prefix-cache hit at 5% of the V4 window. Known DeepSeek / Claude
|
||||
/// models resolve to their own scaled value via
|
||||
/// [`compaction_threshold_for_model`] (#664).
|
||||
pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 102_400;
|
||||
pub const DEFAULT_COMPACTION_MESSAGE_THRESHOLD: usize = 50;
|
||||
const COMPACTION_THRESHOLD_PERCENT: u32 = 80;
|
||||
@@ -212,8 +214,9 @@ pub struct Usage {
|
||||
#[must_use]
|
||||
pub fn context_window_for_model(model: &str) -> Option<u32> {
|
||||
let lower = model.to_lowercase();
|
||||
// Unknown DeepSeek model IDs default to 128k unless an explicit *k suffix is present.
|
||||
// DeepSeek-V4 family and current legacy aliases ship with a 1M context window.
|
||||
// Unknown legacy DeepSeek model IDs default to 128K unless an explicit
|
||||
// *k suffix is present. DeepSeek-V4 family and current compatibility
|
||||
// aliases ship with a 1M context window.
|
||||
if lower.contains("deepseek") {
|
||||
if let Some(explicit_window) = deepseek_context_window_hint(&lower) {
|
||||
return Some(explicit_window);
|
||||
@@ -221,7 +224,7 @@ pub fn context_window_for_model(model: &str) -> Option<u32> {
|
||||
if lower.contains("v4") || is_current_deepseek_v4_alias(&lower) {
|
||||
return Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS);
|
||||
}
|
||||
return Some(DEFAULT_CONTEXT_WINDOW_TOKENS);
|
||||
return Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
|
||||
}
|
||||
if lower.contains("claude") {
|
||||
return Some(200_000);
|
||||
@@ -411,14 +414,14 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unknown_deepseek_models_map_to_128k_context_window() {
|
||||
fn unknown_legacy_deepseek_models_map_to_128k_context_window() {
|
||||
assert_eq!(
|
||||
context_window_for_model("deepseek-coder"),
|
||||
Some(DEFAULT_CONTEXT_WINDOW_TOKENS)
|
||||
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
|
||||
);
|
||||
assert_eq!(
|
||||
context_window_for_model("deepseek-v3.2-0324"),
|
||||
Some(DEFAULT_CONTEXT_WINDOW_TOKENS)
|
||||
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -447,7 +450,7 @@ mod tests {
|
||||
);
|
||||
assert_eq!(
|
||||
context_window_for_model("deepseek-v3.2-2k-preview"),
|
||||
Some(DEFAULT_CONTEXT_WINDOW_TOKENS)
|
||||
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -458,11 +461,11 @@ mod tests {
|
||||
102_400
|
||||
);
|
||||
// v0.8.11 (#664): unknown-model fallback also resolves to 80% of
|
||||
// `DEFAULT_CONTEXT_WINDOW_TOKENS` (128k) — same late-trigger
|
||||
// discipline as the V4 path. Was `50_000` pre-v0.8.11; that
|
||||
// hardcoded value compacted at ~5% of a 1M window when the model
|
||||
// detection silently fell through, which is exactly the
|
||||
// prefix-cache-burning behaviour we're getting away from.
|
||||
// `LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS` (128K legacy DeepSeek
|
||||
// fallback) — same late-trigger discipline as the V4 path. Was
|
||||
// `50_000` pre-v0.8.11; that hardcoded value compacted at ~5% of a
|
||||
// 1M window when model detection silently fell through, which is
|
||||
// exactly the prefix-cache-burning behaviour we're getting away from.
|
||||
assert_eq!(compaction_threshold_for_model("unknown-model"), 102_400);
|
||||
}
|
||||
|
||||
@@ -509,9 +512,9 @@ mod tests {
|
||||
102_400
|
||||
);
|
||||
// v0.8.11 (#664): unknown-model fallback also lands on the
|
||||
// 80%-of-128K floor instead of the legacy hardcoded 50K, so
|
||||
// model-detection-fall-through doesn't quietly burn V4 prefix
|
||||
// cache at 5%-of-window.
|
||||
// 80%-of-128K legacy DeepSeek fallback instead of the legacy
|
||||
// hardcoded 50K, so model-detection-fall-through doesn't quietly
|
||||
// burn V4 prefix cache at 5%-of-window.
|
||||
assert_eq!(
|
||||
compaction_threshold_for_model_and_effort("unknown-model", Some("max")),
|
||||
102_400
|
||||
|
||||
@@ -4,16 +4,17 @@ use std::collections::HashSet;
|
||||
use std::fmt::Write;
|
||||
|
||||
use crate::compaction::estimate_input_tokens_conservative;
|
||||
use crate::models::{DEFAULT_CONTEXT_WINDOW_TOKENS, SystemPrompt, context_window_for_model};
|
||||
use crate::models::{
|
||||
LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, SystemPrompt, context_window_for_model,
|
||||
};
|
||||
use crate::session_manager::SessionContextReference;
|
||||
use crate::tui::app::{App, ToolDetailRecord};
|
||||
use crate::tui::file_mention::ContextReferenceSource;
|
||||
use crate::utils::estimate_message_chars;
|
||||
|
||||
/// Marker used by the engine's `append_working_set_summary` to tag the
|
||||
/// volatile tail block in the system prompt. Replicated here so the
|
||||
/// context inspector can distinguish stable prefix blocks from the
|
||||
/// ephemeral working-set block without importing engine internals.
|
||||
/// Marker used by per-turn working-set metadata. Replicated here so the
|
||||
/// context inspector can distinguish stable prompt blocks from volatile
|
||||
/// working-set context without importing engine internals.
|
||||
const WORKING_SET_MARKER: &str = "## Repo Working Set";
|
||||
|
||||
const CONTEXT_WARNING_THRESHOLD_PERCENT: f64 = 85.0;
|
||||
@@ -68,7 +69,7 @@ pub fn build_context_inspector_text(app: &App) -> String {
|
||||
}
|
||||
|
||||
fn context_usage(app: &App) -> (usize, u32, f64) {
|
||||
let max = context_window_for_model(&app.model).unwrap_or(DEFAULT_CONTEXT_WINDOW_TOKENS);
|
||||
let max = context_window_for_model(&app.model).unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
|
||||
let estimated =
|
||||
estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref());
|
||||
let total_chars = estimate_message_chars(&app.api_messages);
|
||||
|
||||
Reference in New Issue
Block a user