refactor(models): rename legacy DeepSeek context window

2026-05-04 22:06:16 -05:00
parent a4dee56fcc
commit a14227edf8
4 changed files with 36 additions and 31 deletions
@@ -225,7 +225,7 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
        crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
    } else {
        crate::models::context_window_for_model(resolved_model)
-            .unwrap_or(crate::models::DEFAULT_CONTEXT_WINDOW_TOKENS)
+            .unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
    };

    // Max output tokens: DeepSeek V4 models allow 262K; others get 4096.
@@ -4070,7 +4070,7 @@ model = "deepseek-v4-pro"
        let cap = provider_capability(ApiProvider::Deepseek, "deepseek-coder");
        assert_eq!(
            cap.context_window,
-            crate::models::DEFAULT_CONTEXT_WINDOW_TOKENS
+            crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS
        );
        assert_eq!(cap.max_output, 4096);
        assert!(!cap.thinking_supported);
@@ -160,9 +160,10 @@ impl Engine {
        let unique_reference_ids_recent_window =
            self.recent_unique_reference_count(message_window, turn);
        let context_window = usize::try_from(
-            context_window_for_model(&self.session.model).unwrap_or(DEFAULT_CONTEXT_WINDOW_TOKENS),
+            context_window_for_model(&self.session.model)
+                .unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS),
        )
-        .unwrap_or(usize::try_from(DEFAULT_CONTEXT_WINDOW_TOKENS).unwrap_or(128_000))
+        .unwrap_or(usize::try_from(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS).unwrap_or(128_000))
        .max(1);
        let context_used_ratio = (self.estimated_input_tokens() as f64) / (context_window as f64);

@@ -2,15 +2,17 @@

 use serde::{Deserialize, Serialize};

-pub const DEFAULT_CONTEXT_WINDOW_TOKENS: u32 = 128_000;
+/// Context window for legacy DeepSeek model IDs that name no V4 alias and carry
+/// no explicit `*k` suffix; also the fallback for unrecognised model IDs.
+pub const LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS: u32 = 128_000;
 pub const DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS: u32 = 1_000_000;
 /// Last-resort compaction trigger when [`context_window_for_model`] returns
 /// `None` (an unrecognised model id). v0.8.11 raised this from `50_000` to
-/// `102_400` (80% of [`DEFAULT_CONTEXT_WINDOW_TOKENS`]) so unknown models
-/// inherit the same late-trigger discipline as V4 instead of paying the
-/// prefix-cache hit at 5% of the V4 window. Known DeepSeek / Claude models
-/// resolve to their own scaled value via [`compaction_threshold_for_model`]
-/// (#664).
+/// `102_400` (80% of [`LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS`]) so unknown
+/// models inherit the same late-trigger discipline as V4 instead of paying
+/// the prefix-cache hit at 5% of the V4 window. Known DeepSeek / Claude
+/// models resolve to their own scaled value via
+/// [`compaction_threshold_for_model`] (#664).
 pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 102_400;
 pub const DEFAULT_COMPACTION_MESSAGE_THRESHOLD: usize = 50;
 const COMPACTION_THRESHOLD_PERCENT: u32 = 80;
@@ -212,8 +214,9 @@ pub struct Usage {
 #[must_use]
 pub fn context_window_for_model(model: &str) -> Option<u32> {
    let lower = model.to_lowercase();
-    // Unknown DeepSeek model IDs default to 128k unless an explicit *k suffix is present.
-    // DeepSeek-V4 family and current legacy aliases ship with a 1M context window.
+    // Unknown legacy DeepSeek model IDs default to 128K unless an explicit
+    // *k suffix is present. DeepSeek-V4 family and current compatibility
+    // aliases ship with a 1M context window.
    if lower.contains("deepseek") {
        if let Some(explicit_window) = deepseek_context_window_hint(&lower) {
            return Some(explicit_window);
@@ -221,7 +224,7 @@ pub fn context_window_for_model(model: &str) -> Option<u32> {
        if lower.contains("v4") || is_current_deepseek_v4_alias(&lower) {
            return Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS);
        }
-        return Some(DEFAULT_CONTEXT_WINDOW_TOKENS);
+        return Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
    }
    if lower.contains("claude") {
        return Some(200_000);
@@ -411,14 +414,14 @@ mod tests {
    }

    #[test]
-    fn unknown_deepseek_models_map_to_128k_context_window() {
+    fn unknown_legacy_deepseek_models_map_to_128k_context_window() {
        assert_eq!(
            context_window_for_model("deepseek-coder"),
-            Some(DEFAULT_CONTEXT_WINDOW_TOKENS)
+            Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
        );
        assert_eq!(
            context_window_for_model("deepseek-v3.2-0324"),
-            Some(DEFAULT_CONTEXT_WINDOW_TOKENS)
+            Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
        );
    }

@@ -447,7 +450,7 @@ mod tests {
        );
        assert_eq!(
            context_window_for_model("deepseek-v3.2-2k-preview"),
-            Some(DEFAULT_CONTEXT_WINDOW_TOKENS)
+            Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
        );
    }

@@ -458,11 +461,11 @@ mod tests {
            102_400
        );
        // v0.8.11 (#664): unknown-model fallback also resolves to 80% of
-        // `DEFAULT_CONTEXT_WINDOW_TOKENS` (128k) — same late-trigger
-        // discipline as the V4 path. Was `50_000` pre-v0.8.11; that
-        // hardcoded value compacted at ~5% of a 1M window when the model
-        // detection silently fell through, which is exactly the
-        // prefix-cache-burning behaviour we're getting away from.
+        // `LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS` (128K legacy DeepSeek
+        // fallback) — same late-trigger discipline as the V4 path. Was
+        // `50_000` pre-v0.8.11; that hardcoded value compacted at ~5% of a
+        // 1M window when model detection silently fell through, which is
+        // exactly the prefix-cache-burning behaviour we're getting away from.
        assert_eq!(compaction_threshold_for_model("unknown-model"), 102_400);
    }

@@ -509,9 +512,9 @@ mod tests {
            102_400
        );
        // v0.8.11 (#664): unknown-model fallback also lands on the
-        // 80%-of-128K floor instead of the legacy hardcoded 50K, so
-        // model-detection-fall-through doesn't quietly burn V4 prefix
-        // cache at 5%-of-window.
+        // 80%-of-128K legacy DeepSeek fallback instead of the pre-v0.8.11
+        // hardcoded 50K, so model-detection-fall-through doesn't quietly
+        // burn V4 prefix cache at 5%-of-window.
        assert_eq!(
            compaction_threshold_for_model_and_effort("unknown-model", Some("max")),
            102_400
@@ -4,16 +4,17 @@ use std::collections::HashSet;
 use std::fmt::Write;

 use crate::compaction::estimate_input_tokens_conservative;
-use crate::models::{DEFAULT_CONTEXT_WINDOW_TOKENS, SystemPrompt, context_window_for_model};
+use crate::models::{
+    LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, SystemPrompt, context_window_for_model,
+};
 use crate::session_manager::SessionContextReference;
 use crate::tui::app::{App, ToolDetailRecord};
 use crate::tui::file_mention::ContextReferenceSource;
 use crate::utils::estimate_message_chars;

-/// Marker used by the engine's `append_working_set_summary` to tag the
-/// volatile tail block in the system prompt. Replicated here so the
-/// context inspector can distinguish stable prefix blocks from the
-/// ephemeral working-set block without importing engine internals.
+/// Marker used by per-turn working-set metadata. Replicated here so the
+/// context inspector can distinguish stable prompt blocks from volatile
+/// working-set context without importing engine internals.
 const WORKING_SET_MARKER: &str = "## Repo Working Set";

 const CONTEXT_WARNING_THRESHOLD_PERCENT: f64 = 85.0;
@@ -68,7 +69,7 @@ pub fn build_context_inspector_text(app: &App) -> String {
 }

 fn context_usage(app: &App) -> (usize, u32, f64) {
-    let max = context_window_for_model(&app.model).unwrap_or(DEFAULT_CONTEXT_WINDOW_TOKENS);
+    let max = context_window_for_model(&app.model).unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
    let estimated =
        estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref());
    let total_chars = estimate_message_chars(&app.api_messages);