feat(prompts): parameterize model-specific facts — context window, pricing, thinking (#3025)

Extends apply_model_template() to substitute model-specific facts from runtime lookups instead of hardcoded V4 claims:

- {context_window_note}: resolved from context_window_for_model(); if unknown, emits fallback wording instead of guessing
- {subagent_economics}: resolved from input_cost_note() (new public pricing helper); cost-agnostic fallback when pricing is unknown
- {model_thinking_note}: gated on model_supports_reasoning(); empty string when the model does not emit thinking tokens

The hardcoded "1M-token window", "$0.14/M Flash", and V4 thinking strategy will only reach models whose capability lookups return those values (DeepSeek V4 family). Non-DeepSeek models get accurate facts or honest "unknown" wording.

Adds input_cost_note() to pricing.rs — returns a one-line sub-agent cost description, or None when pricing is unavailable.
2026-06-10 16:29:17 -07:00
parent b23067bacd
commit 29edb6d0cf
2 changed files with 57 additions and 1 deletions
@@ -166,6 +166,21 @@ fn deepseek_v4_flash_pricing() -> ModelPricing {
    }
 }

+/// Build the one-line sub-agent cost sentence for `model` (#3025).
+///
+/// The sentence names the model and its `usd.input_cache_miss_per_million`
+/// rate, formatted to two decimal places, for use in the sub-agent
+/// economics section of the system prompt.
+///
+/// Returns `None` when `pricing_for_model` has no entry for `model`, so the
+/// prompt can fall back to cost-agnostic wording.
+#[must_use]
+pub fn input_cost_note(model: &str) -> Option<String> {
+    pricing_for_model(model).map(|rates| {
+        let per_million = rates.usd.input_cache_miss_per_million;
+        format!(
+            "Sub-agents are cheap — {} costs ${:.2} per million input tokens.",
+            model, per_million
+        )
+    })
+}
+
 /// Calculate cost for a turn given token usage and model.
 #[must_use]
 #[allow(dead_code)]
@@ -796,7 +796,48 @@ pub(crate) fn render_runtime_policy_reference() -> String {
 /// says "You are deepseek-v4-pro" or "You are deepseek-v4-flash" instead
 /// of a static placeholder.
 fn apply_model_template(prompt: &str, model_id: &str) -> String {
-    prompt.replace("{model_id}", model_id)
+    let mut prompt = prompt.replace("{model_id}", model_id);
+
+    // #3025: Substitute model-specific facts so non-DeepSeek models don't
+    // get V4 architecture claims, 1M-window assumptions, or Flash pricing.
+    let ctx_window = crate::models::context_window_for_model(model_id);
+    let window_note = if let Some(window) = ctx_window {
+        format!(
+            "You have a {}-token context window. Do not summarize or delete \
+             earlier turns just because the transcript has crossed an older \
+             threshold.",
+            if window >= 1_000_000 {
+                "one-million-token".to_string()
+            } else {
+                format!("{}", window)
+            }
+        )
+    } else {
+        "Your context window is provider-dependent and not known to the \
+         harness; treat the app's context-pressure indicator as authoritative \
+         and suggest /compact when it reports high pressure."
+            .to_string()
+    };
+    prompt = prompt.replace("{context_window_note}", &window_note);
+
+    let subagent_econ = crate::pricing::input_cost_note(model_id).unwrap_or_else(|| {
+        "Sub-agents keep your main context clean; use them liberally for \
+         parallel work."
+            .to_string()
+    });
+    prompt = prompt.replace("{subagent_economics}", &subagent_econ);
+
+    let thinking_note = if crate::models::model_supports_reasoning(model_id) {
+        "Models may emit *thinking tokens* before final answers. These are \
+         invisible to the user but count against context. Use them strategically: \
+         skip for lookups, light for simple code generation, deep for debugging."
+            .to_string()
+    } else {
+        String::new()
+    };
+    prompt = prompt.replace("{model_thinking_note}", &thinking_note);
+
+    prompt
 }

 const TOOL_TAXONOMY_DISCOVERY: &[&str] = &["grep_files", "file_search"];