feat(prompts): parameterize model-specific facts — context window, pricing, thinking (#3025)

Extends apply_model_template() to substitute model-specific facts from
runtime lookups instead of hardcoded V4 claims:

- {context_window_note}: resolved from context_window_for_model();
  if unknown, emits fallback wording instead of guessing
- {subagent_economics}: resolved from input_cost_note() (new public
  pricing helper); cost-agnostic fallback when pricing is unknown
- {model_thinking_note}: gated on model_supports_reasoning();
  empty string when the model does not emit thinking tokens

The hardcoded "1M-token window", "$0.14/M Flash", and V4 thinking
strategy will only reach models whose capability lookups return those
values (DeepSeek V4 family). Non-DeepSeek models get accurate facts
or honest "unknown" wording.

Adds input_cost_note() to pricing.rs — returns a one-line sub-agent
cost description, or None when pricing is unavailable.
This commit is contained in:
Hunter Bown
2026-06-10 16:29:17 -07:00
parent b23067bacd
commit 29edb6d0cf
2 changed files with 57 additions and 1 deletions
+15
View File
@@ -166,6 +166,21 @@ fn deepseek_v4_flash_pricing() -> ModelPricing {
}
}
/// Return a one-line cost note for the given model, suitable for the
/// sub-agent economics section of the system prompt (#3025).
///
/// Returns `None` when pricing is unknown — the prompt should use
/// cost-agnostic wording instead.
#[must_use]
pub fn input_cost_note(model: &str) -> Option<String> {
let pricing = pricing_for_model(model)?;
Some(format!(
"Sub-agents are cheap — {} costs ${:.2} per million input tokens.",
model,
pricing.usd.input_cache_miss_per_million
))
}
/// Calculate cost for a turn given token usage and model.
#[must_use]
#[allow(dead_code)]
+42 -1
View File
@@ -796,7 +796,48 @@ pub(crate) fn render_runtime_policy_reference() -> String {
/// says "You are deepseek-v4-pro" or "You are deepseek-v4-flash" instead
/// of a static placeholder.
fn apply_model_template(prompt: &str, model_id: &str) -> String {
prompt.replace("{model_id}", model_id)
let mut prompt = prompt.replace("{model_id}", model_id);
// #3025: Substitute model-specific facts so non-DeepSeek models don't
// get V4 architecture claims, 1M-window assumptions, or Flash pricing.
let ctx_window = crate::models::context_window_for_model(model_id);
let window_note = if let Some(window) = ctx_window {
format!(
"You have a {}-token context window. Do not summarize or delete \
earlier turns just because the transcript has crossed an older \
threshold.",
if window >= 1_000_000 {
"one-million-token".to_string()
} else {
format!("{}", window)
}
)
} else {
"Your context window is provider-dependent and not known to the \
harness; treat the app's context-pressure indicator as authoritative \
and suggest /compact when it reports high pressure."
.to_string()
};
prompt = prompt.replace("{context_window_note}", &window_note);
let subagent_econ = crate::pricing::input_cost_note(model_id).unwrap_or_else(|| {
"Sub-agents keep your main context clean; use them liberally for \
parallel work."
.to_string()
});
prompt = prompt.replace("{subagent_economics}", &subagent_econ);
let thinking_note = if crate::models::model_supports_reasoning(model_id) {
"Models may emit *thinking tokens* before final answers. These are \
invisible to the user but count against context. Use them strategically: \
skip for lookups, light for simple code generation, deep for debugging."
.to_string()
} else {
String::new()
};
prompt = prompt.replace("{model_thinking_note}", &thinking_note);
prompt
}
const TOOL_TAXONOMY_DISCOVERY: &[&str] = &["grep_files", "file_search"];