feat(prompts): parameterize model-specific facts — context window, pricing, thinking (#3025)
Extends apply_model_template() to substitute model-specific facts from
runtime lookups instead of hardcoded V4 claims:
- {context_window_note}: resolved from context_window_for_model();
if unknown, emits fallback wording instead of guessing
- {subagent_economics}: resolved from input_cost_note() (new public
pricing helper); cost-agnostic fallback when pricing is unknown
- {model_thinking_note}: gated on model_supports_reasoning();
empty string when the model does not emit thinking tokens
The hardcoded "1M-token window", "$0.14/M Flash", and V4 thinking
strategy will only reach models whose capability lookups return those
values (DeepSeek V4 family). Non-DeepSeek models get accurate facts
or honest "unknown" wording.
Adds input_cost_note() to pricing.rs — returns a one-line sub-agent
cost description, or None when pricing is unavailable.
This commit is contained in:
@@ -166,6 +166,21 @@ fn deepseek_v4_flash_pricing() -> ModelPricing {
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a one-line cost note for the given model, suitable for the
|
||||
/// sub-agent economics section of the system prompt (#3025).
|
||||
///
|
||||
/// Returns `None` when pricing is unknown — the prompt should use
|
||||
/// cost-agnostic wording instead.
|
||||
#[must_use]
|
||||
pub fn input_cost_note(model: &str) -> Option<String> {
|
||||
let pricing = pricing_for_model(model)?;
|
||||
Some(format!(
|
||||
"Sub-agents are cheap — {} costs ${:.2} per million input tokens.",
|
||||
model,
|
||||
pricing.usd.input_cache_miss_per_million
|
||||
))
|
||||
}
|
||||
|
||||
/// Calculate cost for a turn given token usage and model.
|
||||
#[must_use]
|
||||
#[allow(dead_code)]
|
||||
|
||||
@@ -796,7 +796,48 @@ pub(crate) fn render_runtime_policy_reference() -> String {
|
||||
/// says "You are deepseek-v4-pro" or "You are deepseek-v4-flash" instead
|
||||
/// of a static placeholder.
|
||||
fn apply_model_template(prompt: &str, model_id: &str) -> String {
|
||||
prompt.replace("{model_id}", model_id)
|
||||
let mut prompt = prompt.replace("{model_id}", model_id);
|
||||
|
||||
// #3025: Substitute model-specific facts so non-DeepSeek models don't
|
||||
// get V4 architecture claims, 1M-window assumptions, or Flash pricing.
|
||||
let ctx_window = crate::models::context_window_for_model(model_id);
|
||||
let window_note = if let Some(window) = ctx_window {
|
||||
format!(
|
||||
"You have a {}-token context window. Do not summarize or delete \
|
||||
earlier turns just because the transcript has crossed an older \
|
||||
threshold.",
|
||||
if window >= 1_000_000 {
|
||||
"one-million-token".to_string()
|
||||
} else {
|
||||
format!("{}", window)
|
||||
}
|
||||
)
|
||||
} else {
|
||||
"Your context window is provider-dependent and not known to the \
|
||||
harness; treat the app's context-pressure indicator as authoritative \
|
||||
and suggest /compact when it reports high pressure."
|
||||
.to_string()
|
||||
};
|
||||
prompt = prompt.replace("{context_window_note}", &window_note);
|
||||
|
||||
let subagent_econ = crate::pricing::input_cost_note(model_id).unwrap_or_else(|| {
|
||||
"Sub-agents keep your main context clean; use them liberally for \
|
||||
parallel work."
|
||||
.to_string()
|
||||
});
|
||||
prompt = prompt.replace("{subagent_economics}", &subagent_econ);
|
||||
|
||||
let thinking_note = if crate::models::model_supports_reasoning(model_id) {
|
||||
"Models may emit *thinking tokens* before final answers. These are \
|
||||
invisible to the user but count against context. Use them strategically: \
|
||||
skip for lookups, light for simple code generation, deep for debugging."
|
||||
.to_string()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
prompt = prompt.replace("{model_thinking_note}", &thinking_note);
|
||||
|
||||
prompt
|
||||
}
|
||||
|
||||
// Tool names grouped under the "discovery" heading of the tool taxonomy.
// NOTE(review): how this list is consumed is not visible in this hunk —
// confirm against its users before relying on the grouping.
const TOOL_TAXONOMY_DISCOVERY: &[&str] = &["grep_files", "file_search"];
|
||||
|
||||
Reference in New Issue
Block a user