merge: V4 capacity priors (fixes #73)

This commit is contained in:
Hunter Bown
2026-04-26 12:53:31 -05:00
3 changed files with 110 additions and 1 deletions
+16
View File
@@ -140,6 +140,8 @@ pub struct CapacityConfig {
pub profile_window: Option<usize>,
pub deepseek_v3_2_chat_prior: Option<f64>,
pub deepseek_v3_2_reasoner_prior: Option<f64>,
pub deepseek_v4_pro_prior: Option<f64>,
pub deepseek_v4_flash_prior: Option<f64>,
pub fallback_default_prior: Option<f64>,
}
@@ -811,6 +813,8 @@ fn apply_env_overrides(config: &mut Config) {
profile_window: None,
deepseek_v3_2_chat_prior: None,
deepseek_v3_2_reasoner_prior: None,
deepseek_v4_pro_prior: None,
deepseek_v4_flash_prior: None,
fallback_default_prior: None,
});
@@ -873,6 +877,16 @@ fn apply_env_overrides(config: &mut Config) {
{
capacity.deepseek_v3_2_reasoner_prior = Some(parsed);
}
if let Ok(value) = std::env::var("DEEPSEEK_CAPACITY_PRIOR_V4_PRO")
&& let Ok(parsed) = value.parse::<f64>()
{
capacity.deepseek_v4_pro_prior = Some(parsed);
}
if let Ok(value) = std::env::var("DEEPSEEK_CAPACITY_PRIOR_V4_FLASH")
&& let Ok(parsed) = value.parse::<f64>()
{
capacity.deepseek_v4_flash_prior = Some(parsed);
}
if let Ok(value) = std::env::var("DEEPSEEK_CAPACITY_PRIOR_FALLBACK")
&& let Ok(parsed) = value.parse::<f64>()
{
@@ -892,6 +906,8 @@ fn apply_env_overrides(config: &mut Config) {
&& c.profile_window.is_none()
&& c.deepseek_v3_2_chat_prior.is_none()
&& c.deepseek_v3_2_reasoner_prior.is_none()
&& c.deepseek_v4_pro_prior.is_none()
&& c.deepseek_v4_flash_prior.is_none()
&& c.fallback_default_prior.is_none()
}) {
config.capacity = None;
+92 -1
View File
@@ -24,6 +24,8 @@ impl Default for CapacityControllerConfig {
let mut model_priors = HashMap::new();
model_priors.insert("deepseek_v3_2_chat".to_string(), 3.9);
model_priors.insert("deepseek_v3_2_reasoner".to_string(), 4.1);
model_priors.insert("deepseek_v4_pro".to_string(), 3.5);
model_priors.insert("deepseek_v4_flash".to_string(), 4.2);
Self {
enabled: true,
@@ -89,6 +91,12 @@ impl CapacityControllerConfig {
out.model_priors
.insert("deepseek_v3_2_reasoner".to_string(), v);
}
if let Some(v) = capacity.deepseek_v4_pro_prior {
out.model_priors.insert("deepseek_v4_pro".to_string(), v);
}
if let Some(v) = capacity.deepseek_v4_flash_prior {
out.model_priors.insert("deepseek_v4_flash".to_string(), v);
}
if let Some(v) = capacity.fallback_default_prior {
out.fallback_default = v;
}
@@ -460,8 +468,17 @@ pub fn decide_policy(
}
fn normalize_model_prior_key(model: &str) -> &str {
// Strip optional "deepseek-ai/" NIM namespace prefix before pattern matching.
let model = model.strip_prefix("deepseek-ai/").unwrap_or(model);
let lower = model.to_ascii_lowercase();
if lower.contains("reasoner") || lower.contains("r1") {
// V4 variants are checked before the generic reasoner/chat branches: every branch
// is a substring test, so a V4 id that also contains "reasoner", "r1", "chat", or
// "v3" would otherwise be assigned a V3.2 prior.
if lower.contains("v4-pro") || lower.contains("v4_pro") {
"deepseek_v4_pro"
} else if lower.contains("v4-flash") || lower.contains("v4_flash") {
"deepseek_v4_flash"
} else if lower.contains("reasoner") || lower.contains("r1") {
"deepseek_v3_2_reasoner"
} else if lower.contains("chat") || lower.contains("v3") {
"deepseek_v3_2_chat"
@@ -590,6 +607,80 @@ mod tests {
assert_eq!(decide_policy(&cfg, &snap), GuardrailAction::VerifyAndReplan);
}
/// Every accepted spelling of the V4 Pro model id (hyphen or underscore before
/// "pro", with or without the "deepseek-ai/" namespace prefix) must resolve to
/// the `deepseek_v4_pro` prior key.
#[test]
fn normalize_v4_pro_variants() {
    let spellings = [
        "deepseek-v4-pro",
        "deepseek-v4_pro",
        "deepseek-ai/deepseek-v4-pro",
        "deepseek-ai/deepseek-v4_pro",
    ];
    for model in spellings {
        assert_eq!(
            normalize_model_prior_key(model),
            "deepseek_v4_pro",
            "model id: {model}"
        );
    }
}
/// Every accepted spelling of the V4 Flash model id (hyphen or underscore before
/// "flash", with or without the "deepseek-ai/" namespace prefix) must resolve to
/// the `deepseek_v4_flash` prior key.
#[test]
fn normalize_v4_flash_variants() {
    let spellings = [
        "deepseek-v4-flash",
        "deepseek-v4_flash",
        "deepseek-ai/deepseek-v4-flash",
        "deepseek-ai/deepseek-v4_flash",
    ];
    for model in spellings {
        assert_eq!(
            normalize_model_prior_key(model),
            "deepseek_v4_flash",
            "model id: {model}"
        );
    }
}
/// Regression guard: adding the V4 branches must not change how the older
/// chat/reasoner ids, or an unrecognised id, are mapped to prior keys.
#[test]
fn normalize_v3_and_reasoner_unchanged() {
    // (model id passed in, prior key expected back)
    let expectations = [
        ("deepseek-chat", "deepseek_v3_2_chat"),
        ("deepseek-v3-chat", "deepseek_v3_2_chat"),
        ("deepseek-reasoner", "deepseek_v3_2_reasoner"),
        ("deepseek-r1", "deepseek_v3_2_reasoner"),
        ("unknown-model", "fallback_default"),
    ];
    for (model, expected_key) in expectations {
        assert_eq!(
            normalize_model_prior_key(model),
            expected_key,
            "model id: {model}"
        );
    }
}
/// The default controller configuration must ship with priors for both V4
/// models: 3.5 for Pro and 4.2 for Flash.
#[test]
fn v4_priors_loaded_into_default_config() {
    let defaults = CapacityControllerConfig::default();
    // Copy the value out so the comparison is against a plain Option<f64>.
    let prior_for = |key: &str| defaults.model_priors.get(key).copied();

    assert_eq!(prior_for("deepseek_v4_pro"), Some(3.5));
    assert_eq!(prior_for("deepseek_v4_flash"), Some(4.2));
}
#[test]
fn cooldown_blocks_repeated_action() {
let mut controller = CapacityController::new(CapacityControllerConfig::default());
+2
View File
@@ -1546,6 +1546,8 @@ mod tests {
profile_window: None,
deepseek_v3_2_chat_prior: None,
deepseek_v3_2_reasoner_prior: None,
deepseek_v4_pro_prior: None,
deepseek_v4_flash_prior: None,
fallback_default_prior: None,
});
let runtime_threads: SharedRuntimeThreadManager = Arc::new(RuntimeThreadManager::open(