merge: V4 capacity priors (fixes #73)
This commit is contained in:
@@ -140,6 +140,8 @@ pub struct CapacityConfig {
|
||||
pub profile_window: Option<usize>,
|
||||
pub deepseek_v3_2_chat_prior: Option<f64>,
|
||||
pub deepseek_v3_2_reasoner_prior: Option<f64>,
|
||||
pub deepseek_v4_pro_prior: Option<f64>,
|
||||
pub deepseek_v4_flash_prior: Option<f64>,
|
||||
pub fallback_default_prior: Option<f64>,
|
||||
}
|
||||
|
||||
@@ -811,6 +813,8 @@ fn apply_env_overrides(config: &mut Config) {
|
||||
profile_window: None,
|
||||
deepseek_v3_2_chat_prior: None,
|
||||
deepseek_v3_2_reasoner_prior: None,
|
||||
deepseek_v4_pro_prior: None,
|
||||
deepseek_v4_flash_prior: None,
|
||||
fallback_default_prior: None,
|
||||
});
|
||||
|
||||
@@ -873,6 +877,16 @@ fn apply_env_overrides(config: &mut Config) {
|
||||
{
|
||||
capacity.deepseek_v3_2_reasoner_prior = Some(parsed);
|
||||
}
|
||||
if let Ok(value) = std::env::var("DEEPSEEK_CAPACITY_PRIOR_V4_PRO")
|
||||
&& let Ok(parsed) = value.parse::<f64>()
|
||||
{
|
||||
capacity.deepseek_v4_pro_prior = Some(parsed);
|
||||
}
|
||||
if let Ok(value) = std::env::var("DEEPSEEK_CAPACITY_PRIOR_V4_FLASH")
|
||||
&& let Ok(parsed) = value.parse::<f64>()
|
||||
{
|
||||
capacity.deepseek_v4_flash_prior = Some(parsed);
|
||||
}
|
||||
if let Ok(value) = std::env::var("DEEPSEEK_CAPACITY_PRIOR_FALLBACK")
|
||||
&& let Ok(parsed) = value.parse::<f64>()
|
||||
{
|
||||
@@ -892,6 +906,8 @@ fn apply_env_overrides(config: &mut Config) {
|
||||
&& c.profile_window.is_none()
|
||||
&& c.deepseek_v3_2_chat_prior.is_none()
|
||||
&& c.deepseek_v3_2_reasoner_prior.is_none()
|
||||
&& c.deepseek_v4_pro_prior.is_none()
|
||||
&& c.deepseek_v4_flash_prior.is_none()
|
||||
&& c.fallback_default_prior.is_none()
|
||||
}) {
|
||||
config.capacity = None;
|
||||
|
||||
@@ -24,6 +24,8 @@ impl Default for CapacityControllerConfig {
|
||||
let mut model_priors = HashMap::new();
|
||||
model_priors.insert("deepseek_v3_2_chat".to_string(), 3.9);
|
||||
model_priors.insert("deepseek_v3_2_reasoner".to_string(), 4.1);
|
||||
model_priors.insert("deepseek_v4_pro".to_string(), 3.5);
|
||||
model_priors.insert("deepseek_v4_flash".to_string(), 4.2);
|
||||
|
||||
Self {
|
||||
enabled: true,
|
||||
@@ -89,6 +91,12 @@ impl CapacityControllerConfig {
|
||||
out.model_priors
|
||||
.insert("deepseek_v3_2_reasoner".to_string(), v);
|
||||
}
|
||||
if let Some(v) = capacity.deepseek_v4_pro_prior {
|
||||
out.model_priors.insert("deepseek_v4_pro".to_string(), v);
|
||||
}
|
||||
if let Some(v) = capacity.deepseek_v4_flash_prior {
|
||||
out.model_priors.insert("deepseek_v4_flash".to_string(), v);
|
||||
}
|
||||
if let Some(v) = capacity.fallback_default_prior {
|
||||
out.fallback_default = v;
|
||||
}
|
||||
@@ -460,8 +468,17 @@ pub fn decide_policy(
|
||||
}
|
||||
|
||||
fn normalize_model_prior_key(model: &str) -> &str {
|
||||
// Strip optional "deepseek-ai/" NIM namespace prefix before pattern matching.
|
||||
let model = model.strip_prefix("deepseek-ai/").unwrap_or(model);
|
||||
let lower = model.to_ascii_lowercase();
|
||||
if lower.contains("reasoner") || lower.contains("r1") {
|
||||
// V4 variants must be checked before the generic V3/chat/reasoner branches
|
||||
// because those branches do not contain "v4" tokens and the ordering prevents
|
||||
// accidental cross-matches.
|
||||
if lower.contains("v4-pro") || lower.contains("v4_pro") {
|
||||
"deepseek_v4_pro"
|
||||
} else if lower.contains("v4-flash") || lower.contains("v4_flash") {
|
||||
"deepseek_v4_flash"
|
||||
} else if lower.contains("reasoner") || lower.contains("r1") {
|
||||
"deepseek_v3_2_reasoner"
|
||||
} else if lower.contains("chat") || lower.contains("v3") {
|
||||
"deepseek_v3_2_chat"
|
||||
@@ -590,6 +607,80 @@ mod tests {
|
||||
assert_eq!(decide_policy(&cfg, &snap), GuardrailAction::VerifyAndReplan);
|
||||
}
|
||||
|
||||
/// Every spelling of the V4 Pro model id exercised here (hyphen or underscore
/// between `v4` and `pro`, with or without the `deepseek-ai/` prefix) must
/// normalize to the `deepseek_v4_pro` prior key.
#[test]
fn normalize_v4_pro_variants() {
    let spellings = [
        "deepseek-v4-pro",
        "deepseek-v4_pro",
        "deepseek-ai/deepseek-v4-pro",
        "deepseek-ai/deepseek-v4_pro",
    ];
    for model in spellings {
        assert_eq!(
            normalize_model_prior_key(model),
            "deepseek_v4_pro",
            "unexpected prior key for model id {model:?}"
        );
    }
}
|
||||
|
||||
/// Every spelling of the V4 Flash model id exercised here (hyphen or underscore
/// between `v4` and `flash`, with or without the `deepseek-ai/` prefix) must
/// normalize to the `deepseek_v4_flash` prior key.
#[test]
fn normalize_v4_flash_variants() {
    let spellings = [
        "deepseek-v4-flash",
        "deepseek-v4_flash",
        "deepseek-ai/deepseek-v4-flash",
        "deepseek-ai/deepseek-v4_flash",
    ];
    for model in spellings {
        assert_eq!(
            normalize_model_prior_key(model),
            "deepseek_v4_flash",
            "unexpected prior key for model id {model:?}"
        );
    }
}
|
||||
|
||||
/// Regression guard for the pre-existing mappings: chat/V3 ids, reasoner/R1
/// ids, and an unrecognized id must keep resolving to the keys listed below.
#[test]
fn normalize_v3_and_reasoner_unchanged() {
    // (model id passed in, prior key expected back)
    let expectations = [
        ("deepseek-chat", "deepseek_v3_2_chat"),
        ("deepseek-v3-chat", "deepseek_v3_2_chat"),
        ("deepseek-reasoner", "deepseek_v3_2_reasoner"),
        ("deepseek-r1", "deepseek_v3_2_reasoner"),
        ("unknown-model", "fallback_default"),
    ];
    for (model, expected_key) in expectations {
        assert_eq!(
            normalize_model_prior_key(model),
            expected_key,
            "unexpected prior key for model id {model:?}"
        );
    }
}
|
||||
|
||||
/// The default controller config must carry the built-in V4 priors:
/// 3.5 for `deepseek_v4_pro` and 4.2 for `deepseek_v4_flash`.
#[test]
fn v4_priors_loaded_into_default_config() {
    let defaults = CapacityControllerConfig::default();
    let priors = &defaults.model_priors;

    assert_eq!(priors.get("deepseek_v4_pro"), Some(&3.5));
    assert_eq!(priors.get("deepseek_v4_flash"), Some(&4.2));
}
|
||||
|
||||
#[test]
|
||||
fn cooldown_blocks_repeated_action() {
|
||||
let mut controller = CapacityController::new(CapacityControllerConfig::default());
|
||||
|
||||
@@ -1546,6 +1546,8 @@ mod tests {
|
||||
profile_window: None,
|
||||
deepseek_v3_2_chat_prior: None,
|
||||
deepseek_v3_2_reasoner_prior: None,
|
||||
deepseek_v4_pro_prior: None,
|
||||
deepseek_v4_flash_prior: None,
|
||||
fallback_default_prior: None,
|
||||
});
|
||||
let runtime_threads: SharedRuntimeThreadManager = Arc::new(RuntimeThreadManager::open(
|
||||
|
||||
Reference in New Issue
Block a user