diff --git a/README.md b/README.md index b1705b0e..67db4441 100644 --- a/README.md +++ b/README.md @@ -85,14 +85,16 @@ Set a default in config with `reasoning_effort = "max"` (or `off` / `low` / | Model | Thinking | Context | Input cache hit | Input cache miss | Output | |---|---|---|---|---|---| -| `deepseek-v4-pro` | default | 1M | $0.145 / 1M | $1.74 / 1M | $3.48 / 1M | +| `deepseek-v4-pro` | default | 1M | $0.03625 / 1M* | $0.435 / 1M* | $0.87 / 1M* | | `deepseek-v4-flash` | default | 1M | $0.028 / 1M | $0.14 / 1M | $0.28 / 1M | | `deepseek-ai/deepseek-v4-pro` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms | | `deepseek-ai/deepseek-v4-flash` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms | Legacy `deepseek-chat` and `deepseek-reasoner` remain as silent aliases for `deepseek-v4-flash` (priced identically). Pricing is per 1M tokens as published -by DeepSeek and is subject to change. +by DeepSeek and is subject to change. *DeepSeek lists the Pro rates above as a +limited-time 75% discount valid until 2026-05-05 15:59 UTC; the TUI estimator +falls back to the base Pro rates after that timestamp. ## Usage diff --git a/crates/tui/src/pricing.rs b/crates/tui/src/pricing.rs index 3b39eabf..1ac8a55a 100644 --- a/crates/tui/src/pricing.rs +++ b/crates/tui/src/pricing.rs @@ -2,6 +2,8 @@ //! //! Pricing based on DeepSeek's published rates (per million tokens). +use chrono::{DateTime, TimeZone, Utc}; + use crate::models::Usage; /// Per-million-token pricing for a model. @@ -11,8 +13,18 @@ struct ModelPricing { output_per_million: f64, } +fn v4_pro_discount_ends_at() -> DateTime { + Utc.with_ymd_and_hms(2026, 5, 5, 15, 59, 0) + .single() + .expect("valid DeepSeek V4 Pro discount end timestamp") +} + /// Look up pricing for a model name. fn pricing_for_model(model: &str) -> Option { + pricing_for_model_at(model, Utc::now()) +} + +fn pricing_for_model_at(model: &str, now: DateTime) -> Option { let lower = model.to_lowercase(); if lower.starts_with("deepseek-ai/") { // NVIDIA NIM-hosted DeepSeek uses NVIDIA's catalog/account terms, not @@ -23,6 +35,15 @@ fn pricing_for_model(model: &str) -> Option { return None; } if lower.contains("v4-pro") || lower.contains("v4pro") { + if now <= v4_pro_discount_ends_at() { + // DeepSeek lists these as a limited-time 75% discount through + // 2026-05-05 15:59 UTC. + return Some(ModelPricing { + input_cache_hit_per_million: 0.03625, + input_cache_miss_per_million: 0.435, + output_per_million: 0.87, + }); + } Some(ModelPricing { input_cache_hit_per_million: 0.145, input_cache_miss_per_million: 1.74, @@ -44,15 +65,31 @@ fn pricing_for_model(model: &str) -> Option { #[allow(dead_code)] pub fn calculate_turn_cost(model: &str, input_tokens: u32, output_tokens: u32) -> Option { let pricing = pricing_for_model(model)?; + Some(calculate_turn_cost_with_pricing( + pricing, + input_tokens, + output_tokens, + )) +} + +fn calculate_turn_cost_with_pricing( + pricing: ModelPricing, + input_tokens: u32, + output_tokens: u32, +) -> f64 { let input_cost = (input_tokens as f64 / 1_000_000.0) * pricing.input_cache_miss_per_million; let output_cost = (output_tokens as f64 / 1_000_000.0) * pricing.output_per_million; - Some(input_cost + output_cost) + input_cost + output_cost } /// Calculate cost from provider usage, honoring DeepSeek context-cache fields. #[must_use] pub fn calculate_turn_cost_from_usage(model: &str, usage: &Usage) -> Option { let pricing = pricing_for_model(model)?; + Some(calculate_turn_cost_from_usage_with_pricing(pricing, usage)) +} + +fn calculate_turn_cost_from_usage_with_pricing(pricing: ModelPricing, usage: &Usage) -> f64 { let hit_tokens = usage.prompt_cache_hit_tokens.unwrap_or(0); let miss_tokens = usage .prompt_cache_miss_tokens @@ -64,7 +101,7 @@ pub fn calculate_turn_cost_from_usage(model: &str, usage: &Usage) -> Option let miss_cost = ((miss_tokens.saturating_add(uncategorized_input)) as f64 / 1_000_000.0) * pricing.input_cache_miss_per_million; let output_cost = (usage.output_tokens as f64 / 1_000_000.0) * pricing.output_per_million; - Some(hit_cost + miss_cost + output_cost) + hit_cost + miss_cost + output_cost } /// Format a USD cost for compact display. @@ -90,4 +127,37 @@ mod tests { fn nvidia_nim_deepseek_model_does_not_use_deepseek_platform_pricing() { assert!(calculate_turn_cost("deepseek-ai/deepseek-v4-pro", 1_000, 1_000).is_none()); } + + #[test] + fn v4_pro_uses_limited_time_discount_before_expiry() { + let before_expiry = Utc + .with_ymd_and_hms(2026, 5, 5, 15, 58, 59) + .single() + .unwrap(); + let pricing = pricing_for_model_at("deepseek-v4-pro", before_expiry).unwrap(); + + assert_eq!(pricing.input_cache_hit_per_million, 0.03625); + assert_eq!(pricing.input_cache_miss_per_million, 0.435); + assert_eq!(pricing.output_per_million, 0.87); + } + + #[test] + fn v4_pro_returns_to_base_rates_after_discount_expiry() { + let after_expiry = Utc.with_ymd_and_hms(2026, 5, 5, 16, 0, 0).single().unwrap(); + let pricing = pricing_for_model_at("deepseek-v4-pro", after_expiry).unwrap(); + + assert_eq!(pricing.input_cache_hit_per_million, 0.145); + assert_eq!(pricing.input_cache_miss_per_million, 1.74); + assert_eq!(pricing.output_per_million, 3.48); + } + + #[test] + fn v4_flash_keeps_current_published_rates() { + let now = Utc.with_ymd_and_hms(2026, 4, 25, 0, 0, 0).single().unwrap(); + let pricing = pricing_for_model_at("deepseek-v4-flash", now).unwrap(); + + assert_eq!(pricing.input_cache_hit_per_million, 0.028); + assert_eq!(pricing.input_cache_miss_per_million, 0.14); + assert_eq!(pricing.output_per_million, 0.28); + } }