From 754cff1c84b1ed65dccb231920913f1d477875dc Mon Sep 17 00:00:00 2001 From: Hunter B Date: Fri, 12 Jun 2026 23:56:01 -0700 Subject: [PATCH] Refresh GLM and MiniMax provider models --- config.example.toml | 23 ++++++++- crates/agent/src/lib.rs | 9 ++++ crates/config/src/lib.rs | 51 ++++++++++++++++++ crates/tui/src/config.rs | 83 +++++++++++++++++++++++++++++- crates/tui/src/models.rs | 13 +++++ crates/tui/src/tui/model_picker.rs | 3 ++ docs/PROVIDERS.md | 15 ++++-- 7 files changed, 191 insertions(+), 6 deletions(-) diff --git a/config.example.toml b/config.example.toml index 03e56bdf..1f176b77 100644 --- a/config.example.toml +++ b/config.example.toml @@ -41,6 +41,10 @@ base_url = "https://api.deepseek.com/beta" # arcee-ai/trinity-large-thinking — OpenRouter Arcee Trinity Large Thinking # xiaomi/mimo-v2.5-pro — OpenRouter Xiaomi MiMo 2.5 Pro # xiaomi/mimo-v2.5 — OpenRouter Xiaomi MiMo 2.5 +# z-ai/glm-5.1 — OpenRouter Z.AI GLM 5.1 +# z-ai/glm-5.2 — OpenRouter Z.AI GLM 5.2 (opt-in preview) +# GLM-5.1 — default direct Z.AI Coding Plan model +# GLM-5.2 — direct Z.AI opt-in preview model # kimi-k2.6 — default Moonshot/Kimi model ID # gpt-4.1 — default generic OpenAI-compatible model ID # deepseek-ai/deepseek-v4-flash — default AtlasCloud model ID @@ -343,7 +347,8 @@ max_subagents = 10 # optional (1-20) # Recent large model IDs also accepted here include arcee-ai/trinity-large-thinking, # minimax/minimax-m3, minimax/minimax-2.7, xiaomi/mimo-v2.5-pro, qwen/qwen3.6-flash, # qwen/qwen3.6-35b-a3b, qwen/qwen3.6-max-preview, qwen/qwen3.6-27b, qwen/qwen3.6-plus, -# qwen/qwen3.7-max, google/gemma-4-31b-it, z-ai/glm-5.1, moonshotai/kimi-k2.6, +# qwen/qwen3.7-max, google/gemma-4-31b-it, z-ai/glm-5.1, z-ai/glm-5.2, +# moonshotai/kimi-k2.6, # nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free, and nvidia/nemotron-3-ultra. # Xiaomi MiMo OpenAI-compatible endpoint (https://platform.xiaomimimo.com) @@ -402,6 +407,22 @@ max_subagents = 10 # optional (1-20) # # model = "kimi-for-coding" # # auth_mode = "kimi_oauth" # reads Kimi Code OAuth credentials +# Z.AI GLM Coding Plan endpoint (https://docs.z.ai) +[providers.zai] +# api_key = "YOUR_ZAI_API_KEY" # or Z_AI_API_KEY +# base_url = "https://api.z.ai/api/coding/paas/v4" +# # General API endpoint, if you are not using the Coding Plan: +# # base_url = "https://api.z.ai/api/paas/v4" +# model = "GLM-5.1" # default; GLM-5.2 is opt-in preview + +# MiniMax direct OpenAI-compatible endpoint (https://platform.minimax.io) +[providers.minimax] +# api_key = "YOUR_MINIMAX_API_KEY" +# base_url = "https://api.minimax.io/v1" +# model = "MiniMax-M3" # or MiniMax-M2.7, MiniMax-M2.7-highspeed +# # MiniMax also publishes Anthropic-compatible endpoints: +# # global https://api.minimax.io/anthropic, China https://api.minimaxi.com/anthropic. + # Self-hosted SGLang OpenAI-compatible server [providers.sglang] # api_key = "OPTIONAL_SGLANG_TOKEN" diff --git a/crates/agent/src/lib.rs b/crates/agent/src/lib.rs index 54621d2e..d79371b2 100644 --- a/crates/agent/src/lib.rs +++ b/crates/agent/src/lib.rs @@ -321,6 +321,13 @@ impl Default for ModelRegistry { supports_tools: true, supports_reasoning: true, }, + ModelInfo { + id: "z-ai/glm-5.2".to_string(), + provider: ProviderKind::Openrouter, + aliases: vec!["glm-5.2".to_string(), "zai-glm-5.2".to_string()], + supports_tools: true, + supports_reasoning: true, + }, ModelInfo { id: "tencent/hy3-preview".to_string(), provider: ProviderKind::Openrouter, @@ -1263,7 +1270,9 @@ mod tests { ("qwen3.6-plus", "qwen/qwen3.6-plus"), ("gemma-4-31b-it", "google/gemma-4-31b-it"), ("glm-5.1", "z-ai/glm-5.1"), + ("glm-5.2", "z-ai/glm-5.2"), ("minimax-m3", "minimax/minimax-m3"), + ("minimax-2.7", "minimax/minimax-2.7"), ("openrouter-mimo-v2.5-pro", "xiaomi/mimo-v2.5-pro"), ("openrouter-kimi-k2.7-code", "moonshotai/kimi-k2.7-code"), ("openrouter-kimi-k2.6", "moonshotai/kimi-k2.6"), diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index 19ad3698..b982bead 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -43,8 +43,11 @@ const OPENROUTER_ARCEE_TRINITY_LARGE_THINKING_MODEL: &str = "arcee-ai/trinity-la const OPENROUTER_GEMMA_4_31B_MODEL: &str = "google/gemma-4-31b-it"; const OPENROUTER_GEMMA_4_26B_A4B_MODEL: &str = "google/gemma-4-26b-a4b-it"; const OPENROUTER_GLM_5_1_MODEL: &str = "z-ai/glm-5.1"; +const OPENROUTER_GLM_5_2_MODEL: &str = "z-ai/glm-5.2"; const OPENROUTER_KIMI_K2_7_CODE_MODEL: &str = "moonshotai/kimi-k2.7-code"; const OPENROUTER_KIMI_K2_6_MODEL: &str = "moonshotai/kimi-k2.6"; +const OPENROUTER_MINIMAX_M3_MODEL: &str = "minimax/minimax-m3"; +const OPENROUTER_MINIMAX_2_7_MODEL: &str = "minimax/minimax-2.7"; const OPENROUTER_NEMOTRON_3_NANO_OMNI_MODEL: &str = "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free"; const OPENROUTER_QWEN_3_6_FLASH_MODEL: &str = "qwen/qwen3.6-flash"; @@ -104,6 +107,7 @@ const DEFAULT_OLLAMA_BASE_URL: &str = "http://localhost:11434/v1"; // Z.ai (GLM Coding Plan) defaults const DEFAULT_ZAI_MODEL: &str = "GLM-5.1"; +const ZAI_GLM_5_2_MODEL: &str = "GLM-5.2"; const DEFAULT_ZAI_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4"; // StepFun / StepFlash defaults const DEFAULT_STEPFUN_MODEL: &str = "step-3.7-flash"; @@ -2394,6 +2398,11 @@ fn normalize_model_for_provider(provider: ProviderKind, model: &str) -> String { { return canonical.to_string(); } + if matches!(provider, ProviderKind::Zai) + && let Some(canonical) = canonical_zai_model_id(model) + { + return canonical.to_string(); + } if matches!( provider, @@ -2590,6 +2599,16 @@ fn canonical_minimax_model_id(model: &str) -> Option<&'static str> { } } +fn canonical_zai_model_id(model: &str) -> Option<&'static str> { + let normalized = model.trim().to_ascii_lowercase(); + let normalized = normalized.replace(['_', ' '], "-"); + match normalized.as_str() { + "glm-5.1" | "glm-5-1" | "zai-glm-5.1" | "zai-glm-5-1" => Some(DEFAULT_ZAI_MODEL), + "glm-5.2" | "glm-5-2" | "zai-glm-5.2" | "zai-glm-5-2" => Some(ZAI_GLM_5_2_MODEL), + _ => None, + } +} + fn canonical_openrouter_recent_model_id(model: &str) -> Option<&'static str> { let normalized = model.trim().to_ascii_lowercase(); let normalized = normalized.replace(['_', ' '], "-"); @@ -2608,6 +2627,9 @@ fn canonical_openrouter_recent_model_id(model: &str) -> Option<&'static str> { OPENROUTER_GLM_5_1_MODEL | "glm-5.1" | "glm-5-1" | "zai-glm-5.1" | "zai-glm-5-1" => { Some(OPENROUTER_GLM_5_1_MODEL) } + OPENROUTER_GLM_5_2_MODEL | "glm-5.2" | "glm-5-2" | "zai-glm-5.2" | "zai-glm-5-2" => { + Some(OPENROUTER_GLM_5_2_MODEL) + } OPENROUTER_KIMI_K2_7_CODE_MODEL | "kimi" | "kimi-k2" @@ -2621,6 +2643,16 @@ fn canonical_openrouter_recent_model_id(model: &str) -> Option<&'static str> { OPENROUTER_KIMI_K2_6_MODEL | "kimi-k2.6" | "kimi-k2-6" | "moonshot-kimi-k2.6" => { Some(OPENROUTER_KIMI_K2_6_MODEL) } + OPENROUTER_MINIMAX_M3_MODEL | "minimax-m3" | "minimax-m-3" => { + Some(OPENROUTER_MINIMAX_M3_MODEL) + } + OPENROUTER_MINIMAX_2_7_MODEL + | "minimax-2.7" + | "minimax-2-7" + | "minimax-m2.7" + | "minimax-m2-7" + | "minimax-m-2.7" + | "minimax-m-2-7" => Some(OPENROUTER_MINIMAX_2_7_MODEL), OPENROUTER_NEMOTRON_3_NANO_OMNI_MODEL | "nemotron-3-nano-omni" | "nemotron-3-nano-omni-reasoning" => Some(OPENROUTER_NEMOTRON_3_NANO_OMNI_MODEL), @@ -5753,6 +5785,22 @@ mode = "token-plan-usa" ); } + #[test] + fn zai_aliases_resolve_to_canonical_models() { + assert_eq!( + normalize_model_for_provider(ProviderKind::Zai, "glm-5.1"), + DEFAULT_ZAI_MODEL + ); + assert_eq!( + normalize_model_for_provider(ProviderKind::Zai, "glm-5-2"), + ZAI_GLM_5_2_MODEL + ); + assert_eq!( + normalize_model_for_provider(ProviderKind::Zai, "custom-glm-preview"), + "custom-glm-preview" + ); + } + #[test] fn novita_provider_defaults_to_canonical_endpoint_and_model() { let _lock = env_lock(); @@ -6738,8 +6786,11 @@ mode = "token-plan-usa" ("kimi-k2.7-code", OPENROUTER_KIMI_K2_7_CODE_MODEL), ("kimi", OPENROUTER_KIMI_K2_7_CODE_MODEL), ("kimi-k2.6", OPENROUTER_KIMI_K2_6_MODEL), + ("minimax-m3", OPENROUTER_MINIMAX_M3_MODEL), + ("minimax-2.7", OPENROUTER_MINIMAX_2_7_MODEL), ("gemma-4-31b-it", OPENROUTER_GEMMA_4_31B_MODEL), ("glm-5.1", OPENROUTER_GLM_5_1_MODEL), + ("glm-5.2", OPENROUTER_GLM_5_2_MODEL), ] { let cli = CliRuntimeOverrides { provider: Some(ProviderKind::Openrouter), diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 3b60c00c..a2f6defd 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -71,6 +71,7 @@ pub const OPENROUTER_ARCEE_TRINITY_LARGE_THINKING_MODEL: &str = "arcee-ai/trinit pub const OPENROUTER_GEMMA_4_31B_MODEL: &str = "google/gemma-4-31b-it"; pub const OPENROUTER_GEMMA_4_26B_A4B_MODEL: &str = "google/gemma-4-26b-a4b-it"; pub const OPENROUTER_GLM_5_1_MODEL: &str = "z-ai/glm-5.1"; +pub const OPENROUTER_GLM_5_2_MODEL: &str = "z-ai/glm-5.2"; pub const OPENROUTER_KIMI_K2_7_CODE_MODEL: &str = "moonshotai/kimi-k2.7-code"; pub const OPENROUTER_KIMI_K2_6_MODEL: &str = "moonshotai/kimi-k2.6"; pub const OPENROUTER_MINIMAX_M3_MODEL: &str = "minimax/minimax-m3"; @@ -103,6 +104,7 @@ pub const RECENT_OPENROUTER_LARGE_MODELS: &[&str] = &[ OPENROUTER_KIMI_K2_7_CODE_MODEL, OPENROUTER_KIMI_K2_6_MODEL, OPENROUTER_GLM_5_1_MODEL, + OPENROUTER_GLM_5_2_MODEL, OPENROUTER_TENCENT_HY3_PREVIEW_MODEL, OPENROUTER_GEMMA_4_31B_MODEL, OPENROUTER_GEMMA_4_26B_A4B_MODEL, @@ -172,6 +174,7 @@ pub const COMMON_DEEPSEEK_MODELS: &[&str] = &[ ]; pub const OFFICIAL_DEEPSEEK_MODELS: &[&str] = &["deepseek-v4-pro", "deepseek-v4-flash"]; pub const DEFAULT_ZAI_MODEL: &str = "GLM-5.1"; +pub const ZAI_GLM_5_2_MODEL: &str = "GLM-5.2"; pub const DEFAULT_ZAI_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4"; pub const DEFAULT_STEPFUN_MODEL: &str = "step-3.7-flash"; pub const DEFAULT_STEPFUN_BASE_URL: &str = "https://api.stepfun.ai/v1"; @@ -657,6 +660,9 @@ fn canonical_openrouter_recent_model_id(model: &str) -> Option<&'static str> { OPENROUTER_GLM_5_1_MODEL | "glm-5.1" | "glm-5-1" | "zai-glm-5.1" | "zai-glm-5-1" => { Some(OPENROUTER_GLM_5_1_MODEL) } + OPENROUTER_GLM_5_2_MODEL | "glm-5.2" | "glm-5-2" | "zai-glm-5.2" | "zai-glm-5-2" => { + Some(OPENROUTER_GLM_5_2_MODEL) + } OPENROUTER_KIMI_K2_7_CODE_MODEL | "kimi" | "kimi-k2" @@ -673,6 +679,13 @@ fn canonical_openrouter_recent_model_id(model: &str) -> Option<&'static str> { OPENROUTER_MINIMAX_M3_MODEL | "minimax-m3" | "minimax-m-3" => { Some(OPENROUTER_MINIMAX_M3_MODEL) } + OPENROUTER_MINIMAX_2_7_MODEL + | "minimax-2.7" + | "minimax-2-7" + | "minimax-m2.7" + | "minimax-m2-7" + | "minimax-m-2.7" + | "minimax-m-2-7" => Some(OPENROUTER_MINIMAX_2_7_MODEL), OPENROUTER_NEMOTRON_3_NANO_OMNI_MODEL | "nemotron-3-nano-omni" | "nemotron-3-nano-omni-reasoning" => Some(OPENROUTER_NEMOTRON_3_NANO_OMNI_MODEL), @@ -797,6 +810,16 @@ fn canonical_moonshot_model_id(model: &str) -> Option<&'static str> { } } +fn canonical_zai_model_id(model: &str) -> Option<&'static str> { + let normalized = model.trim().to_ascii_lowercase(); + let normalized = normalized.replace(['_', ' '], "-"); + match normalized.as_str() { + "glm-5.1" | "glm-5-1" | "zai-glm-5.1" | "zai-glm-5-1" => Some(DEFAULT_ZAI_MODEL), + "glm-5.2" | "glm-5-2" | "zai-glm-5.2" | "zai-glm-5-2" => Some(ZAI_GLM_5_2_MODEL), + _ => None, + } +} + fn canonical_minimax_model_id(model: &str) -> Option<&'static str> { let normalized = model.trim().to_ascii_lowercase(); let normalized = normalized.replace(['_', ' '], "-"); @@ -866,6 +889,12 @@ pub fn normalize_model_name_for_provider(provider: ApiProvider, model: &str) -> .or_else(|| normalize_custom_model_id(model)); } + if matches!(provider, ApiProvider::Zai) { + return canonical_zai_model_id(model) + .map(ToString::to_string) + .or_else(|| normalize_custom_model_id(model)); + } + if matches!(provider, ApiProvider::Minimax) { return canonical_minimax_model_id(model) .map(ToString::to_string) @@ -950,7 +979,7 @@ pub fn model_completion_names_for_provider(provider: ApiProvider) -> Vec<&'stati ApiProvider::Openai | ApiProvider::Atlascloud => OFFICIAL_DEEPSEEK_MODELS.to_vec(), ApiProvider::Together => vec![DEFAULT_TOGETHER_MODEL], ApiProvider::OpenaiCodex => vec![DEFAULT_OPENAI_CODEX_MODEL], - ApiProvider::Zai => vec![DEFAULT_ZAI_MODEL], + ApiProvider::Zai => vec![DEFAULT_ZAI_MODEL, ZAI_GLM_5_2_MODEL], ApiProvider::Stepfun => vec![DEFAULT_STEPFUN_MODEL], ApiProvider::Anthropic => vec![ ANTHROPIC_OPUS_MODEL, @@ -8427,8 +8456,10 @@ api_key = "old-openrouter-key" ("kimi", OPENROUTER_KIMI_K2_7_CODE_MODEL), ("kimi-k2.6", OPENROUTER_KIMI_K2_6_MODEL), ("minimax-m3", OPENROUTER_MINIMAX_M3_MODEL), + ("minimax-2.7", OPENROUTER_MINIMAX_2_7_MODEL), ("gemma-4-31b-it", OPENROUTER_GEMMA_4_31B_MODEL), ("glm-5.1", OPENROUTER_GLM_5_1_MODEL), + ("glm-5.2", OPENROUTER_GLM_5_2_MODEL), ] { assert_eq!( normalize_model_name_for_provider(ApiProvider::Openrouter, alias).as_deref(), @@ -8565,11 +8596,14 @@ api_key = "old-openrouter-key" OPENROUTER_ARCEE_TRINITY_LARGE_THINKING_MODEL, OPENROUTER_XIAOMI_MIMO_V2_5_PRO_MODEL, OPENROUTER_MINIMAX_M3_MODEL, + OPENROUTER_MINIMAX_2_7_MODEL, OPENROUTER_QWEN_3_6_FLASH_MODEL, OPENROUTER_QWEN_3_6_35B_A3B_MODEL, OPENROUTER_QWEN_3_6_MAX_PREVIEW_MODEL, OPENROUTER_QWEN_3_6_27B_MODEL, OPENROUTER_QWEN_3_6_PLUS_MODEL, + OPENROUTER_GLM_5_1_MODEL, + OPENROUTER_GLM_5_2_MODEL, OPENROUTER_GEMMA_4_31B_MODEL, ] { assert!(models.contains(&expected), "missing {expected}"); @@ -8584,6 +8618,33 @@ api_key = "old-openrouter-key" ); } + #[test] + fn model_completion_names_for_zai_keep_5_1_default_and_include_5_2() { + let models = model_completion_names_for_provider(ApiProvider::Zai); + + assert_eq!(models.first().copied(), Some(DEFAULT_ZAI_MODEL)); + assert!(models.contains(&ZAI_GLM_5_2_MODEL)); + } + + #[test] + fn normalize_model_name_for_zai_canonicalizes_current_glm_models() { + for (alias, expected) in [ + ("glm-5.1", DEFAULT_ZAI_MODEL), + ("glm-5-1", DEFAULT_ZAI_MODEL), + ("glm-5.2", ZAI_GLM_5_2_MODEL), + ("zai-glm-5-2", ZAI_GLM_5_2_MODEL), + ] { + assert_eq!( + normalize_model_name_for_provider(ApiProvider::Zai, alias).as_deref(), + Some(expected) + ); + } + assert_eq!( + normalize_model_name_for_provider(ApiProvider::Zai, "glm-next-preview").as_deref(), + Some("glm-next-preview") + ); + } + #[test] fn model_completion_names_for_minimax_include_direct_chat_models() { let models = model_completion_names_for_provider(ApiProvider::Minimax); @@ -11360,6 +11421,9 @@ model = "deepseek-ai/deepseek-v4-pro" (OPENROUTER_QWEN_3_6_PLUS_MODEL, 1_000_000, 65_536), (OPENROUTER_XIAOMI_MIMO_V2_5_PRO_MODEL, 1_000_000, 131_072), (OPENROUTER_MINIMAX_M3_MODEL, 1_000_000, 524_288), + (OPENROUTER_MINIMAX_2_7_MODEL, 204_800, 4096), + (OPENROUTER_GLM_5_1_MODEL, 202_752, 131_072), + (OPENROUTER_GLM_5_2_MODEL, 1_000_000, 131_072), (OPENROUTER_NEMOTRON_3_ULTRA_MODEL, 1_000_000, 16_384), ] { let cap = provider_capability(ApiProvider::Openrouter, model); @@ -11539,6 +11603,23 @@ model = "deepseek-ai/deepseek-v4-pro" ); } + #[test] + fn provider_capability_zai_keeps_5_1_default_and_tracks_5_2_window() { + let default = provider_capability(ApiProvider::Zai, DEFAULT_ZAI_MODEL); + assert_eq!(default.resolved_model, DEFAULT_ZAI_MODEL); + assert_eq!(default.context_window, 202_752); + assert_eq!(default.max_output, 131_072); + assert!(default.thinking_supported); + assert!(!default.cache_telemetry_supported); + + let preview = provider_capability(ApiProvider::Zai, ZAI_GLM_5_2_MODEL); + assert_eq!(preview.resolved_model, ZAI_GLM_5_2_MODEL); + assert_eq!(preview.context_window, 1_000_000); + assert_eq!(preview.max_output, 131_072); + assert!(preview.thinking_supported); + assert!(!preview.cache_telemetry_supported); + } + #[test] fn provider_capability_minimax_direct_models_use_api_docs_shape() { let m3 = provider_capability(ApiProvider::Minimax, DEFAULT_MINIMAX_MODEL); diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs index 9d90156a..fe7e60b8 100644 --- a/crates/tui/src/models.rs +++ b/crates/tui/src/models.rs @@ -287,6 +287,7 @@ fn known_context_window_for_model(model_lower: &str) -> Option { | "kimi-k2.6" | "kimi-for-coding" => Some(262_144), "minimax-m2.7" + | "minimax/minimax-2.7" | "minimax-m2.7-highspeed" | "minimax-m2.5" | "minimax-m2.5-highspeed" @@ -294,6 +295,7 @@ fn known_context_window_for_model(model_lower: &str) -> Option { | "minimax-m2.1-highspeed" | "minimax-m2" => Some(204_800), "z-ai/glm-5.1" | "z-ai/glm-5v-turbo" | "glm-5.1" | "glm-5v-turbo" => Some(202_752), + "z-ai/glm-5.2" | "glm-5.2" => Some(1_000_000), "minimax/minimax-m3" | "minimax-m3" | "qwen/qwen3.6-flash" | "qwen/qwen3.6-plus" => { Some(1_000_000) } @@ -333,6 +335,7 @@ pub fn max_output_tokens_for_model(model: &str) -> Option { "minimax/minimax-m3" | "minimax-m3" => Some(524_288), "qwen/qwen3.6-35b-a3b" | "qwen/qwen3.6-27b" => Some(262_140), "qwen/qwen3.6-flash" | "qwen/qwen3.6-max-preview" | "qwen/qwen3.6-plus" => Some(65_536), + "z-ai/glm-5.1" | "z-ai/glm-5.2" | "glm-5.1" | "glm-5.2" => Some(131_072), "xiaomi/mimo-v2.5-pro" | "xiaomi/mimo-v2.5" | "mimo-v2.5-pro" | "mimo-v2.5" => { Some(131_072) } @@ -385,6 +388,7 @@ pub fn model_supports_reasoning(model: &str) -> bool { | "kimi-k2.6" | "kimi-for-coding" | "minimax/minimax-m3" + | "minimax/minimax-2.7" | "minimax-m3" | "minimax-m2.7" | "minimax-m2.7-highspeed" @@ -407,7 +411,9 @@ pub fn model_supports_reasoning(model: &str) -> bool { | "mimo-v2.5-pro" | "mimo-v2.5" | "z-ai/glm-5.1" + | "z-ai/glm-5.2" | "glm-5.1" + | "glm-5.2" ) } @@ -613,10 +619,12 @@ mod tests { ("mimo-v2.5-pro", 1_000_000), ("mimo-v2.5", 1_000_000), ("minimax/minimax-m3", 1_000_000), + ("minimax/minimax-2.7", 204_800), ("moonshotai/kimi-k2.7-code", 262_144), ("moonshotai/kimi-k2.6", 262_144), ("google/gemma-4-31b-it", 262_144), ("z-ai/glm-5.1", 202_752), + ("z-ai/glm-5.2", 1_000_000), ] { assert_eq!(context_window_for_model(model), Some(expected_window)); assert!(model_supports_reasoning(model)); @@ -695,6 +703,8 @@ mod tests { max_output_tokens_for_model("minimax/minimax-m3"), Some(524_288) ); + assert_eq!(max_output_tokens_for_model("z-ai/glm-5.1"), Some(131_072)); + assert_eq!(max_output_tokens_for_model("z-ai/glm-5.2"), Some(131_072)); } #[test] @@ -710,6 +720,7 @@ mod tests { ("minimax-m2.5-highspeed", 204_800), ("minimax-m2", 204_800), ("glm-5.1", 202_752), + ("glm-5.2", 1_000_000), ] { assert_eq!(context_window_for_model(model), Some(expected_window)); assert!(model_supports_reasoning(model)); @@ -725,6 +736,8 @@ mod tests { Some(262_144) ); assert_eq!(max_output_tokens_for_model("minimax-m3"), Some(524_288)); + assert_eq!(max_output_tokens_for_model("glm-5.1"), Some(131_072)); + assert_eq!(max_output_tokens_for_model("glm-5.2"), Some(131_072)); } #[test] diff --git a/crates/tui/src/tui/model_picker.rs b/crates/tui/src/tui/model_picker.rs index 3910643d..300fbc81 100644 --- a/crates/tui/src/tui/model_picker.rs +++ b/crates/tui/src/tui/model_picker.rs @@ -478,6 +478,8 @@ fn picker_model_hint(id: &str) -> &'static str { "mimo-v2.5-tts-voicedesign" => "voice design", "mimo-v2.5-tts-voiceclone" => "voice clone", "minimax/minimax-m3" => "1M multimodal", + "z-ai/glm-5.1" | "GLM-5.1" => "default coding", + "z-ai/glm-5.2" | "GLM-5.2" => "preview coding", _ => "provider model", } } @@ -938,6 +940,7 @@ mod tests { assert!(model_ids.contains(&"arcee-ai/trinity-large-thinking")); assert!(model_ids.contains(&"xiaomi/mimo-v2.5-pro")); assert!(model_ids.contains(&"minimax/minimax-m3")); + assert!(model_ids.contains(&"z-ai/glm-5.2")); assert!( model_ids .iter() diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md index f0e87215..36fb466b 100644 --- a/docs/PROVIDERS.md +++ b/docs/PROVIDERS.md @@ -128,7 +128,7 @@ endpoint. | `atlascloud` | `[providers.atlascloud]` | `ATLASCLOUD_API_KEY` | `ATLASCLOUD_BASE_URL`; default `https://api.atlascloud.ai/v1` | Default `deepseek-ai/deepseek-v4-flash`; explicit `vendor/model-id` values pass through when AtlasCloud is selected | OpenAI-compatible hosted route. `ATLASCLOUD_MODEL` is accepted by the TUI config path, the static `ModelRegistry` keeps DeepSeek V4 fallback rows, and provider-hinted CLI model IDs are sent to AtlasCloud exactly as requested. | | `wanjie-ark` | `[providers.wanjie_ark]` | `WANJIE_ARK_API_KEY`, `WANJIE_API_KEY`, `WANJIE_MAAS_API_KEY` | `WANJIE_ARK_BASE_URL`, `WANJIE_BASE_URL`, `WANJIE_MAAS_BASE_URL`; default `https://maas-openapi.wanjiedata.com/api/v1` | `deepseek-reasoner` | OpenAI-compatible hosted route. `WANJIE_ARK_MODEL`, `WANJIE_MODEL`, and `WANJIE_MAAS_MODEL` are accepted. | | `volcengine` | `[providers.volcengine]` | `VOLCENGINE_API_KEY`, `VOLCENGINE_ARK_API_KEY`, `ARK_API_KEY` | `VOLCENGINE_BASE_URL`, `VOLCENGINE_ARK_BASE_URL`, `ARK_BASE_URL`; default `https://ark.cn-beijing.volces.com/api/coding/v3` | `DeepSeek-V4-Pro`, `DeepSeek-V4-Flash` | Volcengine/Volcano Engine Ark OpenAI-compatible coding endpoint. `VOLCENGINE_MODEL` and `VOLCENGINE_ARK_MODEL` are accepted. | -| `openrouter` | `[providers.openrouter]` | `OPENROUTER_API_KEY` | `OPENROUTER_BASE_URL`; default `https://openrouter.ai/api/v1` | `deepseek/deepseek-v4-pro`, `deepseek/deepseek-v4-flash`; recent large IDs include `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `google/gemma-4-31b-it`, `z-ai/glm-5.1`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6` | Additive open-model routing layer. It does not replace DeepSeek; it lets users route supported model IDs through OpenRouter when they choose it. | +| `openrouter` | `[providers.openrouter]` | `OPENROUTER_API_KEY` | `OPENROUTER_BASE_URL`; default `https://openrouter.ai/api/v1` | `deepseek/deepseek-v4-pro`, `deepseek/deepseek-v4-flash`; recent large IDs include `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `google/gemma-4-31b-it`, `z-ai/glm-5.1`, `z-ai/glm-5.2`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6` | Additive open-model routing layer. It does not replace DeepSeek; it lets users route supported model IDs through OpenRouter when they choose it. | | `xiaomi-mimo` | `[providers.xiaomi_mimo]` | `XIAOMI_MIMO_TOKEN_PLAN_API_KEY`, `MIMO_TOKEN_PLAN_API_KEY`, `XIAOMI_MIMO_API_KEY`, `XIAOMI_API_KEY`, `MIMO_API_KEY` | `XIAOMI_MIMO_BASE_URL`, `MIMO_BASE_URL`, `XIAOMI_MIMO_MODE`, `MIMO_MODE`; default `https://token-plan-sgp.xiaomimimo.com/v1` | Chat: `mimo-v2.5-pro`, `mimo-v2.5`; speech/TTS: `mimo-v2.5-tts`, `mimo-v2.5-tts-voicedesign`, `mimo-v2.5-tts-voiceclone`, `mimo-v2-tts` | Xiaomi MiMo OpenAI-compatible chat completions route. Token Plan keys (`tp-...`) use `api-key` auth and the token-plan endpoint by default; pay-as-you-go mode uses standard API keys (`sk-...`) and `https://api.xiaomimimo.com/v1`. It sends `max_completion_tokens` and uses MiMo's `thinking` field for reasoning control. `codewhale speech` / `tts` uses the TTS models. | | `novita` | `[providers.novita]` | `NOVITA_API_KEY` | `NOVITA_BASE_URL`; default `https://api.novita.ai/v1` | `deepseek/deepseek-v4-pro`, `deepseek/deepseek-v4-flash` | OpenAI-compatible hosted route for DeepSeek model IDs. Use config or `CODEWHALE_MODEL` / `DEEPSEEK_MODEL` for model overrides. | | `fireworks` | `[providers.fireworks]` | `FIREWORKS_API_KEY` | `FIREWORKS_BASE_URL`; default `https://api.fireworks.ai/inference/v1` | `accounts/fireworks/models/deepseek-v4-pro` | OpenAI-compatible hosted route. Use config or `CODEWHALE_MODEL` / `DEEPSEEK_MODEL` for model overrides. | @@ -136,7 +136,8 @@ endpoint. | `siliconflow-CN` | `[providers.siliconflow_cn]` | `SILICONFLOW_API_KEY` | `SILICONFLOW_BASE_URL`; default `https://api.siliconflow.cn/v1` | Uses the SiliconFlow model set | China regional SiliconFlow route. Falls back to `[providers.siliconflow]` for api_key / base_url / model when unset. Select it with `provider = "siliconflow-CN"` or `CODEWHALE_PROVIDER=siliconflow-CN`. | | `arcee` | `[providers.arcee]` | `ARCEE_API_KEY` | `ARCEE_BASE_URL`; default `https://api.arcee.ai/api/v1` | `trinity-large-thinking`, `trinity-large-preview` | Arcee AI direct OpenAI-compatible route, tracked as 256K-context BF16 serving. `ARCEE_MODEL` is accepted. OpenRouter's `arcee-ai/trinity-large-thinking` remains the OpenRouter namespaced model ID; direct Arcee uses the bare `trinity-large-thinking` ID. | | `moonshot` | `[providers.moonshot]` | `MOONSHOT_API_KEY`, `KIMI_API_KEY` | `MOONSHOT_BASE_URL`, `KIMI_BASE_URL`; default `https://api.moonshot.ai/v1` | `kimi-k2.7-code`, `kimi-k2.6`; Kimi Code path uses `kimi-for-coding` at `https://api.kimi.com/coding/v1` | Moonshot/Kimi route. `kimi` and `kimi-k2` aliases select `kimi-k2.7-code`; `MOONSHOT_MODEL`, `KIMI_MODEL_NAME`, and `KIMI_MODEL` are accepted. Kimi thinking streams through `reasoning_content`; CodeWhale keeps it in Thinking cells and replays it for thinking/tool-call continuity. `[providers.moonshot] auth_mode = "kimi_oauth"` reads Kimi Code OAuth credentials from `KIMI_CODE_HOME`/`~/.kimi-code`, with legacy `KIMI_SHARE_DIR`/`~/.kimi` fallback. | -| `minimax` | `[providers.minimax]` | `MINIMAX_API_KEY` | `MINIMAX_BASE_URL`; default `https://api.minimax.io/v1` | `MiniMax-M3`, `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`, `MiniMax-M2.1`, `MiniMax-M2.1-highspeed`, `MiniMax-M2` | MiniMax direct OpenAI-compatible route. CodeWhale sends `reasoning_split = true` so MiniMax thinking arrives separately from answer text, and direct MiniMax IDs stay distinct from OpenRouter namespaced IDs such as `minimax/minimax-m3`. | +| `zai` | `[providers.zai]` | `ZAI_API_KEY`, `Z_AI_API_KEY` | `ZAI_BASE_URL`, `Z_AI_BASE_URL`; default `https://api.z.ai/api/coding/paas/v4`; general API `https://api.z.ai/api/paas/v4` | `GLM-5.1` default; `GLM-5.2` opt-in preview | Z.AI GLM Coding Plan route. Keep `GLM-5.1` as the default until 5.2 is generally documented; set `model = "GLM-5.2"` or `ZAI_MODEL=GLM-5.2` to try the preview. | +| `minimax` | `[providers.minimax]` | `MINIMAX_API_KEY` | `MINIMAX_BASE_URL`; default `https://api.minimax.io/v1`; Anthropic-compatible routes are `https://api.minimax.io/anthropic` globally and `https://api.minimaxi.com/anthropic` in China | `MiniMax-M3`, `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`, `MiniMax-M2.1`, `MiniMax-M2.1-highspeed`, `MiniMax-M2` | MiniMax direct OpenAI-compatible route. CodeWhale sends `reasoning_split = true` so MiniMax thinking arrives separately from answer text, and direct MiniMax IDs stay distinct from OpenRouter namespaced IDs such as `minimax/minimax-m3`. | | `sglang` | `[providers.sglang]` | Optional `SGLANG_API_KEY` | `SGLANG_BASE_URL`; default `http://localhost:30000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted OpenAI-compatible route. Localhost deployments commonly omit auth. `SGLANG_MODEL` is accepted. | | `vllm` | `[providers.vllm]` | Optional `VLLM_API_KEY` | `VLLM_BASE_URL`; default `http://localhost:8000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted vLLM OpenAI-compatible route. Localhost deployments commonly omit auth. `VLLM_MODEL` is accepted. | | `ollama` | `[providers.ollama]` | Optional `OLLAMA_API_KEY` | `OLLAMA_BASE_URL`; default `http://localhost:11434/v1` | `deepseek-coder:1.3b`; provider-hinted custom tags pass through | Self-hosted Ollama OpenAI-compatible route. Localhost deployments commonly omit auth. `OLLAMA_MODEL` is accepted. | @@ -193,11 +194,14 @@ large models verified through OpenRouter's model metadata: `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `minimax/minimax-m3`, `xiaomi/mimo-v2.5-pro`, `xiaomi/mimo-v2.5`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, -`z-ai/glm-5.1`, `tencent/hy3-preview`, +`z-ai/glm-5.1`, `z-ai/glm-5.2`, `tencent/hy3-preview`, `google/gemma-4-31b-it`, `google/gemma-4-26b-a4b-it`, and `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`. `minimax/minimax-m3` was added from OpenRouter's May 31, 2026 listing as a 1M context multimodal model for coding, tool use, and long-horizon agentic work. +`z-ai/glm-5.2` is listed as an opt-in preview route ahead of broad availability; +`GLM-5.1` remains the default direct Z.AI model until 5.2 is generally +documented and smoke-tested. ## Static Model Registry @@ -214,13 +218,14 @@ endpoint when the endpoint supports model listing. | `atlascloud` | `deepseek-ai/deepseek-v4-flash`, `deepseek-ai/deepseek-v4-pro` | yes | yes | | `wanjie-ark` | `deepseek-reasoner` | yes | yes | | `volcengine` | `DeepSeek-V4-Pro`, `DeepSeek-V4-Flash` | yes | yes | -| `openrouter` | `deepseek/deepseek-v4-pro`, `deepseek/deepseek-v4-flash`, `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `xiaomi/mimo-v2.5`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, `z-ai/glm-5.1`, `tencent/hy3-preview`, `google/gemma-4-31b-it`, `google/gemma-4-26b-a4b-it`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, `nvidia/nemotron-3-ultra-550b-a55b` | yes | yes | +| `openrouter` | `deepseek/deepseek-v4-pro`, `deepseek/deepseek-v4-flash`, `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `xiaomi/mimo-v2.5`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, `z-ai/glm-5.1`, `z-ai/glm-5.2`, `tencent/hy3-preview`, `google/gemma-4-31b-it`, `google/gemma-4-26b-a4b-it`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, `nvidia/nemotron-3-ultra-550b-a55b` | yes | yes | | `xiaomi-mimo` | `mimo-v2.5-pro`, `mimo-v2.5`; speech/TTS IDs are selected through `codewhale speech` / `tts` | yes | yes for chat models; no for speech/TTS models | | `novita` | `deepseek/deepseek-v4-pro`, `deepseek/deepseek-v4-flash` | yes | yes | | `fireworks` | `accounts/fireworks/models/deepseek-v4-pro` | yes | yes | | `siliconflow` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes | | `arcee` | `trinity-large-thinking`, `trinity-large-preview`; provider-hinted custom model IDs pass through | yes | yes for `trinity-large-thinking`; no for `trinity-large-preview` | | `moonshot` | `kimi-k2.7-code`, `kimi-k2.6` | yes | yes | +| `zai` | `GLM-5.1`, `GLM-5.2`; provider-hinted custom model IDs pass through | yes | yes | | `minimax` | `MiniMax-M3`, `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`, `MiniMax-M2.1`, `MiniMax-M2.1-highspeed`, `MiniMax-M2` | yes | yes | | `sglang` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes | | `vllm` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes | @@ -262,6 +267,8 @@ Anthropic uses Messages, and `openai-codex` uses Responses. | Direct Arcee API `trinity-large-thinking` | 262,144 | 262,144 | yes | no | not documented in code | | Direct Arcee API `trinity-large-preview` | 262,144 | 4,096 | no in doctor capability metadata | no | not documented in code | | Direct Moonshot/Kimi `kimi-k2.7-code`, `kimi-k2.6`, `kimi-for-coding` | 262,144 | 262,144 | yes | no | not documented in code | +| Direct Z.AI `GLM-5.1` | 202,752 | 131,072 | yes | no | not documented in code | +| Direct Z.AI `GLM-5.2` | 1,000,000 | 131,072 provisional | yes | no | not documented in code | | Direct MiniMax `MiniMax-M3` | 1,000,000 | 524,288 | yes | no | not documented in code | | Direct MiniMax M2.x models | 204,800 | 4,096 fallback until MiniMax output metadata is promoted | yes | no | not documented in code | | Generic `openai` and AtlasCloud | 128,000 | 4,096 | no in doctor capability metadata | no | not documented in code |