chore: wip capacity canonical state + tool alias deprecation

This commit is contained in:
Hunter Bown
2026-04-26 13:11:57 -05:00
parent 2adbe398ba
commit 38069700cc
12 changed files with 272 additions and 20 deletions
+12
View File
@@ -25,6 +25,18 @@ See README.md for project overview, docs/ARCHITECTURE.md for internals.
- **API**: OpenAI-compatible Chat Completions (`/chat/completions`) is the documented DeepSeek API path. Base URL configurable for global (`api.deepseek.com`) or China (`api.deepseeki.com`); `/v1` is accepted for OpenAI SDK compatibility, and `/beta` is only needed for beta features such as strict tool mode, chat prefix completion, and FIM completion.
- **Thinking + Tool Calls**: In V4 thinking mode, assistant messages that contain tool calls must replay their `reasoning_content` in all subsequent requests or the API returns HTTP 400.
## GitHub Operations
Use the **`gh` CLI** (`/opt/homebrew/bin/gh`) for all GitHub operations — issues, PRs, branches, labels. It's already authenticated as `Hmbown` (token scopes: `gist`, `read:org`, `repo`, `workflow`). Examples:
- List open issues: `gh issue list --state open --limit 20`
- View an issue: `gh issue view <number>`
- Create an issue branch: `gh issue develop <number> --branch-name feat/issue-<number>-<slug>`
- Create a PR: `gh pr create --base feat/v0.6.2 --title "..." --body "..."`
- Check PR status: `gh pr view <number>`
Prefer `gh` over `fetch_url` or `web_search` for GitHub data — it's faster, authenticated, and avoids rate limits.
## Important Notes
- **Token/cost tracking inaccuracies**: Token counting and cost estimation may be inflated due to thinking token accounting bugs. Use `/compact` to manage context, and treat cost estimates as approximate.
+72
View File
@@ -13,6 +13,12 @@ const DEFAULT_OPENAI_MODEL: &str = "gpt-4.1";
// Built-in base URLs for the DeepSeek, NVIDIA NIM and OpenAI providers. Each
// one is returned (as an owned `String`) by the matching `ProviderKind` arm in
// the base-URL match inside `ConfigToml`.
const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com";
const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1";
const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
// Model ids for the OpenRouter and Novita providers. The non-FLASH constants
// are what the `ProviderKind` model match in `ConfigToml` returns for those
// two providers.
// NOTE(review): the `*_FLASH_MODEL` constants are not referenced in the code
// visible here — confirm they have a consumer, otherwise they are dead code.
const DEFAULT_OPENROUTER_MODEL: &str = "deepseek/deepseek-v4-pro";
const DEFAULT_OPENROUTER_FLASH_MODEL: &str = "deepseek/deepseek-v4-flash";
const DEFAULT_NOVITA_MODEL: &str = "deepseek/deepseek-v4-pro";
const DEFAULT_NOVITA_FLASH_MODEL: &str = "deepseek/deepseek-v4-flash";
// Built-in base URLs for OpenRouter and Novita, returned by the same
// base-URL match as the three constants above.
const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1";
const DEFAULT_NOVITA_BASE_URL: &str = "https://api.novita.ai/v1";
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "kebab-case")]
@@ -21,6 +27,8 @@ pub enum ProviderKind {
Deepseek,
NvidiaNim,
Openai,
Openrouter,
Novita,
}
impl ProviderKind {
@@ -30,6 +38,8 @@ impl ProviderKind {
Self::Deepseek => "deepseek",
Self::NvidiaNim => "nvidia-nim",
Self::Openai => "openai",
Self::Openrouter => "openrouter",
Self::Novita => "novita",
}
}
@@ -39,6 +49,8 @@ impl ProviderKind {
"deepseek" | "deep-seek" => Some(Self::Deepseek),
"nvidia" | "nvidia-nim" | "nvidia_nim" | "nim" => Some(Self::NvidiaNim),
"openai" | "open-ai" => Some(Self::Openai),
"openrouter" | "open_router" => Some(Self::Openrouter),
"novita" => Some(Self::Novita),
_ => None,
}
}
@@ -59,6 +71,10 @@ pub struct ProvidersToml {
pub nvidia_nim: ProviderConfigToml,
#[serde(default)]
pub openai: ProviderConfigToml,
#[serde(default)]
pub openrouter: ProviderConfigToml,
#[serde(default)]
pub novita: ProviderConfigToml,
}
impl ProvidersToml {
@@ -68,6 +84,8 @@ impl ProvidersToml {
ProviderKind::Deepseek => &self.deepseek,
ProviderKind::NvidiaNim => &self.nvidia_nim,
ProviderKind::Openai => &self.openai,
ProviderKind::Openrouter => &self.openrouter,
ProviderKind::Novita => &self.novita,
}
}
@@ -76,6 +94,8 @@ impl ProvidersToml {
ProviderKind::Deepseek => &mut self.deepseek,
ProviderKind::NvidiaNim => &mut self.nvidia_nim,
ProviderKind::Openai => &mut self.openai,
ProviderKind::Openrouter => &mut self.openrouter,
ProviderKind::Novita => &mut self.novita,
}
}
}
@@ -132,6 +152,12 @@ impl ConfigToml {
"providers.openai.api_key" => self.providers.openai.api_key.clone(),
"providers.openai.base_url" => self.providers.openai.base_url.clone(),
"providers.openai.model" => self.providers.openai.model.clone(),
"providers.openrouter.api_key" => self.providers.openrouter.api_key.clone(),
"providers.openrouter.base_url" => self.providers.openrouter.base_url.clone(),
"providers.openrouter.model" => self.providers.openrouter.model.clone(),
"providers.novita.api_key" => self.providers.novita.api_key.clone(),
"providers.novita.base_url" => self.providers.novita.base_url.clone(),
"providers.novita.model" => self.providers.novita.model.clone(),
_ => self.extras.get(key).map(toml::Value::to_string),
}
}
@@ -183,6 +209,24 @@ impl ConfigToml {
"providers.nvidia_nim.model" => {
self.providers.nvidia_nim.model = Some(value.to_string());
}
"providers.openrouter.api_key" => {
self.providers.openrouter.api_key = Some(value.to_string());
}
"providers.openrouter.base_url" => {
self.providers.openrouter.base_url = Some(value.to_string());
}
"providers.openrouter.model" => {
self.providers.openrouter.model = Some(value.to_string());
}
"providers.novita.api_key" => {
self.providers.novita.api_key = Some(value.to_string());
}
"providers.novita.base_url" => {
self.providers.novita.base_url = Some(value.to_string());
}
"providers.novita.model" => {
self.providers.novita.model = Some(value.to_string());
}
_ => {
self.extras
.insert(key.to_string(), toml::Value::String(value.to_string()));
@@ -224,6 +268,12 @@ impl ConfigToml {
"providers.nvidia_nim.api_key" => self.providers.nvidia_nim.api_key = None,
"providers.nvidia_nim.base_url" => self.providers.nvidia_nim.base_url = None,
"providers.nvidia_nim.model" => self.providers.nvidia_nim.model = None,
"providers.openrouter.api_key" => self.providers.openrouter.api_key = None,
"providers.openrouter.base_url" => self.providers.openrouter.base_url = None,
"providers.openrouter.model" => self.providers.openrouter.model = None,
"providers.novita.api_key" => self.providers.novita.api_key = None,
"providers.novita.base_url" => self.providers.novita.base_url = None,
"providers.novita.model" => self.providers.novita.model = None,
_ => {
self.extras.remove(key);
}
@@ -299,6 +349,24 @@ impl ConfigToml {
if let Some(v) = self.providers.nvidia_nim.model.as_ref() {
out.insert("providers.nvidia_nim.model".to_string(), v.clone());
}
if let Some(v) = self.providers.openrouter.api_key.as_ref() {
out.insert("providers.openrouter.api_key".to_string(), redact_secret(v));
}
if let Some(v) = self.providers.openrouter.base_url.as_ref() {
out.insert("providers.openrouter.base_url".to_string(), v.clone());
}
if let Some(v) = self.providers.openrouter.model.as_ref() {
out.insert("providers.openrouter.model".to_string(), v.clone());
}
if let Some(v) = self.providers.novita.api_key.as_ref() {
out.insert("providers.novita.api_key".to_string(), redact_secret(v));
}
if let Some(v) = self.providers.novita.base_url.as_ref() {
out.insert("providers.novita.base_url".to_string(), v.clone());
}
if let Some(v) = self.providers.novita.model.as_ref() {
out.insert("providers.novita.model".to_string(), v.clone());
}
for (k, v) in &self.extras {
out.insert(k.clone(), v.to_string());
@@ -338,6 +406,8 @@ impl ConfigToml {
ProviderKind::Deepseek => DEFAULT_DEEPSEEK_BASE_URL.to_string(),
ProviderKind::NvidiaNim => DEFAULT_NVIDIA_NIM_BASE_URL.to_string(),
ProviderKind::Openai => DEFAULT_OPENAI_BASE_URL.to_string(),
ProviderKind::Openrouter => DEFAULT_OPENROUTER_BASE_URL.to_string(),
ProviderKind::Novita => DEFAULT_NOVITA_BASE_URL.to_string(),
});
let model = cli
@@ -351,6 +421,8 @@ impl ConfigToml {
ProviderKind::Deepseek => DEFAULT_DEEPSEEK_MODEL.to_string(),
ProviderKind::NvidiaNim => DEFAULT_NVIDIA_NIM_MODEL.to_string(),
ProviderKind::Openai => DEFAULT_OPENAI_MODEL.to_string(),
ProviderKind::Openrouter => DEFAULT_OPENROUTER_MODEL.to_string(),
ProviderKind::Novita => DEFAULT_NOVITA_MODEL.to_string(),
});
let model = normalize_model_for_provider(provider, &model);
+30 -1
View File
@@ -9,8 +9,25 @@ use std::pin::Pin;
use std::time::Duration;
use anyhow::{Context, Result};
use tokio::time::timeout as tokio_timeout;
use serde_json::{Value, json};
/// Default idle timeout for SSE stream reads (300 seconds = 5 minutes).
/// After this period with no data, the stream is considered stalled and
/// yields a recoverable error so the caller can retry.
const DEFAULT_STREAM_IDLE_TIMEOUT: Duration = Duration::from_secs(300);

/// Resolves the SSE idle timeout from `DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS`.
///
/// The variable is read as a whole number of seconds. Surrounding whitespace
/// is trimmed before parsing, because integer parsing rejects it and a value
/// such as `" 42 "` would otherwise be silently ignored. If the variable is
/// unset, not valid Unicode, or not a non-negative integer, the default of
/// 300s is used. The resulting value is clamped to [1, 3600] seconds, so `0`
/// becomes 1s and anything above an hour becomes 3600s.
fn stream_idle_timeout() -> Duration {
    let secs = std::env::var("DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .unwrap_or(DEFAULT_STREAM_IDLE_TIMEOUT.as_secs())
        .clamp(1, 3600);
    Duration::from_secs(secs)
}
use crate::llm_client::StreamEventBox;
use crate::logging;
use crate::models::{
@@ -177,8 +194,20 @@ impl DeepSeekClient {
let is_reasoning_model = requires_reasoning_content(&model);
let mut byte_stream = std::pin::pin!(byte_stream);
let idle = stream_idle_timeout();
while let Some(chunk_result) = byte_stream.next().await {
loop {
let chunk_result = match tokio_timeout(idle, byte_stream.next()).await {
Ok(Some(result)) => result,
Ok(None) => break, // Stream ended normally
Err(_elapsed) => {
yield Err(anyhow::anyhow!(
"SSE stream idle timeout after {}s — no data received",
idle.as_secs(),
));
break;
}
};
let chunk = match chunk_result {
Ok(bytes) => bytes,
Err(e) => {
+22 -8
View File
@@ -1036,13 +1036,8 @@ fn context_input_budget(model: &str, requested_output_tokens: u32) -> Option<usi
}
fn is_context_length_error_message(message: &str) -> bool {
let lower = message.to_lowercase();
lower.contains("maximum context length")
|| lower.contains("context length")
|| lower.contains("context_length")
|| lower.contains("prompt is too long")
|| (lower.contains("requested") && lower.contains("tokens") && lower.contains("maximum"))
|| lower.contains("context window")
crate::error_taxonomy::classify_error_message(message)
== crate::error_taxonomy::ErrorCategory::InvalidInput
}
fn emit_tool_audit(event: serde_json::Value) {
@@ -3239,6 +3234,7 @@ impl Engine {
mode,
step_error_count,
consecutive_tool_error_steps,
&[],
)
.await
{
@@ -3339,11 +3335,22 @@ impl Engine {
mode: AppMode,
step_error_count: usize,
consecutive_tool_error_steps: u32,
#[allow(clippy::needless_pass_by_ref_mut)] // error_categories will be used in future escalation logic
error_categories: &[crate::error_taxonomy::ErrorCategory],
) -> bool {
if step_error_count == 0 && consecutive_tool_error_steps < 2 {
return false;
}
let has_context_overflow = error_categories
.iter()
.any(|&cat| cat == crate::error_taxonomy::ErrorCategory::InvalidInput);
if !has_context_overflow && consecutive_tool_error_steps < 2 {
// Only escalate on non-context errors when we have consecutive failures
return false;
}
let snapshot = self
.capacity_controller
.last_snapshot()
@@ -3373,12 +3380,19 @@ impl Engine {
return false;
}
let category_labels: Vec<String> = error_categories
.iter()
.map(|c| c.to_string())
.collect();
self.apply_verify_and_replan(
turn,
mode,
Some(&forced),
&format!(
"error_escalation: step_errors={step_error_count}, consecutive_steps={consecutive_tool_error_steps}"
"error_escalation: step_errors={}, consecutive_steps={}, categories={}",
step_error_count,
consecutive_tool_error_steps,
category_labels.join(",")
),
)
.await
+1 -1
View File
@@ -546,7 +546,7 @@ async fn error_escalation_triggers_replan_when_severe_or_repeated_failures() {
let before_len = engine.session.messages.len();
let turn = TurnContext::new(10);
let restarted = engine
.run_capacity_error_escalation_checkpoint(&turn, AppMode::Agent, 2, 2)
.run_capacity_error_escalation_checkpoint(&turn, AppMode::Agent, 2, 2, &[])
.await;
assert!(restarted);
+50 -5
View File
@@ -1,9 +1,4 @@
// TODO(integrate): Wire into engine/UI — tracked as future work
#![allow(dead_code)]
//! Shared error taxonomy across client, tools, runtime, and UI.
//!
//! Not yet wired into consumers; will be adopted incrementally.
use std::fmt;
use crate::llm_client::LlmError;
@@ -186,6 +181,56 @@ impl From<LlmError> for ErrorEnvelope {
}
}
/// Map a free-form error message onto an [`ErrorCategory`].
///
/// The message is lowercased once and then tested against keyword groups in
/// a fixed priority order; the first group with a hit decides the result.
/// Context-length / context-window overflow phrases are checked first and are
/// reported as `ErrorCategory::InvalidInput`. A message that matches no group
/// is classified as `ErrorCategory::Internal`.
///
/// Matching is plain substring search, so this is a heuristic meant to replace
/// ad-hoc string checks in callers, not an exact parser.
#[must_use]
pub fn classify_error_message(message: &str) -> ErrorCategory {
    let text = message.to_lowercase();
    // True when at least one of the given keywords occurs in the message.
    let mentions_any = |needles: &[&str]| needles.iter().any(|needle| text.contains(*needle));
    // True only when every one of the given keywords occurs in the message.
    let mentions_all = |needles: &[&str]| needles.iter().all(|needle| text.contains(*needle));

    if mentions_any(&[
        "maximum context length",
        "context length",
        "context_length",
        "prompt is too long",
    ]) || mentions_all(&["requested", "tokens", "maximum"])
        || mentions_any(&["context window"])
    {
        ErrorCategory::InvalidInput
    } else if mentions_any(&["rate limit", "too many requests", "429", "quota"]) {
        ErrorCategory::RateLimit
    } else if mentions_any(&["timeout", "timed out"]) {
        ErrorCategory::Timeout
    } else if mentions_any(&["auth", "unauthorized", "api key"]) {
        ErrorCategory::Authentication
    } else if mentions_any(&["permission", "forbidden", "denied"]) {
        ErrorCategory::Authorization
    } else if mentions_any(&["network", "connection", "dns"]) {
        ErrorCategory::Network
    } else if mentions_any(&["parse", "syntax", "malformed"]) {
        ErrorCategory::Parse
    } else if mentions_any(&["not found", "unavailable", "not available"]) {
        ErrorCategory::State
    } else if mentions_any(&["tool"]) {
        ErrorCategory::Tool
    } else {
        ErrorCategory::Internal
    }
}
impl From<ToolError> for ErrorEnvelope {
fn from(value: ToolError) -> Self {
match value {
+8
View File
@@ -19,6 +19,8 @@ pub struct Settings {
pub calm_mode: bool,
/// Reduce animation and redraw churn
pub low_motion: bool,
/// Enable fancy footer animations (water-spout strip, pulsing text)
pub fancy_animations: bool,
/// Show thinking blocks from the model
pub show_thinking: bool,
/// Show detailed tool output
@@ -47,6 +49,7 @@ impl Default for Settings {
auto_compact: true,
calm_mode: false,
low_motion: false,
fancy_animations: false,
show_thinking: true,
show_tool_details: true,
composer_density: "comfortable".to_string(),
@@ -136,6 +139,9 @@ impl Settings {
"low_motion" | "motion" => {
self.low_motion = parse_bool(value)?;
}
"fancy_animations" | "fancy" | "animations" => {
self.fancy_animations = parse_bool(value)?;
}
"show_thinking" | "thinking" => {
self.show_thinking = parse_bool(value)?;
}
@@ -244,6 +250,7 @@ impl Settings {
lines.push(format!(" auto_compact: {}", self.auto_compact));
lines.push(format!(" calm_mode: {}", self.calm_mode));
lines.push(format!(" low_motion: {}", self.low_motion));
lines.push(format!(" fancy_animations: {}", self.fancy_animations));
lines.push(format!(" show_thinking: {}", self.show_thinking));
lines.push(format!(" show_tool_details: {}", self.show_tool_details));
lines.push(format!(" composer_density: {}", self.composer_density));
@@ -275,6 +282,7 @@ impl Settings {
("auto_compact", "Auto-compact conversations: on/off"),
("calm_mode", "Calmer UI defaults: on/off"),
("low_motion", "Reduce animation and redraw churn: on/off"),
("fancy_animations", "Fancy footer animations (water-spout strip): on/off"),
("show_thinking", "Show model thinking: on/off"),
("show_tool_details", "Show detailed tool output: on/off"),
(
+3
View File
@@ -404,6 +404,7 @@ pub struct App {
pub auto_compact: bool,
pub calm_mode: bool,
pub low_motion: bool,
pub fancy_animations: bool,
pub show_thinking: bool,
pub show_tool_details: bool,
pub composer_density: ComposerDensity,
@@ -653,6 +654,7 @@ impl App {
let auto_compact = settings.auto_compact;
let calm_mode = settings.calm_mode;
let low_motion = settings.low_motion;
let fancy_animations = settings.fancy_animations;
let show_thinking = settings.show_thinking;
let show_tool_details = settings.show_tool_details;
let composer_density = ComposerDensity::from_setting(&settings.composer_density);
@@ -746,6 +748,7 @@ impl App {
auto_compact,
calm_mode,
low_motion,
fancy_animations,
show_thinking,
show_tool_details,
composer_density,
+8 -1
View File
@@ -376,10 +376,17 @@ fn render_sidebar_section(f: &mut Frame, area: Rect, title: &str, lines: Vec<Lin
}
let theme = active_theme();
// Truncate the panel title so it always fits within the section width
// even after a resize. Up to 4 columns are reserved for chrome around the
// title (the two padding spaces added by `format!` below, plus one border
// column on each side when borders are enabled), so the max title width is
// area.width.saturating_sub(4), floored at 1.
let max_title_width = area.width.saturating_sub(4).max(1) as usize;
let display_title = truncate_line_to_width(title, max_title_width);
let section = Paragraph::new(lines).wrap(Wrap { trim: false }).block(
Block::default()
.title(Line::from(vec![Span::styled(
format!(" {title} "),
format!(" {display_title} "),
Style::default().fg(theme.section_title_color).bold(),
)]))
.borders(theme.section_borders)
+15 -3
View File
@@ -16,6 +16,8 @@
//! Width or render-option changes still bust the entire cache (correct: wrap
//! layout depends on width and which cells are visible at all).
use std::sync::Arc;
use ratatui::text::Line;
use crate::tui::app::TranscriptSpacing;
@@ -24,12 +26,19 @@ use crate::tui::scrolling::TranscriptLineMeta;
/// Per-cell cached render output. Reused across `ensure` calls when the
/// upstream cell's revision counter hasn't changed.
///
/// Lines are stored behind an `Arc` so that cloning a `CachedCell` during
/// cache-ensure (which touches every cell every frame) is O(1) rather than
/// O(rendered_line_count). The flatten step reads the lines through the
/// `Arc` and clones each one into the final `lines` assembly, so the only
/// deep-clone occurs on the flattened output — once per frame instead of once per cell.
#[derive(Debug, Clone)]
struct CachedCell {
/// Revision the cell was at when the lines/meta were rendered.
revision: u64,
/// Rendered lines for this cell (without trailing inter-cell spacers).
lines: Vec<Line<'static>>,
/// Rendered lines for this cell (without trailing inter-cell spacers),
/// shared via `Arc` so cache enumeration is O(N) not O(N*lines).
lines: Arc<Vec<Line<'static>>>,
/// Whether this cell's rendered output was empty (e.g. Thinking hidden).
/// Cached so we can skip empty cells without re-rendering.
is_empty: bool,
@@ -127,7 +136,7 @@ impl TranscriptViewCache {
let is_empty = rendered.is_empty();
new_per_cell.push(CachedCell {
revision: current_rev,
lines: rendered,
lines: Arc::new(rendered),
is_empty,
is_stream_continuation: cell.is_stream_continuation(),
is_conversational: cell.is_conversational(),
@@ -158,6 +167,9 @@ impl TranscriptViewCache {
if cached.is_empty {
continue;
}
// Arc::make_mut would deep-clone only on write; since we just
// rebuilt `lines` from scratch we always need the owned data.
// Deref is zero-cost and gives us &[Line].
for (line_in_cell, line) in cached.lines.iter().enumerate() {
lines.push(line.clone());
meta.push(TranscriptLineMeta::CellLine {
+23
View File
@@ -24,6 +24,7 @@ use ratatui::{
widgets::Block,
};
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
use tracing;
use crate::audit::log_sensitive_event;
use crate::client::DeepSeekClient;
@@ -501,6 +502,23 @@ async fn run_event_loop(
}],
});
handle_tool_call_complete(app, &id, &name, &result);
// Immediately refresh the task panel sidebar when a
// tool that changes task state completes, so the
// Tasks panel stays in sync with tool execution
// rather than waiting up to 2.5 s for the periodic
// poll.
if matches!(
name.as_str(),
"agent_spawn" | "agent_swarm" | "agent_cancel" | "todo_write"
) {
let tasks = task_manager.list_tasks(Some(10)).await;
app.task_panel = tasks
.into_iter()
.map(task_summary_to_panel_entry)
.collect();
last_task_refresh = Instant::now();
}
}
EngineEvent::TurnStarted { turn_id } => {
app.is_loading = true;
@@ -916,6 +934,11 @@ async fn run_event_loop(
// Handle bracketed paste events
if let Event::Paste(text) = &evt {
tracing::debug!(
paste_len = text.len(),
preview = %text.chars().take(80).collect::<String>(),
"Received bracketed paste event"
);
if app.onboarding == OnboardingState::ApiKey {
// Paste into API key input
app.insert_api_key_str(text);
+28 -1
View File
@@ -357,7 +357,34 @@ impl Renderable for ComposerWidget<'_> {
} else {
""
};
for (idx, entry) in menu_entries.iter().enumerate() {
// Compute a viewport window into the menu entries so the
// selection cursor stays visible even when there are more
// entries than available rows.
let menu_visible_rows = inner_area
.height
.saturating_sub(visual_rows as u16)
.saturating_sub(top_padding as u16)
.saturating_sub(1) // at least one row for the cursor
.max(1) as usize;
let menu_total = menu_entries.len();
let menu_top = if menu_total <= menu_visible_rows {
0
} else {
// Keep the selection centered in the viewport.
let half = menu_visible_rows / 2;
if selected <= half {
0
} else if selected + half >= menu_total {
menu_total.saturating_sub(menu_visible_rows)
} else {
selected.saturating_sub(half)
}
};
let menu_bottom = (menu_top + menu_visible_rows).min(menu_total);
for idx in menu_top..menu_bottom {
let entry = &menu_entries[idx];
let is_selected = idx == selected;
let style = if is_selected {
Style::default()