diff --git a/AGENTS.md b/AGENTS.md index c9dbe619..0e8d8579 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -25,6 +25,18 @@ See README.md for project overview, docs/ARCHITECTURE.md for internals. - **API**: OpenAI-compatible Chat Completions (`/chat/completions`) is the documented DeepSeek API path. Base URL configurable for global (`api.deepseek.com`) or China (`api.deepseeki.com`); `/v1` is accepted for OpenAI SDK compatibility, and `/beta` is only needed for beta features such as strict tool mode, chat prefix completion, and FIM completion. - **Thinking + Tool Calls**: In V4 thinking mode, assistant messages that contain tool calls must replay their `reasoning_content` in all subsequent requests or the API returns HTTP 400. +## GitHub Operations + +Use the **`gh` CLI** (`/opt/homebrew/bin/gh`) for all GitHub operations — issues, PRs, branches, labels. It's already authenticated as `Hmbown` (token scopes: `gist`, `read:org`, `repo`, `workflow`). Examples: + +- List open issues: `gh issue list --state open --limit 20` +- View an issue: `gh issue view <number>` +- Create an issue branch: `gh issue develop <number> --name feat/issue-<number>-<slug>` +- Create a PR: `gh pr create --base feat/v0.6.2 --title "..." --body "..."` +- Check PR status: `gh pr view <number>` + +Prefer `gh` over `fetch_url` or `web_search` for GitHub data — it's faster, authenticated, and avoids rate limits. + ## Important Notes - **Token/cost tracking inaccuracies**: Token counting and cost estimation may be inflated due to thinking token accounting bugs. Use `/compact` to manage context, and treat cost estimates as approximate. 
diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index 8d04f9c2..88ca6a97 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -13,6 +13,12 @@ const DEFAULT_OPENAI_MODEL: &str = "gpt-4.1"; const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com"; const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1"; const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1"; +const DEFAULT_OPENROUTER_MODEL: &str = "deepseek/deepseek-v4-pro"; +const DEFAULT_OPENROUTER_FLASH_MODEL: &str = "deepseek/deepseek-v4-flash"; +const DEFAULT_NOVITA_MODEL: &str = "deepseek/deepseek-v4-pro"; +const DEFAULT_NOVITA_FLASH_MODEL: &str = "deepseek/deepseek-v4-flash"; +const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1"; +const DEFAULT_NOVITA_BASE_URL: &str = "https://api.novita.ai/v1"; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] #[serde(rename_all = "kebab-case")] @@ -21,6 +27,8 @@ pub enum ProviderKind { Deepseek, NvidiaNim, Openai, + Openrouter, + Novita, } impl ProviderKind { @@ -30,6 +38,8 @@ impl ProviderKind { Self::Deepseek => "deepseek", Self::NvidiaNim => "nvidia-nim", Self::Openai => "openai", + Self::Openrouter => "openrouter", + Self::Novita => "novita", } } @@ -39,6 +49,8 @@ impl ProviderKind { "deepseek" | "deep-seek" => Some(Self::Deepseek), "nvidia" | "nvidia-nim" | "nvidia_nim" | "nim" => Some(Self::NvidiaNim), "openai" | "open-ai" => Some(Self::Openai), + "openrouter" | "open_router" => Some(Self::Openrouter), + "novita" => Some(Self::Novita), _ => None, } } @@ -59,6 +71,10 @@ pub struct ProvidersToml { pub nvidia_nim: ProviderConfigToml, #[serde(default)] pub openai: ProviderConfigToml, + #[serde(default)] + pub openrouter: ProviderConfigToml, + #[serde(default)] + pub novita: ProviderConfigToml, } impl ProvidersToml { @@ -68,6 +84,8 @@ impl ProvidersToml { ProviderKind::Deepseek => &self.deepseek, ProviderKind::NvidiaNim => &self.nvidia_nim, 
ProviderKind::Openai => &self.openai, + ProviderKind::Openrouter => &self.openrouter, + ProviderKind::Novita => &self.novita, } } @@ -76,6 +94,8 @@ impl ProvidersToml { ProviderKind::Deepseek => &mut self.deepseek, ProviderKind::NvidiaNim => &mut self.nvidia_nim, ProviderKind::Openai => &mut self.openai, + ProviderKind::Openrouter => &mut self.openrouter, + ProviderKind::Novita => &mut self.novita, } } } @@ -132,6 +152,12 @@ impl ConfigToml { "providers.openai.api_key" => self.providers.openai.api_key.clone(), "providers.openai.base_url" => self.providers.openai.base_url.clone(), "providers.openai.model" => self.providers.openai.model.clone(), + "providers.openrouter.api_key" => self.providers.openrouter.api_key.clone(), + "providers.openrouter.base_url" => self.providers.openrouter.base_url.clone(), + "providers.openrouter.model" => self.providers.openrouter.model.clone(), + "providers.novita.api_key" => self.providers.novita.api_key.clone(), + "providers.novita.base_url" => self.providers.novita.base_url.clone(), + "providers.novita.model" => self.providers.novita.model.clone(), _ => self.extras.get(key).map(toml::Value::to_string), } } @@ -183,6 +209,24 @@ impl ConfigToml { "providers.nvidia_nim.model" => { self.providers.nvidia_nim.model = Some(value.to_string()); } + "providers.openrouter.api_key" => { + self.providers.openrouter.api_key = Some(value.to_string()); + } + "providers.openrouter.base_url" => { + self.providers.openrouter.base_url = Some(value.to_string()); + } + "providers.openrouter.model" => { + self.providers.openrouter.model = Some(value.to_string()); + } + "providers.novita.api_key" => { + self.providers.novita.api_key = Some(value.to_string()); + } + "providers.novita.base_url" => { + self.providers.novita.base_url = Some(value.to_string()); + } + "providers.novita.model" => { + self.providers.novita.model = Some(value.to_string()); + } _ => { self.extras .insert(key.to_string(), toml::Value::String(value.to_string())); @@ -224,6 +268,12 @@ 
impl ConfigToml { "providers.nvidia_nim.api_key" => self.providers.nvidia_nim.api_key = None, "providers.nvidia_nim.base_url" => self.providers.nvidia_nim.base_url = None, "providers.nvidia_nim.model" => self.providers.nvidia_nim.model = None, + "providers.openrouter.api_key" => self.providers.openrouter.api_key = None, + "providers.openrouter.base_url" => self.providers.openrouter.base_url = None, + "providers.openrouter.model" => self.providers.openrouter.model = None, + "providers.novita.api_key" => self.providers.novita.api_key = None, + "providers.novita.base_url" => self.providers.novita.base_url = None, + "providers.novita.model" => self.providers.novita.model = None, _ => { self.extras.remove(key); } @@ -299,6 +349,24 @@ impl ConfigToml { if let Some(v) = self.providers.nvidia_nim.model.as_ref() { out.insert("providers.nvidia_nim.model".to_string(), v.clone()); } + if let Some(v) = self.providers.openrouter.api_key.as_ref() { + out.insert("providers.openrouter.api_key".to_string(), redact_secret(v)); + } + if let Some(v) = self.providers.openrouter.base_url.as_ref() { + out.insert("providers.openrouter.base_url".to_string(), v.clone()); + } + if let Some(v) = self.providers.openrouter.model.as_ref() { + out.insert("providers.openrouter.model".to_string(), v.clone()); + } + if let Some(v) = self.providers.novita.api_key.as_ref() { + out.insert("providers.novita.api_key".to_string(), redact_secret(v)); + } + if let Some(v) = self.providers.novita.base_url.as_ref() { + out.insert("providers.novita.base_url".to_string(), v.clone()); + } + if let Some(v) = self.providers.novita.model.as_ref() { + out.insert("providers.novita.model".to_string(), v.clone()); + } for (k, v) in &self.extras { out.insert(k.clone(), v.to_string()); @@ -338,6 +406,8 @@ impl ConfigToml { ProviderKind::Deepseek => DEFAULT_DEEPSEEK_BASE_URL.to_string(), ProviderKind::NvidiaNim => DEFAULT_NVIDIA_NIM_BASE_URL.to_string(), ProviderKind::Openai => DEFAULT_OPENAI_BASE_URL.to_string(), + 
ProviderKind::Openrouter => DEFAULT_OPENROUTER_BASE_URL.to_string(), + ProviderKind::Novita => DEFAULT_NOVITA_BASE_URL.to_string(), }); let model = cli @@ -351,6 +421,8 @@ impl ConfigToml { ProviderKind::Deepseek => DEFAULT_DEEPSEEK_MODEL.to_string(), ProviderKind::NvidiaNim => DEFAULT_NVIDIA_NIM_MODEL.to_string(), ProviderKind::Openai => DEFAULT_OPENAI_MODEL.to_string(), + ProviderKind::Openrouter => DEFAULT_OPENROUTER_MODEL.to_string(), + ProviderKind::Novita => DEFAULT_NOVITA_MODEL.to_string(), }); let model = normalize_model_for_provider(provider, &model); diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index 73d81315..e8637999 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -9,8 +9,25 @@ use std::pin::Pin; use std::time::Duration; use anyhow::{Context, Result}; +use tokio::time::timeout as tokio_timeout; use serde_json::{Value, json}; +/// Default idle timeout for SSE stream reads (300 seconds = 5 minutes). +/// After this period with no data, the stream is considered stalled and +/// yields a recoverable error so the caller can retry. +const DEFAULT_STREAM_IDLE_TIMEOUT: Duration = Duration::from_secs(300); + +/// Reads the `DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS` env var, falling back to +/// the default 300s. The parsed value is clamped to [1, 3600] seconds. 
+fn stream_idle_timeout() -> Duration { + let secs = std::env::var("DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS") + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(DEFAULT_STREAM_IDLE_TIMEOUT.as_secs()) + .clamp(1, 3600); + Duration::from_secs(secs) +} + use crate::llm_client::StreamEventBox; use crate::logging; use crate::models::{ @@ -177,8 +194,20 @@ impl DeepSeekClient { let is_reasoning_model = requires_reasoning_content(&model); let mut byte_stream = std::pin::pin!(byte_stream); + let idle = stream_idle_timeout(); - while let Some(chunk_result) = byte_stream.next().await { + loop { + let chunk_result = match tokio_timeout(idle, byte_stream.next()).await { + Ok(Some(result)) => result, + Ok(None) => break, // Stream ended normally + Err(_elapsed) => { + yield Err(anyhow::anyhow!( + "SSE stream idle timeout after {}s — no data received", + idle.as_secs(), + )); + break; + } + }; let chunk = match chunk_result { Ok(bytes) => bytes, Err(e) => { diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index f7a4f861..c325479f 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -1036,13 +1036,8 @@ fn context_input_budget(model: &str, requested_output_tokens: u32) -> Option bool { - let lower = message.to_lowercase(); - lower.contains("maximum context length") - || lower.contains("context length") - || lower.contains("context_length") - || lower.contains("prompt is too long") - || (lower.contains("requested") && lower.contains("tokens") && lower.contains("maximum")) - || lower.contains("context window") + crate::error_taxonomy::classify_error_message(message) + == crate::error_taxonomy::ErrorCategory::InvalidInput } fn emit_tool_audit(event: serde_json::Value) { @@ -3239,6 +3234,7 @@ impl Engine { mode, step_error_count, consecutive_tool_error_steps, + &[], ) .await { @@ -3339,11 +3335,22 @@ impl Engine { mode: AppMode, step_error_count: usize, consecutive_tool_error_steps: u32, + #[allow(clippy::needless_pass_by_ref_mut)] 
// error_categories will be used in future escalation logic + error_categories: &[crate::error_taxonomy::ErrorCategory], ) -> bool { if step_error_count == 0 && consecutive_tool_error_steps < 2 { return false; } + let has_context_overflow = error_categories + .iter() + .any(|&cat| cat == crate::error_taxonomy::ErrorCategory::InvalidInput); + + if !has_context_overflow && consecutive_tool_error_steps < 2 { + // Only escalate on non-context errors when we have consecutive failures + return false; + } + let snapshot = self .capacity_controller .last_snapshot() @@ -3373,12 +3380,19 @@ impl Engine { return false; } + let category_labels: Vec = error_categories + .iter() + .map(|c| c.to_string()) + .collect(); self.apply_verify_and_replan( turn, mode, Some(&forced), &format!( - "error_escalation: step_errors={step_error_count}, consecutive_steps={consecutive_tool_error_steps}" + "error_escalation: step_errors={}, consecutive_steps={}, categories={}", + step_error_count, + consecutive_tool_error_steps, + category_labels.join(",") ), ) .await diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 5a9bff25..dd1000a8 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -546,7 +546,7 @@ async fn error_escalation_triggers_replan_when_severe_or_repeated_failures() { let before_len = engine.session.messages.len(); let turn = TurnContext::new(10); let restarted = engine - .run_capacity_error_escalation_checkpoint(&turn, AppMode::Agent, 2, 2) + .run_capacity_error_escalation_checkpoint(&turn, AppMode::Agent, 2, 2, &[]) .await; assert!(restarted); diff --git a/crates/tui/src/error_taxonomy.rs b/crates/tui/src/error_taxonomy.rs index 34a719b7..7da83227 100644 --- a/crates/tui/src/error_taxonomy.rs +++ b/crates/tui/src/error_taxonomy.rs @@ -1,9 +1,4 @@ -// TODO(integrate): Wire into engine/UI — tracked as future work -#![allow(dead_code)] - //! Shared error taxonomy across client, tools, runtime, and UI. 
-//! -//! Not yet wired into consumers; will be adopted incrementally. use std::fmt; use crate::llm_client::LlmError; @@ -186,6 +181,56 @@ impl From for ErrorEnvelope { } } +/// Classify an error message string into an ErrorCategory. +/// +/// Uses heuristic keyword matching on the lowercased message. +/// This is a replacement for ad-hoc string matching in callers. +#[must_use] +pub fn classify_error_message(message: &str) -> ErrorCategory { + let lower = message.to_lowercase(); + + if lower.contains("maximum context length") + || lower.contains("context length") + || lower.contains("context_length") + || lower.contains("prompt is too long") + || (lower.contains("requested") && lower.contains("tokens") && lower.contains("maximum")) + || lower.contains("context window") + { + return ErrorCategory::InvalidInput; + } + if lower.contains("rate limit") + || lower.contains("too many requests") + || lower.contains("429") + || lower.contains("quota") + { + return ErrorCategory::RateLimit; + } + if lower.contains("timeout") || lower.contains("timed out") { + return ErrorCategory::Timeout; + } + if lower.contains("auth") || lower.contains("unauthorized") || lower.contains("api key") { + return ErrorCategory::Authentication; + } + if lower.contains("permission") || lower.contains("forbidden") || lower.contains("denied") { + return ErrorCategory::Authorization; + } + if lower.contains("network") || lower.contains("connection") || lower.contains("dns") { + return ErrorCategory::Network; + } + if lower.contains("parse") || lower.contains("syntax") || lower.contains("malformed") { + return ErrorCategory::Parse; + } + if lower.contains("not found") || lower.contains("unavailable") || lower.contains("not available") + { + return ErrorCategory::State; + } + if lower.contains("tool") { + return ErrorCategory::Tool; + } + + ErrorCategory::Internal +} + impl From for ErrorEnvelope { fn from(value: ToolError) -> Self { match value { diff --git a/crates/tui/src/settings.rs 
b/crates/tui/src/settings.rs index 258b27b4..0c261353 100644 --- a/crates/tui/src/settings.rs +++ b/crates/tui/src/settings.rs @@ -19,6 +19,8 @@ pub struct Settings { pub calm_mode: bool, /// Reduce animation and redraw churn pub low_motion: bool, + /// Enable fancy footer animations (water-spout strip, pulsing text) + pub fancy_animations: bool, /// Show thinking blocks from the model pub show_thinking: bool, /// Show detailed tool output @@ -47,6 +49,7 @@ impl Default for Settings { auto_compact: true, calm_mode: false, low_motion: false, + fancy_animations: false, show_thinking: true, show_tool_details: true, composer_density: "comfortable".to_string(), @@ -136,6 +139,9 @@ impl Settings { "low_motion" | "motion" => { self.low_motion = parse_bool(value)?; } + "fancy_animations" | "fancy" | "animations" => { + self.fancy_animations = parse_bool(value)?; + } "show_thinking" | "thinking" => { self.show_thinking = parse_bool(value)?; } @@ -244,6 +250,7 @@ impl Settings { lines.push(format!(" auto_compact: {}", self.auto_compact)); lines.push(format!(" calm_mode: {}", self.calm_mode)); lines.push(format!(" low_motion: {}", self.low_motion)); + lines.push(format!(" fancy_animations: {}", self.fancy_animations)); lines.push(format!(" show_thinking: {}", self.show_thinking)); lines.push(format!(" show_tool_details: {}", self.show_tool_details)); lines.push(format!(" composer_density: {}", self.composer_density)); @@ -275,6 +282,7 @@ impl Settings { ("auto_compact", "Auto-compact conversations: on/off"), ("calm_mode", "Calmer UI defaults: on/off"), ("low_motion", "Reduce animation and redraw churn: on/off"), + ("fancy_animations", "Fancy footer animations (water-spout strip): on/off"), ("show_thinking", "Show model thinking: on/off"), ("show_tool_details", "Show detailed tool output: on/off"), ( diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index e869a4d6..6d6cca11 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -404,6 +404,7 
@@ pub struct App { pub auto_compact: bool, pub calm_mode: bool, pub low_motion: bool, + pub fancy_animations: bool, pub show_thinking: bool, pub show_tool_details: bool, pub composer_density: ComposerDensity, @@ -653,6 +654,7 @@ impl App { let auto_compact = settings.auto_compact; let calm_mode = settings.calm_mode; let low_motion = settings.low_motion; + let fancy_animations = settings.fancy_animations; let show_thinking = settings.show_thinking; let show_tool_details = settings.show_tool_details; let composer_density = ComposerDensity::from_setting(&settings.composer_density); @@ -746,6 +748,7 @@ impl App { auto_compact, calm_mode, low_motion, + fancy_animations, show_thinking, show_tool_details, composer_density, diff --git a/crates/tui/src/tui/sidebar.rs b/crates/tui/src/tui/sidebar.rs index a7bee4f9..4a062115 100644 --- a/crates/tui/src/tui/sidebar.rs +++ b/crates/tui/src/tui/sidebar.rs @@ -376,10 +376,17 @@ fn render_sidebar_section(f: &mut Frame, area: Rect, title: &str, lines: Vec>, + /// Rendered lines for this cell (without trailing inter-cell spacers), + /// shared via `Arc` so cache enumeration is O(N) not O(N*lines). + lines: Arc>>, /// Whether this cell's rendered output was empty (e.g. Thinking hidden). /// Cached so we can skip empty cells without re-rendering. is_empty: bool, @@ -127,7 +136,7 @@ impl TranscriptViewCache { let is_empty = rendered.is_empty(); new_per_cell.push(CachedCell { revision: current_rev, - lines: rendered, + lines: Arc::new(rendered), is_empty, is_stream_continuation: cell.is_stream_continuation(), is_conversational: cell.is_conversational(), @@ -158,6 +167,9 @@ impl TranscriptViewCache { if cached.is_empty { continue; } + // Arc::make_mut would deep-clone only on write; since we just + // rebuilt `lines` from scratch we always need the owned data. + // Deref is zero-cost and gives us &[Line]. 
for (line_in_cell, line) in cached.lines.iter().enumerate() { lines.push(line.clone()); meta.push(TranscriptLineMeta::CellLine { diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 5308b16c..80204c23 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -24,6 +24,7 @@ use ratatui::{ widgets::Block, }; use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; +use tracing; use crate::audit::log_sensitive_event; use crate::client::DeepSeekClient; @@ -501,6 +502,23 @@ async fn run_event_loop( }], }); handle_tool_call_complete(app, &id, &name, &result); + + // Immediately refresh the task panel sidebar when a + // tool that changes task state completes, so the + // Tasks panel stays in sync with tool execution + // rather than waiting up to 2.5 s for the periodic + // poll. + if matches!( + name.as_str(), + "agent_spawn" | "agent_swarm" | "agent_cancel" | "todo_write" + ) { + let tasks = task_manager.list_tasks(Some(10)).await; + app.task_panel = tasks + .into_iter() + .map(task_summary_to_panel_entry) + .collect(); + last_task_refresh = Instant::now(); + } } EngineEvent::TurnStarted { turn_id } => { app.is_loading = true; @@ -916,6 +934,11 @@ async fn run_event_loop( // Handle bracketed paste events if let Event::Paste(text) = &evt { + tracing::debug!( + paste_len = text.len(), + preview = %text.chars().take(80).collect::(), + "Received bracketed paste event" + ); if app.onboarding == OnboardingState::ApiKey { // Paste into API key input app.insert_api_key_str(text); diff --git a/crates/tui/src/tui/widgets/mod.rs b/crates/tui/src/tui/widgets/mod.rs index 825fc568..0332a783 100644 --- a/crates/tui/src/tui/widgets/mod.rs +++ b/crates/tui/src/tui/widgets/mod.rs @@ -357,7 +357,34 @@ impl Renderable for ComposerWidget<'_> { } else { "" }; - for (idx, entry) in menu_entries.iter().enumerate() { + + // Compute a viewport window into the menu entries so the + // selection cursor stays visible even when there are more + // entries than 
available rows. + let menu_visible_rows = inner_area + .height + .saturating_sub(visual_rows as u16) + .saturating_sub(top_padding as u16) + .saturating_sub(1) // at least one row for the cursor + .max(1) as usize; + let menu_total = menu_entries.len(); + let menu_top = if menu_total <= menu_visible_rows { + 0 + } else { + // Keep the selection centered in the viewport. + let half = menu_visible_rows / 2; + if selected <= half { + 0 + } else if selected + half >= menu_total { + menu_total.saturating_sub(menu_visible_rows) + } else { + selected.saturating_sub(half) + } + }; + let menu_bottom = (menu_top + menu_visible_rows).min(menu_total); + + for idx in menu_top..menu_bottom { + let entry = &menu_entries[idx]; let is_selected = idx == selected; let style = if is_selected { Style::default()