From f0a4e2536068369c2b4bf37048c531a8cd460847 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Wed, 13 May 2026 13:12:14 -0500 Subject: [PATCH] fix(prompt): trim first-turn context noise --- CHANGELOG.md | 13 +- crates/tui/CHANGELOG.md | 13 +- crates/tui/src/project_context.rs | 169 ++++++++++++++++---- crates/tui/src/prompts.rs | 2 +- crates/tui/src/prompts/agent.txt | 7 +- crates/tui/src/prompts/approvals/auto.md | 2 +- crates/tui/src/prompts/approvals/never.md | 2 +- crates/tui/src/prompts/approvals/suggest.md | 2 +- crates/tui/src/prompts/base.md | 8 +- crates/tui/src/prompts/base.txt | 8 +- crates/tui/src/prompts/modes/agent.md | 10 +- crates/tui/src/prompts/modes/plan.md | 4 +- crates/tui/src/prompts/modes/yolo.md | 4 +- crates/tui/src/skills/mod.rs | 12 +- crates/tui/src/tui/context_inspector.rs | 146 ++++++++++++++++- 15 files changed, 331 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb3a9eb8..82b0ce2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,9 +34,6 @@ mega-files that had grown around the agent loop and TUI. by round-trip. - **Cancel-all shell jobs.** A single action stops every running background shell command instead of cancelling them one-by-one. -- **Session title in composer border.** The top-right of the composer - shows the derived session title so the active thread is visible - without opening the sessions panel. - **`edit_file` tolerates typographic punctuation drift.** When the exact-match and leading-whitespace-fuzzy passes both fail and `fuzz: true` is set, the tool retries with smart quotes (`"`/`"` → @@ -68,6 +65,14 @@ mega-files that had grown around the agent loop and TUI. (v0.8.6 era), `PROMPT_ANALYSIS.md`, and the redundant `DEPENDENCY_GRAPH.md` no longer ship in releases; `docs/ARCHITECTURE.md` remains the canonical crate-layout reference. 
+- **First-turn prompt context is leaner and easier to audit.** The + generated project context pack now ignores hidden tool/cache state, + balances top-level directories before descending, and `/context` + shows named prompt layers instead of a single opaque system blob. +- **Model-visible prompt policy de-conflicted.** The base and mode + prompts no longer forbid useful `deepseek` CLI diagnostics, no + longer require checklists for simple one-step work, and align + long-session compaction guidance around the 60% threshold. ### Fixed @@ -98,6 +103,8 @@ mega-files that had grown around the agent loop and TUI. users reach the same surface. - **VTE flicker terminals get reduced motion** by default to avoid thrashing on terminals that mishandle frequent partial redraws. +- **Composer border no longer shows the derived session title**, keeping + the composer chrome reserved for editor and mode state. ## [0.8.33] - 2026-05-12 diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md index cb3a9eb8..82b0ce2e 100644 --- a/crates/tui/CHANGELOG.md +++ b/crates/tui/CHANGELOG.md @@ -34,9 +34,6 @@ mega-files that had grown around the agent loop and TUI. by round-trip. - **Cancel-all shell jobs.** A single action stops every running background shell command instead of cancelling them one-by-one. -- **Session title in composer border.** The top-right of the composer - shows the derived session title so the active thread is visible - without opening the sessions panel. - **`edit_file` tolerates typographic punctuation drift.** When the exact-match and leading-whitespace-fuzzy passes both fail and `fuzz: true` is set, the tool retries with smart quotes (`"`/`"` → @@ -68,6 +65,14 @@ mega-files that had grown around the agent loop and TUI. (v0.8.6 era), `PROMPT_ANALYSIS.md`, and the redundant `DEPENDENCY_GRAPH.md` no longer ship in releases; `docs/ARCHITECTURE.md` remains the canonical crate-layout reference. 
+- **First-turn prompt context is leaner and easier to audit.** The + generated project context pack now ignores hidden tool/cache state, + balances top-level directories before descending, and `/context` + shows named prompt layers instead of a single opaque system blob. +- **Model-visible prompt policy de-conflicted.** The base and mode + prompts no longer forbid useful `deepseek` CLI diagnostics, no + longer require checklists for simple one-step work, and align + long-session compaction guidance around the 60% threshold. ### Fixed @@ -98,6 +103,8 @@ mega-files that had grown around the agent loop and TUI. users reach the same surface. - **VTE flicker terminals get reduced motion** by default to avoid thrashing on terminals that mishandle frequent partial redraws. +- **Composer border no longer shows the derived session title**, keeping + the composer chrome reserved for editor and mode state. ## [0.8.33] - 2026-05-12 diff --git a/crates/tui/src/project_context.rs b/crates/tui/src/project_context.rs index acde4fbf..9fcead49 100644 --- a/crates/tui/src/project_context.rs +++ b/crates/tui/src/project_context.rs @@ -11,7 +11,7 @@ //! The loaded content is injected into the system prompt to give the agent //! context about the project's conventions, structure, and requirements. 
-use std::collections::BTreeMap; +use std::collections::{BTreeMap, VecDeque}; use std::fs; use std::path::{Path, PathBuf}; @@ -33,9 +33,9 @@ const GLOBAL_AGENTS_RELATIVE_PATH: &[&str] = &[".deepseek", "AGENTS.md"]; /// Maximum size for project context files (to prevent loading huge files) const MAX_CONTEXT_SIZE: usize = 100 * 1024; // 100KB const PACK_README_MAX_CHARS: usize = 4_000; -const PACK_MAX_ENTRIES: usize = 400; -const PACK_MAX_SOURCE_FILES: usize = 80; -const PACK_MAX_CONFIG_FILES: usize = 80; +const PACK_MAX_ENTRIES: usize = 220; +const PACK_MAX_SOURCE_FILES: usize = 60; +const PACK_MAX_CONFIG_FILES: usize = 60; const PACK_MAX_DEPTH: usize = 4; const PACK_IGNORED_DIRS: &[&str] = &[ ".git", @@ -51,6 +51,13 @@ const PACK_IGNORED_DIRS: &[&str] = &[ ".pytest_cache", ".DS_Store", ]; +const PACK_ALLOWED_HIDDEN_DIRS: &[&str] = &[".github"]; +const PACK_ALLOWED_HIDDEN_FILES: &[&str] = &[".editorconfig", ".gitattributes", ".gitignore"]; +const PACK_IGNORED_FILE_NAMES: &[&str] = &[".DS_Store"]; +const PACK_IGNORED_FILE_EXTENSIONS: &[&str] = &[ + "7z", "avif", "db", "gif", "gz", "ico", "jpeg", "jpg", "log", "mov", "mp3", "mp4", "pdf", + "png", "sqlite", "tar", "tgz", "wav", "webp", "zip", +]; // === Errors === @@ -197,38 +204,70 @@ fn collect_pack_entries(root: &Path, dir: &Path, depth: usize, out: &mut Vec>(); - children.sort_by_key(|entry| entry.path()); + let mut queue = VecDeque::new(); + queue.push_back((dir.to_path_buf(), depth)); - for entry in children { - if out.len() >= PACK_MAX_ENTRIES { - break; - } - let path = entry.path(); - let Some(name) = path.file_name().and_then(|name| name.to_str()) else { - continue; - }; - let Ok(file_type) = entry.file_type() else { - continue; - }; - if file_type.is_dir() && PACK_IGNORED_DIRS.contains(&name) { + while let Some((current_dir, current_depth)) = queue.pop_front() { + if current_depth > PACK_MAX_DEPTH || out.len() >= PACK_MAX_ENTRIES { continue; } - if let Some(relative) = relative_slash_path(root, &path) { - if 
file_type.is_dir() { - out.push(format!("{relative}/")); - collect_pack_entries(root, &path, depth + 1, out); - } else if file_type.is_file() { - out.push(relative); + let Ok(read_dir) = fs::read_dir(&current_dir) else { + continue; + }; + let mut children = read_dir.filter_map(Result::ok).collect::<Vec<_>>(); + children.sort_by_key(|entry| entry.path()); + + for entry in children { + if out.len() >= PACK_MAX_ENTRIES { + break; + } + let path = entry.path(); + let Some(name) = path.file_name().and_then(|name| name.to_str()) else { + continue; + }; + let Ok(file_type) = entry.file_type() else { + continue; + }; + if file_type.is_dir() && should_ignore_pack_dir(name) { + continue; + } + if file_type.is_file() && should_ignore_pack_file(name) { + continue; + } + + if let Some(relative) = relative_slash_path(root, &path) { + if file_type.is_dir() { + out.push(format!("{relative}/")); + if current_depth < PACK_MAX_DEPTH { + queue.push_back((path, current_depth + 1)); + } + } else if file_type.is_file() { + out.push(relative); + } } } } } +fn should_ignore_pack_dir(name: &str) -> bool { + PACK_IGNORED_DIRS.contains(&name) + || (name.starts_with('.') && !PACK_ALLOWED_HIDDEN_DIRS.contains(&name)) +} + +fn should_ignore_pack_file(name: &str) -> bool { + if name.starts_with('.') && !PACK_ALLOWED_HIDDEN_FILES.contains(&name) { + return true; + } + if PACK_IGNORED_FILE_NAMES.contains(&name) { + return true; + } + let Some((_, ext)) = name.rsplit_once('.') else { + return false; + }; + PACK_IGNORED_FILE_EXTENSIONS.contains(&ext.to_ascii_lowercase().as_str()) +} + fn relative_slash_path(root: &Path, path: &Path) -> Option<String> { let relative = path.strip_prefix(root).ok()?; let mut parts = Vec::new(); @@ -865,6 +904,82 @@ mod tests { ); } + #[test] + fn project_context_pack_ignores_agent_state_and_binary_noise() { + let tmp = tempdir().expect("tempdir"); + fs::create_dir_all(tmp.path().join("src")).expect("mkdir src"); + fs::write(tmp.path().join("src").join("main.rs"), "fn main() 
{}").expect("write src"); + fs::write(tmp.path().join(".DS_Store"), "noise").expect("write ds store"); + fs::write(tmp.path().join("paper.pdf"), "not a real pdf").expect("write pdf"); + fs::create_dir_all(tmp.path().join(".deepseek").join("state")).expect("mkdir state"); + fs::write( + tmp.path() + .join(".deepseek") + .join("state") + .join("subagents.v1.json"), + "{}", + ) + .expect("write state"); + fs::create_dir_all(tmp.path().join(".playwright-mcp")).expect("mkdir playwright"); + fs::write( + tmp.path().join(".playwright-mcp").join("trace.log"), + "noise", + ) + .expect("write log"); + fs::create_dir_all(tmp.path().join(".agents").join("skills").join("demo")) + .expect("mkdir skills"); + fs::write( + tmp.path() + .join(".agents") + .join("skills") + .join("demo") + .join("SKILL.md"), + "skill body", + ) + .expect("write skill"); + fs::create_dir_all(tmp.path().join(".github").join("workflows")).expect("mkdir workflows"); + fs::write( + tmp.path().join(".github").join("workflows").join("ci.yml"), + "name: ci", + ) + .expect("write workflow"); + + let pack = generate_project_context_pack(tmp.path()).expect("pack"); + + assert!(pack.contains("\"src/main.rs\""), "{pack}"); + assert!(pack.contains("\".github/\""), "{pack}"); + assert!(pack.contains("\".github/workflows/ci.yml\""), "{pack}"); + assert!(!pack.contains(".deepseek"), "{pack}"); + assert!(!pack.contains(".playwright-mcp"), "{pack}"); + assert!(!pack.contains(".agents"), "{pack}"); + assert!(!pack.contains(".DS_Store"), "{pack}"); + assert!(!pack.contains("paper.pdf"), "{pack}"); + assert!(!pack.contains("trace.log"), "{pack}"); + } + + #[test] + fn project_context_pack_keeps_later_top_level_dirs_under_budget() { + let tmp = tempdir().expect("tempdir"); + let noisy = tmp.path().join("aaa-many-files"); + fs::create_dir_all(&noisy).expect("mkdir noisy"); + for i in 0..(PACK_MAX_ENTRIES + 20) { + fs::write(noisy.join(format!("file-{i:03}.rs")), "fn f() {}").expect("write noisy"); + } + 
fs::create_dir_all(tmp.path().join("zzz-important")).expect("mkdir important"); + fs::write( + tmp.path().join("zzz-important").join("main.rs"), + "fn important() {}", + ) + .expect("write important"); + + let pack = generate_project_context_pack(tmp.path()).expect("pack"); + + assert!( + pack.contains("\"zzz-important/\""), + "breadth-first packing should keep later top-level directories visible:\n{pack}" + ); + } + #[test] fn test_load_global_agents_when_project_has_no_context() { let workspace = tempdir().expect("workspace tempdir"); diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index 5b22a2ff..f801ed59 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -664,7 +664,7 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval( 1. Use `/compact` to summarize earlier context and free up space\n\ 2. The system will preserve important information (files you're working on, recent messages, tool results)\n\ 3. After compaction, you'll see a summary of what was discussed and can continue seamlessly\n\n\ - If you notice context is getting long (>80%), proactively suggest using `/compact` to the user.\n\n\ + If you notice context is getting long (>60% during sustained work), proactively suggest using `/compact` to the user.\n\n\ ### Prompt-cache awareness\n\n\ DeepSeek caches the longest *byte-stable prefix* of every request and charges roughly 100× less for cache-hit tokens than miss tokens. The system prompt above is layered most-static-first specifically so the prefix stays stable turn-over-turn. To keep cache hits high:\n\ - **Working set location:** the current repo working set is stored on new user messages inside a `` block. 
Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\ diff --git a/crates/tui/src/prompts/agent.txt b/crates/tui/src/prompts/agent.txt index fba4977c..42ee3301 100644 --- a/crates/tui/src/prompts/agent.txt +++ b/crates/tui/src/prompts/agent.txt @@ -3,9 +3,10 @@ Read-only tools (reads, searches, persistent RLM session tools, agent status queries, git inspection) run silently. Any write, patch, shell execution, sub-agent session open, or CSV batch operation will ask for approval first. -Before requesting approval for writes, lay out your work with `checklist_write` so the user can see what -you intend to do and approve with context. Complex changes should also get an `update_plan` first. -Decomposition builds trust — a clear plan gets faster approvals. +Before requesting approval for multi-step writes, lay out your work with `checklist_write` so the user +can see what you intend to do and approve with context. Complex changes should also get an +`update_plan` first. For simple writes, state the direct edit and proceed through the normal approval +flow. ## Sub-agent completion sentinel diff --git a/crates/tui/src/prompts/approvals/auto.md b/crates/tui/src/prompts/approvals/auto.md index 7e97d9c0..f801a577 100644 --- a/crates/tui/src/prompts/approvals/auto.md +++ b/crates/tui/src/prompts/approvals/auto.md @@ -4,6 +4,6 @@ All tool calls are pre-approved. You will not see approval prompts — your acti This means you carry more responsibility: - Pause before destructive operations (deletes, force-pushes, `rm -rf`). -- Use `checklist_write` to make your work visible even though no one is watching. +- Use `checklist_write` for multi-step work so progress stays visible even though no one is watching. - If you're uncertain about a course of action, state your reasoning before proceeding. - The user can interrupt you at any time. 
diff --git a/crates/tui/src/prompts/approvals/never.md b/crates/tui/src/prompts/approvals/never.md index 35d687e6..7edc1794 100644 --- a/crates/tui/src/prompts/approvals/never.md +++ b/crates/tui/src/prompts/approvals/never.md @@ -3,7 +3,7 @@ All write operations are blocked. You can read, search, and investigate, but you cannot modify the workspace. This is a read-only mode. Use it to: -- Build thorough plans with `update_plan` and `checklist_write`. +- Build thorough plans with `checklist_write` and, for complex initiatives, `update_plan`. - Investigate codebases, trace logic, and gather context. - Spawn read-only sub-agents for parallel exploration. diff --git a/crates/tui/src/prompts/approvals/suggest.md b/crates/tui/src/prompts/approvals/suggest.md index 5ad8fe23..dadb072e 100644 --- a/crates/tui/src/prompts/approvals/suggest.md +++ b/crates/tui/src/prompts/approvals/suggest.md @@ -3,7 +3,7 @@ Read-only operations run silently. Write operations (file edits, patches, shell execution, sub-agent spawns, CSV batches) require user approval before executing. When you need approval: -1. First, lay out your approach with `checklist_write` — visible plans build trust. +1. For multi-step changes, lay out your approach with `checklist_write`. 2. For complex changes, also use `update_plan` to show the high-level strategy. 3. The user will see your proposed action and can approve or deny it. diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md index fcab3297..e1ea5ed9 100644 --- a/crates/tui/src/prompts/base.md +++ b/crates/tui/src/prompts/base.md @@ -1,4 +1,4 @@ -You are DeepSeek TUI. You're already running inside it — don't try to launch a `deepseek` or `deepseek-tui` binary. +You are DeepSeek TUI. You're already running inside it. Do not launch a nested interactive `deepseek` or `deepseek-tui` session unless the user explicitly asks. 
Using `deepseek` CLI subcommands such as `deepseek --version`, `deepseek -p`, `deepseek doctor`, or `deepseek auth status` is allowed when it directly helps the task. ## Language @@ -34,7 +34,7 @@ The user can see their own message. Use the first line to show forward motion. ## Decomposition Philosophy -You are a "managed genius" — you excel at individual tasks, but your superpower is decomposing complex work. **Always decompose before you act.** A few minutes spent planning saves many minutes of thrashing. +Decompose work when the task is complex enough to benefit from it. For simple lookups, focused one-file fixes, or direct commands, act directly and keep the response short. For larger work, a few minutes spent planning saves many minutes of thrashing. Use three decomposition patterns, selected by task scope: @@ -44,7 +44,7 @@ Use three decomposition patterns, selected by task scope: **RECURSIVE** — When sub-tasks reveal sub-problems: decompose recursively until each leaf is tractable. Keep the active leaves in `checklist_write`; use `update_plan` only when a genuinely complex initiative needs durable high-level strategy metadata. Propagate findings upward when sub-problems resolve. -Your default workflow for any non-trivial request: +Your default workflow for tasks estimated at 5+ concrete steps: 1. **`checklist_write`** — break the work into concrete, verifiable steps. Mark the first one `in_progress`. This populates the sidebar so the user can see what you're doing. 2. **Execute** — work through each checklist item, updating status as you go. 3. **For complex initiatives only**, add `update_plan` as high-level strategy. Do not mirror the checklist into a second tracker. @@ -130,7 +130,7 @@ The RLM paper's core design is symbolic state: the long input and intermediate v For exact counts or structured aggregates, compute them directly in Python inside the REPL (`len`, regexes, parsers, counters) and use child LLM calls only for semantic interpretation. 
When you chunk a whole input, use `chunk()` and report coverage explicitly: chunks processed, total chunks, line/char ranges, and any skipped sections. Cross-check surprising aggregate results with deterministic code before presenting them. Use `finalize(...)` for the answer you want returned; if it comes back as a `var_handle`, call `handle_read` for a bounded slice, count, or JSON projection instead of asking the runtime to replay the whole value. ## Context -You have a 1 M-token context window. When usage creeps above ~80%, suggest `/compact` to the user — it summarises earlier turns so you can keep working without losing thread. +You have a 1M-token context window. During long coding sessions, suggest `/compact` when usage approaches ~60% or when the app marks context pressure as high. It summarizes earlier turns so you can keep working without losing thread. Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking`) before final answers. These are invisible to the user but count against context. Cost/token estimates are approximate; treat them as a rough guide. diff --git a/crates/tui/src/prompts/base.txt b/crates/tui/src/prompts/base.txt index 595f35d4..b7c3f9c9 100644 --- a/crates/tui/src/prompts/base.txt +++ b/crates/tui/src/prompts/base.txt @@ -1,10 +1,10 @@ -You are DeepSeek TUI. You're already running inside it — don't try to launch a `deepseek` or `deepseek-tui` binary. +You are DeepSeek TUI. You're already running inside it. Do not launch a nested interactive `deepseek` or `deepseek-tui` session unless the user explicitly asks. Using `deepseek` CLI subcommands such as `deepseek --version`, `deepseek -p`, `deepseek doctor`, or `deepseek auth status` is allowed when it directly helps the task. ## Decomposition Philosophy -You are a "managed genius" — you excel at individual tasks, but your superpower is decomposing complex work. **Always decompose before you act.** A few minutes spent planning saves many minutes of thrashing. 
+Decompose work when the task is complex enough to benefit from it. For simple lookups, focused one-file fixes, or direct commands, act directly and keep the response short. For larger work, a few minutes spent planning saves many minutes of thrashing. -Your default workflow for any non-trivial request: +Your default workflow for tasks estimated at 5+ concrete steps: 1. **`checklist_write`** — break the work into concrete, verifiable steps. Mark the first one `in_progress`. This populates the sidebar so the user can see what you're doing. 2. **Execute** — work through each checklist item, updating status as you go. 3. **For complex initiatives only**, add `update_plan` as high-level strategy. Do not mirror the checklist into a second tracker. @@ -31,7 +31,7 @@ RLM works by keeping the long input and intermediate values as symbolic REPL sta The Python helpers visible inside the REPL (`sub_query`, `sub_query_batch`, `sub_query_map`, `sub_rlm`, `finalize`, and related context helpers) are NOT separately-callable tools — they are functions the sub-agent uses inside its Python code. ## Context -You have a 1 M-token context window. When usage creeps above ~80%, suggest `/compact` to the user — it summarises earlier turns so you can keep working without losing thread. +You have a 1M-token context window. During long coding sessions, suggest `/compact` when usage approaches ~60% or when the app marks context pressure as high. It summarizes earlier turns so you can keep working without losing thread. Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking`) before final answers. These are invisible to the user but count against context. Cost/token estimates are approximate; treat them as a rough guide. 
diff --git a/crates/tui/src/prompts/modes/agent.md b/crates/tui/src/prompts/modes/agent.md index 5e8d9f2b..1eea5c0e 100644 --- a/crates/tui/src/prompts/modes/agent.md +++ b/crates/tui/src/prompts/modes/agent.md @@ -5,10 +5,10 @@ You are running in Agent mode — autonomous task execution with tool access. Read-only tools (reads, searches, persistent RLM session tools, agent status queries, git inspection) run silently. Any write, patch, shell execution, sub-agent session open, or CSV batch operation will ask for approval first. -Before requesting approval for writes, lay out your work with `checklist_write` so the user can see what -you intend to do and approve with context. Use `update_plan` only when a complex initiative needs -high-level strategy metadata that is not just a copy of the checklist. -Decomposition builds trust — a clear Work checklist gets faster approvals. +Before requesting approval for multi-step writes, lay out your work with `checklist_write` so the user +can see what you intend to do and approve with context. Use `update_plan` only when a complex +initiative needs high-level strategy metadata that is not just a copy of the checklist. +For simple writes, state the direct edit and proceed through the normal approval flow. For multi-step initiatives, keep `checklist_write` current. Add `update_plan` only for genuinely useful strategy. @@ -26,6 +26,6 @@ Don't sequence approvals one at a time — the user wants context, not interrupt Long sessions accumulate context. 
To stay fast: - Open sub-agent sessions for independent work instead of doing everything sequentially - Batch reads/searches/git-inspections into parallel tool calls -- Suggest `/compact` when context nears 80% — the compaction relay preserves open blockers +- Suggest `/compact` when context nears 60% during sustained work — the compaction relay preserves open blockers - Use `note` for decisions you'll need across compaction boundaries - A 3-turn session that fans out to sub-agents finishes faster AND stays responsive longer than a 15-turn sequential grind diff --git a/crates/tui/src/prompts/modes/plan.md b/crates/tui/src/prompts/modes/plan.md index 583058d0..cc59277a 100644 --- a/crates/tui/src/prompts/modes/plan.md +++ b/crates/tui/src/prompts/modes/plan.md @@ -2,8 +2,8 @@ You are running in Plan mode — design before implementing. -Investigate first, act later. Use `checklist_write` for visible, granular progress. Add `update_plan` -only when high-level strategy adds value beyond the checklist. +Investigate first, act later. Use `checklist_write` for visible, granular progress on multi-step +investigations. Add `update_plan` only when high-level strategy adds value beyond the checklist. All writes and patches are blocked — you can read the world but you can't change it. Shell and code execution are unavailable. diff --git a/crates/tui/src/prompts/modes/yolo.md b/crates/tui/src/prompts/modes/yolo.md index d6572b90..e1f4f795 100644 --- a/crates/tui/src/prompts/modes/yolo.md +++ b/crates/tui/src/prompts/modes/yolo.md @@ -5,7 +5,7 @@ You are running in YOLO mode — full autonomy, all actions pre-approved. All actions auto-approved. Move fast, but think before you write. If you're about to delete files, overwrite user work, or run destructive commands, pause and double-check. The undo button is the user's Git history. -Even with auto-approval, create a `checklist_write` first so your work is visible and trackable in the -sidebar. 
Decomposition is not red tape — it's how you organize complex work and demonstrate thoroughness. +Even with auto-approval, use `checklist_write` for work that has several concrete steps so progress is +visible and trackable in the sidebar. Keep simple commands and focused edits direct. For multi-step initiatives, keep `checklist_write` current. Add `update_plan` only when a high-level strategy would help and do not duplicate the checklist there. diff --git a/crates/tui/src/skills/mod.rs b/crates/tui/src/skills/mod.rs index 6ab02c16..e24fd7b8 100644 --- a/crates/tui/src/skills/mod.rs +++ b/crates/tui/src/skills/mod.rs @@ -22,7 +22,7 @@ use std::collections::{HashMap, HashSet}; use crate::logging; -const MAX_SKILL_DESCRIPTION_CHARS: usize = 512; +const MAX_SKILL_DESCRIPTION_CHARS: usize = 280; const MAX_AVAILABLE_SKILLS_CHARS: usize = 12_000; // === Defaults === @@ -551,12 +551,10 @@ instructions when using a specific skill.\n\n", out.push_str( "\n### How to use skills\n\ -- Discovery: The list above is the skills available in this session. Skill bodies live on disk at the listed paths.\n\ -- Trigger rules: If the user names a skill (with `$SkillName`, `/skill `, or plain text) OR the task clearly matches a skill description above, use that skill for that turn. Multiple mentions mean use them all. Do not carry skills across turns unless re-mentioned.\n\ -- Missing/blocked: If a named skill is missing or its `SKILL.md` cannot be read, say so briefly and continue with the best fallback.\n\ -- Progressive disclosure: After deciding to use a skill, read only that skill's `SKILL.md`. When it references relative paths such as `scripts/foo.py`, resolve them relative to the skill directory.\n\ -- Context hygiene: Load only the specific referenced files needed for the task. 
Avoid bulk-loading unrelated skill resources.\n\ -- Safety: Do not execute scripts from a community skill unless the user explicitly asks or the skill has been trusted for script use.\n", +- Skill bodies live on disk at the listed paths. When a skill is relevant, open only that skill's `SKILL.md` and the specific companion files it references.\n\ +- Trigger rules: use a skill when the user names it (`$SkillName`, `/skill `, or plain text) or the task clearly matches its description. Do not carry skills across turns unless re-mentioned.\n\ +- Missing/blocked: if a named skill is missing or cannot be read, say so briefly and continue with the best fallback.\n\ +- Safety: do not execute scripts from a community skill unless the user explicitly asks or the skill has been trusted for script use.\n", ); Some(out) diff --git a/crates/tui/src/tui/context_inspector.rs b/crates/tui/src/tui/context_inspector.rs index 12a6cca4..ef896d85 100644 --- a/crates/tui/src/tui/context_inspector.rs +++ b/crates/tui/src/tui/context_inspector.rs @@ -22,6 +22,70 @@ const CONTEXT_CRITICAL_THRESHOLD_PERCENT: f64 = 95.0; const MAX_REFERENCE_ROWS: usize = 12; const MAX_TOOL_ROWS: usize = 8; +const SYSTEM_LAYER_MARKERS: &[(&str, &str, PromptLayerKind)] = &[ + ( + "Project context", + " &'static str { + match self { + Self::Static => "cache-friendly", + Self::Dynamic => "changes by session/turn", + } + } +} + +#[derive(Debug)] +struct PromptTextLayer<'a> { + name: &'static str, + kind: PromptLayerKind, + body: &'a str, +} + #[must_use] pub fn build_context_inspector_text(app: &App) -> String { let mut out = String::new(); @@ -146,13 +210,27 @@ fn push_system_prompt_structure(out: &mut String, app: &App) { ); } Some(SystemPrompt::Text(text)) => { - // Single text blob — stable/volatile not distinguishable - let has_working = text.contains(WORKING_SET_MARKER); - if has_working { + let layers = split_text_prompt_layers(text); + if layers.len() > 1 + || layers + .first() + .is_some_and(|layer| 
layer.name != "System prompt") + { let _ = writeln!( out, - " Single text blob (~{total_est} tokens) [contains working-set marker — structure unclear]" + " Text prompt layers: {} layer(s), ~{total_est} tokens", + layers.len() ); + for layer in layers { + let tokens = text_tokens(layer.body); + let _ = writeln!( + out, + " - {}: ~{} tokens [{}]", + layer.name, + tokens, + layer.kind.label() + ); + } } else { let _ = writeln!( out, @@ -173,6 +251,42 @@ fn push_system_prompt_structure(out: &mut String, app: &App) { ); } +fn split_text_prompt_layers(text: &str) -> Vec<PromptTextLayer<'_>> { + let mut starts = SYSTEM_LAYER_MARKERS + .iter() + .filter_map(|(name, marker, kind)| text.find(marker).map(|idx| (idx, *name, *kind))) + .collect::<Vec<_>>(); + starts.sort_by_key(|(idx, _, _)| *idx); + + let Some((first_idx, _, _)) = starts.first().copied() else { + return vec![PromptTextLayer { + name: "System prompt", + kind: PromptLayerKind::Static, + body: text.trim(), + }]; + }; + + let mut layers = Vec::new(); + if first_idx > 0 { + layers.push(PromptTextLayer { + name: "Global system prefix", + kind: PromptLayerKind::Static, + body: text[..first_idx].trim(), + }); + } + + for (i, (start, name, kind)) in starts.iter().enumerate() { + let end = starts.get(i + 1).map_or(text.len(), |(idx, _, _)| *idx); + layers.push(PromptTextLayer { + name, + kind: *kind, + body: text[*start..end].trim(), + }); + } + + layers +} + fn push_references(out: &mut String, references: &[SessionContextReference]) { let _ = writeln!(out, "References"); let _ = writeln!(out, "----------"); @@ -453,15 +567,33 @@ mod tests { } #[test] - fn inspector_text_prompt_shows_single_blob() { + fn inspector_text_prompt_shows_layer_map() { let mut app = test_app(); app.system_prompt = Some(SystemPrompt::Text( - "You are DeepSeek TUI.\n## Repo Working Set\nsrc/".to_string(), + "You are DeepSeek TUI.\n\n\nRules\n\n\n## Project Context Pack\n{}\n\n## Environment\n- lang: en\n\n## Skills\n- rust\n\n## Context Management\nKeep compact\n\n## 
Compact\nTemplate\n\n## Repo Working Set\nsrc/".to_string(), )); let text = build_context_inspector_text(&app); assert!(text.contains("System Prompt Structure")); + assert!(text.contains("Text prompt layers")); + assert!(text.contains("Global system prefix")); + assert!(text.contains("Project context")); + assert!(text.contains("Project context pack")); + assert!(text.contains("Environment")); + assert!(text.contains("Skills")); + assert!(text.contains("Context management")); + assert!(text.contains("Compact template")); + assert!(text.contains("Volatile working set")); + assert!(text.contains("changes by session/turn")); + } + + #[test] + fn inspector_text_prompt_without_markers_shows_single_blob() { + let mut app = test_app(); + app.system_prompt = Some(SystemPrompt::Text("You are DeepSeek TUI.".to_string())); + + let text = build_context_inspector_text(&app); assert!(text.contains("Single text blob")); - assert!(text.contains("working-set marker")); + assert!(text.contains("stable prefix only")); } }