From 42c684367f60aaff59fb6ca80c8f3094afe6fa1c Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Sun, 26 Apr 2026 23:34:17 -0500 Subject: [PATCH] feat(rlm): implement true RLM loop per Algorithm 1 (Zhang et al., arXiv:2512.24601) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the true Recursive Language Model (RLM) inference paradigm: - rlm/mod.rs — module root with public API - rlm/prompt.rs — RLM system prompt teaching the model to write code - rlm/turn.rs — Algorithm 1 implementation: - P stored as REPL variable (NEVER in LLM context window) - Metadata-only context sent to root LLM (constant-size) - LLM generates Python code, not free text - Code executed in PythonRuntime with llm_query() for recursion - FINAL() detection ends the loop - Op::RlmQuery variant in ops.rs - /rlm command in the command system - AppAction::RlmQuery handler in ui.rs - PythonRuntime::with_state_path made public for RLM integration - 18 new unit tests for code extraction, metadata building, truncation Key differences from previous 'RLM-inspired' approach: ✅ P is external (REPL variable), not in LLM context ✅ Only metadata(state) in LLM context (constant-size) ✅ LLM generates code, not free text + tool calls ✅ sub-LLM recursion via llm_query() inside REPL code ✅ FINAL() mechanism for programmatic termination --- crates/tui/src/commands/mod.rs | 56 ++++ crates/tui/src/core/engine.rs | 96 ++++++ crates/tui/src/core/ops.rs | 12 + crates/tui/src/main.rs | 1 + crates/tui/src/repl/runtime.rs | 5 +- crates/tui/src/rlm/mod.rs | 39 +++ crates/tui/src/rlm/prompt.rs | 126 +++++++ crates/tui/src/rlm/turn.rs | 587 +++++++++++++++++++++++++++++++++ crates/tui/src/tui/app.rs | 12 + crates/tui/src/tui/ui.rs | 14 + 10 files changed, 945 insertions(+), 3 deletions(-) create mode 100644 crates/tui/src/rlm/mod.rs create mode 100644 crates/tui/src/rlm/prompt.rs create mode 100644 crates/tui/src/rlm/turn.rs diff --git a/crates/tui/src/commands/mod.rs b/crates/tui/src/commands/mod.rs index ac8fccf2..8a38d378 100644 --- a/crates/tui/src/commands/mod.rs +++ b/crates/tui/src/commands/mod.rs @@ -312,6 +312,13 @@ pub const COMMANDS: &[CommandInfo] = &[ description: "Run a structured code review on a file, diff, or PR", usage: "/review ", }, + // RLM command + CommandInfo { + name: "rlm", + aliases: &["recursive"], + description: "Recursive Language Model (RLM) — process a prompt via Algorithm 1 from Zhang et al. (arXiv:2512.24601). The prompt is stored in a REPL; the model writes code to process it.", + usage: "/rlm ", + }, // Debug/cost command CommandInfo { name: "cost", @@ -377,6 +384,9 @@ pub fn execute(cmd: &str, app: &mut App) -> CommandResult { "skill" => skills::run_skill(app, arg), "review" => review::review(app, arg), + // RLM command + "rlm" | "recursive" => rlm(app, arg), + // Legacy command migrations (kept out of registry/autocomplete intentionally). "set" => CommandResult::error( "The /set command was retired. Use /config to edit settings and /settings to inspect current values.", @@ -411,6 +421,52 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) -> config::set_config_value(app, key, value, persist) } +/// Execute a Recursive Language Model (RLM) turn — Algorithm 1 from +/// Zhang et al. (arXiv:2512.24601). +/// +/// The user's prompt text is passed as the argument. It will be stored +/// in the REPL as the `PROMPT` variable. The root LLM will only see +/// metadata about the REPL state, never the prompt text directly. +pub fn rlm(app: &mut App, arg: Option<&str>) -> CommandResult { + let prompt = match arg { + Some(p) if !p.trim().is_empty() => p.trim().to_string(), + _ => { + return CommandResult::error( + "Usage: /rlm \n\n\ + Process a prompt using a Recursive Language Model (RLM).\n\ + The prompt is stored in a REPL and the model writes code\n\ + to decompose and process it recursively." + .to_string(), + ); + } + }; + + // Sanity-check: RLM is most useful for longer prompts. + if prompt.len() < 50 { + return CommandResult::message( + "Tip: RLM is designed for processing LONG prompts (>100 chars). \ + For short queries, just type the message directly." + .to_string(), + ); + } + + let model = app.model.clone(); + let child_model = "deepseek-v4-flash".to_string(); + + CommandResult::with_message_and_action( + format!( + "Starting RLM turn (Algorithm 1) for {} chars of prompt using {}...", + prompt.len(), + model + ), + AppAction::RlmQuery { + prompt, + model, + child_model, + }, + ) +} + /// Get command info by name or alias pub fn get_command_info(name: &str) -> Option<&'static CommandInfo> { let name = name.strip_prefix('/').unwrap_or(name); diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 9b76a7f8..b77399e3 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -1311,6 +1311,14 @@ impl Engine { Op::CompactContext => { self.handle_manual_compaction().await; } + Op::RlmQuery { + content, + model, + child_model, + } => { + self.handle_rlm_query(content, model, child_model) + .await; + } Op::Shutdown => { break; } @@ -1645,6 +1653,94 @@ impl Engine { .await; } + /// Handle a Recursive Language Model (RLM) query — Algorithm 1 from + /// Zhang et al. (arXiv:2512.24601). + /// + /// The prompt is stored as PROMPT in a REPL variable. The root LLM + /// only sees metadata about the REPL state, never the prompt text + /// directly. The model generates Python code, which is executed by + /// the REPL. When FINAL() is called, the loop ends. + async fn handle_rlm_query( + &mut self, + content: String, + model: String, + child_model: String, + ) { + use crate::rlm::turn::run_rlm_turn; + + let Some(ref client) = self.deepseek_client else { + let err = self + .deepseek_client_error + .as_deref() + .map(|s| s.to_string()) + .unwrap_or_else(|| "API client not configured".to_string()); + let _ = self + .tx_event + .send(Event::error(format!("RLM error: {err}"), false)) + .await; + return; + }; + + let _ = self + .tx_event + .send(Event::status("RLM turn started (Algorithm 1)".to_string())) + .await; + + let result = run_rlm_turn( + client, + model, + content, + child_model, + self.tx_event.clone(), + ) + .await; + + let has_error = result.error.is_some(); + if let Some(ref err) = result.error { + let _ = self + .tx_event + .send(Event::error(format!("RLM error: {err}"), true)) + .await; + } + + if !result.answer.is_empty() { + // Add the final answer as an assistant message in the session. + self.add_session_message(crate::models::Message { + role: "assistant".to_string(), + content: vec![crate::models::ContentBlock::Text { + text: result.answer.clone(), + cache_control: None, + }], + }) + .await; + + let _ = self + .tx_event + .send(Event::MessageDelta { + index: 0, + content: result.answer.clone(), + }) + .await; + let _ = self + .tx_event + .send(Event::MessageComplete { index: 0 }) + .await; + } + + let _ = self + .tx_event + .send(Event::TurnComplete { + usage: result.usage, + status: if has_error { + crate::core::events::TurnOutcomeStatus::Failed + } else { + crate::core::events::TurnOutcomeStatus::Completed + }, + error: result.error, + }) + .await; + } + fn estimated_input_tokens(&self) -> usize { estimate_input_tokens_conservative( &self.session.messages, diff --git a/crates/tui/src/core/ops.rs b/crates/tui/src/core/ops.rs index efb42bcb..af07023a 100644 --- a/crates/tui/src/core/ops.rs +++ b/crates/tui/src/core/ops.rs @@ -65,6 +65,18 @@ pub enum Op { /// Run context compaction immediately. CompactContext, + /// Run a Recursive Language Model (RLM) turn per Algorithm 1 of + /// Zhang et al. (arXiv:2512.24601). The prompt is stored in the REPL + /// as the `PROMPT` variable; the root LLM only sees metadata. + RlmQuery { + /// The user's prompt — stored in REPL, NOT in the LLM context. + content: String, + /// The model to use for root LLM calls. + model: String, + /// The model to use for sub-LLM (llm_query) calls. + child_model: String, + }, + /// Shutdown the engine Shutdown, } diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index ff76db57..7d194ebc 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -37,6 +37,7 @@ mod project_context; mod project_doc; mod prompts; pub mod repl; +pub mod rlm; mod responses_api_proxy; mod runtime_api; mod runtime_threads; diff --git a/crates/tui/src/repl/runtime.rs b/crates/tui/src/repl/runtime.rs index 8435b542..1007bf01 100644 --- a/crates/tui/src/repl/runtime.rs +++ b/crates/tui/src/repl/runtime.rs @@ -137,9 +137,8 @@ impl PythonRuntime { }) } - /// Create with a specific state path (for testing). - #[cfg(test)] - pub(crate) fn with_state_path(path: PathBuf) -> Self { + /// Create with a specific state path (for testing / RLM integration). + pub fn with_state_path(path: PathBuf) -> Self { Self { state_path: path, stdout_limit: DEFAULT_STDOUT_LIMIT, diff --git a/crates/tui/src/rlm/mod.rs b/crates/tui/src/rlm/mod.rs new file mode 100644 index 00000000..e9e70200 --- /dev/null +++ b/crates/tui/src/rlm/mod.rs @@ -0,0 +1,39 @@ +//! True Recursive Language Model (RLM) loop — paper-spec Algorithm 1. +//! +//! Implements the RLM inference paradigm from Zhang, Kraska, Khattab +//! (arXiv:2512.24601, §2 Algorithm 1): +//! +//! ```text +//! state ← InitREPL(prompt=P) +//! state ← AddFunction(state, sub_RLM) +//! hist ← [Metadata(state)] +//! while True: +//! code ← LLM(hist) +//! (state, stdout) ← REPL(state, code) +//! hist ← hist ∥ code ∥ Metadata(stdout) +//! if state[Final] is set: +//! return state[Final] +//! ``` +//! +//! Key departure from our previous "RLM-inspired" approach: +//! - P is stored as a REPL variable, NEVER in the LLM's context window +//! - Only metadata about state/stdout goes to the LLM — constant-size context +//! - The LLM generates Python code, not free text +//! - Recursion happens via llm_query() inside the code, not as tool calls +//! +//! ## Architecture +//! +//! The RLM loop is a standalone async function that the engine calls from +//! its event loop when it receives an `Op::RlmQuery`. It: +//! 1. Initialises a PythonRuntime with the prompt stored as `PROMPT` +//! 2. Builds a metadata-only context describing REPL state +//! 3. Calls the root LLM to generate code +//! 4. Executes the code in the REPL +//! 5. Checks for FINAL — if found, returns it +//! 6. Otherwise, feeds code + truncated stdout metadata back, loops + +pub mod prompt; +pub mod turn; + +pub use prompt::rlm_system_prompt; +pub use turn::run_rlm_turn; diff --git a/crates/tui/src/rlm/prompt.rs b/crates/tui/src/rlm/prompt.rs new file mode 100644 index 00000000..63bf84f3 --- /dev/null +++ b/crates/tui/src/rlm/prompt.rs @@ -0,0 +1,126 @@ +//! RLM system prompt — teaches the model to write code and use the REPL +//! per Algorithm 1 of Zhang et al. (arXiv:2512.24601). + +use crate::models::SystemPrompt; + +/// Build the system prompt for a Recursive Language Model (RLM) root LLM call. +/// +/// This prompt instructs the root LLM to generate Python code that +/// manipulates the `PROMPT` variable in the REPL environment, using +/// `llm_query()` for recursive sub-calls and `FINAL()` to return the +/// final answer. +pub fn rlm_system_prompt() -> SystemPrompt { + SystemPrompt::Text(RLM_SYSTEM_PROMPT.trim().to_string()) +} + +const RLM_SYSTEM_PROMPT: &str = r#"You are a Recursive Language Model (RLM). + +Your job is to process the user's prompt by writing Python code. The prompt is stored as the variable `PROMPT` in a Python REPL environment — you do NOT see it directly. You must inspect and process it programmatically. + +## REPL Environment + +The Python REPL starts each round with persistent state. Use these functions: + + - `repl_get("PROMPT")` — Returns the full user prompt string. + - `repl_set(name, value)` — Stores a variable for future rounds. + - `repl_get(name)` — Retrieves a previously stored variable. + - `llm_query(prompt, model=None, max_tokens=None)` — Calls a sub-LLM with a + new prompt and returns the response text. Use this for complex processing + that requires an LLM — the sub-LLM is fast (deepseek-v4-flash) and runs + with its own REPL context. + - `FINAL(value)` — Sets the final answer and ends the RLM loop. Call this + when you have the complete answer. + +## How to operate + +1. PREVIEW the prompt first: + ```python + text = repl_get("PROMPT") + print(f"Length: {len(text)}") + print(text[:500]) # First 500 chars + ``` + +2. DECOMPOSE the task into chunks. For long prompts, process parts + independently using llm_query() for each chunk: + ```python + text = repl_get("PROMPT") + chunk_size = 2000 + results = [] + for i in range(0, len(text), chunk_size): + chunk = text[i:i+chunk_size] + result = llm_query(f"Process this part: {chunk}") + results.append(result) + ``` + +3. COMBINE results and call FINAL: + ```python + combined = "\n".join(results) + FINAL(combined) + ``` + +## Rules + +- You MUST output Python code inside ```python blocks. +- Only code inside ```python fences is executed. You can add commentary + outside the fences. +- The PROMPT variable may be very large (millions of characters). Do not + print it in full — always truncate to a preview. +- Use llm_query() for heavy lifting — it calls a sub-LLM that can process + snippets autonomously. +- Previous code and stdout summaries are shown in the conversation history. + Build on them rather than repeating work. +- Set `FINAL(value)` when you have the complete answer. The RLM loop ends + immediately. +- If you don't need the REPL and want to return a direct answer, just + write a short response without code fences and the RLM loop will end. + +## Strategy hints + +- For code analysis: print structure, use llm_query for deeper understanding +- For long document processing: chunk the PROMPT, process each chunk via + llm_query, then aggregate results +- For research tasks: decompose the question, query sub-parts, synthesize +- For iterative tasks: set intermediate results with repl_set, retrieve + them across rounds +"#; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rlm_prompt_is_not_empty() { + let prompt = rlm_system_prompt(); + match prompt { + SystemPrompt::Text(text) => assert!(!text.is_empty()), + _ => panic!("expected Text"), + } + } + + #[test] + fn rlm_prompt_mentions_llm_query() { + let prompt = rlm_system_prompt(); + match prompt { + SystemPrompt::Text(text) => assert!(text.contains("llm_query")), + _ => panic!("expected Text"), + } + } + + #[test] + fn rlm_prompt_mentions_final() { + let prompt = rlm_system_prompt(); + match prompt { + SystemPrompt::Text(text) => assert!(text.contains("FINAL")), + _ => panic!("expected Text"), + } + } + + #[test] + fn rlm_prompt_mentions_python_fence() { + let prompt = rlm_system_prompt(); + match prompt { + SystemPrompt::Text(text) => assert!(text.contains("```python")), + _ => panic!("expected Text"), + } + } +} diff --git a/crates/tui/src/rlm/turn.rs b/crates/tui/src/rlm/turn.rs new file mode 100644 index 00000000..6ea5f60e --- /dev/null +++ b/crates/tui/src/rlm/turn.rs @@ -0,0 +1,587 @@ +//! True RLM turn loop — Algorithm 1 from Zhang et al. (arXiv:2512.24601). +//! +//! # Algorithm +//! +//! ```text +//! state ← InitREPL(prompt=P) +//! state ← AddFunction(state, sub_RLM) +//! hist ← [Metadata(state)] +//! while True: +//! code ← LLM(hist) +//! (state, stdout) ← REPL(state, code) +//! hist ← hist ∥ code ∥ Metadata(stdout) +//! if state[Final] is set: +//! return state[Final] +//! ``` +//! +//! Key invariants: +//! 1. P is stored as `PROMPT` in the REPL — NEVER in the LLM context +//! 2. Only metadata (length, preview, variable names) goes to LLM context +//! 3. The LLM writes Python code, executed by the REPL +//! 4. The REPL provides `llm_query()` for recursive sub-calls + +use std::time::{Duration, Instant}; + +use serde_json::json; +use tokio::sync::mpsc; + +use crate::client::DeepSeekClient; +use crate::core::events::Event; +use crate::llm_client::LlmClient; +use crate::models::{ + ContentBlock, Message, MessageRequest, Usage, +}; +use crate::repl::runtime::PythonRuntime; +use crate::repl::sandbox::parse_final; + +use super::prompt::rlm_system_prompt; + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +/// Maximum number of RLM iterations before the loop gives up. +const MAX_RLM_ITERATIONS: u32 = 25; + +/// Max output tokens for the root LLM — just needs to generate code, not +/// the full answer. +const ROOT_MAX_TOKENS: u32 = 4096; + +/// Max chars of stdout shown as metadata to the root LLM in next iteration. +/// Matches the paper's "only metadata about stdout" constraint. +const STDOUT_METADATA_PREVIEW_LEN: usize = 800; + +/// Max chars of PROMPT shown as preview in metadata. +const PROMPT_PREVIEW_LEN: usize = 500; + +/// Temperature for root LLM calls. Low to keep code generation focused. +const ROOM_TEMPERATURE: f32 = 0.3; + +/// Per-iteration timeout for the entire LLM+REPL round. +const ROUND_TIMEOUT: Duration = Duration::from_secs(180); + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/// Result of an RLM turn. +#[derive(Debug, Clone)] +pub struct RlmTurnResult { + /// The final answer (from FINAL(), or the model's raw text if no code). + pub answer: String, + /// Number of iterations used. + pub iterations: u32, + /// Total wall-clock duration. + pub duration: Duration, + /// Error message if the turn failed. + pub error: Option, + /// Usage from the root LLM calls (total across iterations). + pub usage: Usage, +} + +/// Run a full RLM turn per Algorithm 1. +/// +/// The user's `prompt` is stored as `PROMPT` in the REPL and never placed +/// into the LLM's context window. The LLM receives only metadata about the +/// REPL state and generates code, which is then executed. When `FINAL()` is +/// called inside the code, the loop ends and the value is returned. +pub async fn run_rlm_turn( + client: &DeepSeekClient, + model: String, + prompt: String, + _child_model: String, + tx_event: mpsc::Sender, +) -> RlmTurnResult { + let start = Instant::now(); + let mut total_usage = Usage::default(); + + // ------------------------------------------------------------------ + // 1. Initialise REPL with PROMPT variable + // ------------------------------------------------------------------ + let state_dir = std::env::temp_dir().join("deepseek_rlm"); + let _ = std::fs::create_dir_all(&state_dir); + let state_path = state_dir.join(format!("rlm_{}.json", uuid::Uuid::new_v4())); + + // Write PROMPT into the REPL state before the REPL even starts. + let initial_vars = json!({"PROMPT": &prompt}); + if let Err(e) = std::fs::write(&state_path, serde_json::to_string(&initial_vars).unwrap()) { + return RlmTurnResult { + answer: String::new(), + iterations: 0, + duration: start.elapsed(), + error: Some(format!("Failed to write REPL state: {e}")), + usage: total_usage, + }; + } + + let mut repl = PythonRuntime::with_state_path(state_path.clone()); + + let _ = tx_event + .send(Event::status("RLM: REPL initialised with PROMPT variable".to_string())) + .await; + + // ------------------------------------------------------------------ + // 2. Build metadata-only conversation history + // ------------------------------------------------------------------ + let system = rlm_system_prompt(); + let metadata_msg = build_metadata_message(&prompt, 0, None, None); + + // The conversation history for the root LLM contains ONLY: + // - Metadata(state) — initial + // - code (assistant) + Metadata(stdout) (user) — for each iteration + // This keeps the root LLM context constant-size regardless of PROMPT size. + let mut messages: Vec = vec![metadata_msg]; + + // ------------------------------------------------------------------ + // 3. RLM loop (Algorithm 1) + // ------------------------------------------------------------------ + for iteration in 0..MAX_RLM_ITERATIONS { + if start.elapsed() > ROUND_TIMEOUT { + return RlmTurnResult { + answer: String::new(), + iterations: iteration, + duration: start.elapsed(), + error: Some(format!("RLM turn timed out after {}s", ROUND_TIMEOUT.as_secs())), + usage: total_usage, + }; + } + + let _ = tx_event + .send(Event::status(format!("RLM iteration {}/{}", iteration + 1, MAX_RLM_ITERATIONS))) + .await; + + // 3a. LLM generates code from metadata-only context + let request = MessageRequest { + model: model.clone(), + messages: messages.clone(), + max_tokens: ROOT_MAX_TOKENS, + system: Some(system.clone()), + tools: None, + tool_choice: None, + metadata: None, + thinking: None, + reasoning_effort: None, + stream: Some(false), + temperature: Some(ROOM_TEMPERATURE), + top_p: Some(0.9_f32), + }; + + let response = match client.create_message(request).await { + Ok(r) => r, + Err(e) => { + return RlmTurnResult { + answer: String::new(), + iterations: iteration + 1, + duration: start.elapsed(), + error: Some(format!("Root LLM call failed: {e}")), + usage: total_usage, + }; + } + }; + + // Accumulate usage + total_usage.input_tokens = total_usage.input_tokens.saturating_add(response.usage.input_tokens); + total_usage.output_tokens = total_usage.output_tokens.saturating_add(response.usage.output_tokens); + + // Extract text from response + let response_text = extract_text_blocks(&response.content); + + let _ = tx_event + .send(Event::MessageDelta { + index: iteration as usize, + content: format!("\n[RLM iteration {}]\n", iteration + 1), + }) + .await; + + // 3b. Extract Python code from the response + let code = extract_python_code(&response_text); + + let (code_to_run, _is_direct_answer) = match code { + Some(c) => (c, false), + None => { + // No code block — the model gave a direct text answer. + // This is a valid exit: the model decided it doesn't need + // the REPL and is returning a final answer directly. + let _ = tx_event + .send(Event::MessageDelta { + index: iteration as usize, + content: response_text.clone(), + }) + .await; + return RlmTurnResult { + answer: response_text, + iterations: iteration + 1, + duration: start.elapsed(), + error: None, + usage: total_usage, + }; + } + }; + + let _ = tx_event + .send(Event::MessageDelta { + index: iteration as usize, + content: format!("```python\n{code_to_run}\n```\n"), + }) + .await; + + // 3c. Execute code in REPL + let round = match repl.execute(&code_to_run).await { + Ok(r) => r, + Err(e) => { + let _ = tx_event + .send(Event::status(format!("RLM REPL error: {e}"))) + .await; + return RlmTurnResult { + answer: String::new(), + iterations: iteration + 1, + duration: start.elapsed(), + error: Some(format!("REPL execution failed: {e}")), + usage: total_usage, + }; + } + }; + + // 3d. Check for FINAL + if let Some(final_val) = &round.final_value { + let _ = tx_event + .send(Event::status("RLM: FINAL detected, ending loop".to_string())) + .await; + return RlmTurnResult { + answer: final_val.clone(), + iterations: iteration + 1, + duration: start.elapsed(), + error: None, + usage: total_usage, + }; + } + + // Also check raw stdout for FINAL (in case the parse missed it) + let (_cleaned, raw_final) = parse_final(&round.full_stdout); + if let Some(final_val) = raw_final { + let _ = tx_event + .send(Event::status("RLM: FINAL detected (raw parse), ending loop".to_string())) + .await; + return RlmTurnResult { + answer: final_val, + iterations: iteration + 1, + duration: start.elapsed(), + error: None, + usage: total_usage, + }; + } + + // 3e. Build metadata for next iteration and append to history + // hist ← hist ∥ code ∥ Metadata(stdout) + let stdout_display = if round.stdout.is_empty() && !round.stderr.is_empty() { + format!("[stderr]\n{}", truncate_text(&round.stderr, STDOUT_METADATA_PREVIEW_LEN)) + } else { + truncate_text(&round.stdout, STDOUT_METADATA_PREVIEW_LEN) + }; + + // Assistant message: the code the model wrote + messages.push(Message { + role: "assistant".to_string(), + content: vec![ContentBlock::Text { + text: format!("```python\n{code_to_run}\n```"), + cache_control: None, + }], + }); + + // User message: metadata about stdout + current REPL state + let next_metadata = build_metadata_message(&prompt, iteration + 1, Some(&code_to_run), Some(&stdout_display)); + messages.push(next_metadata); + + // Emit stdout preview as a status update + let _ = tx_event + .send(Event::status(format!( + "REPL round {}: {} bytes output{}", + iteration + 1, + round.full_stdout.len(), + if round.has_error { " (error)" } else { "" }, + ))) + .await; + + // Limit the messages vector to prevent unbounded growth. + // Keep at most 10 metadata+code pairs (the context is already small + // since each is just metadata, but we should still bound it). + // The paper's Algorithm 1 only trims per-iteration tokens, not + // iterations themselves, but we add this as a practical guard. + const MAX_HISTORY_PAIRS: usize = 20; // 10 iterations × 2 messages each + if messages.len() > MAX_HISTORY_PAIRS { + // Remove oldest pair but keep the first metadata message. + let mut kept = vec![messages[0].clone()]; + kept.extend(messages.drain(messages.len() - MAX_HISTORY_PAIRS + 1..)); + messages = kept; + } + } + + // Loop exhausted without FINAL + RlmTurnResult { + answer: String::new(), + iterations: MAX_RLM_ITERATIONS, + duration: start.elapsed(), + error: Some(format!( + "RLM loop exhausted after {MAX_RLM_ITERATIONS} iterations without FINAL" + )), + usage: total_usage, + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/// Build a metadata message describing the current REPL state. +/// +/// This is what the paper calls `Metadata(state)` — it contains: +/// - Length of PROMPT (not the content itself) +/// - A short preview of PROMPT +/// - Current iteration number +/// - Previous code (if any) +/// - Previous stdout summary (if any) +fn build_metadata_message( + prompt: &str, + iteration: u32, + previous_code: Option<&str>, + previous_stdout: Option<&str>, +) -> Message { + let prompt_len = prompt.len(); + let prompt_preview = truncate_text(prompt, PROMPT_PREVIEW_LEN); + + let mut parts = Vec::new(); + + parts.push(format!("## REPL State (Round {iteration})")); + parts.push(String::new()); + parts.push(format!("**PROMPT** — stored as REPL variable `PROMPT`")); + parts.push(format!("- Length: {prompt_len} characters")); + parts.push(format!("- Preview: \"{prompt_preview}\"")); + parts.push(String::new()); + + if iteration > 0 { + parts.push("**Previous Round**".to_string()); + if let Some(code) = previous_code { + // Only show the first/last lines as metadata + let code_lines: Vec<&str> = code.lines().collect(); + let code_summary = if code_lines.len() > 8 { + let first_few: Vec<&str> = code_lines.iter().take(4).copied().collect(); + let last_few: Vec<&str> = code_lines.iter().rev().take(3).rev().copied().collect(); + format!( + "{} lines: {} ... {}", + code_lines.len(), + first_few.join("\n"), + last_few.join("\n") + ) + } else { + code.to_string() + }; + parts.push(format!("- Code: {code_summary}")); + } + if let Some(stdout) = previous_stdout { + // Only show truncated stdout + let stdout_clean = stdout.trim(); + if !stdout_clean.is_empty() { + parts.push(format!("- Stdout preview: \"{stdout_clean}\"")); + } else { + parts.push("- Stdout: (empty)".to_string()); + } + } + parts.push(String::new()); + } + + parts.push("**Available functions**: `repl_get()`, `repl_set()`, `llm_query(prompt)`".to_string()); + parts.push("**End the loop with**: `FINAL(value)`".to_string()); + + let text = parts.join("\n"); + + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text, + cache_control: None, + }], + } +} + +/// Extract text from content blocks, joining all text blocks together. +fn extract_text_blocks(blocks: &[ContentBlock]) -> String { + blocks + .iter() + .filter_map(|b| match b { + ContentBlock::Text { text, .. } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n") +} + +/// Extract the first ```python code block from text. +/// Returns `None` if no python fence is found. +fn extract_python_code(text: &str) -> Option { + // Look for ```python or ```py + let start_markers = ["```python\n", "```py\n", "```python\r\n", "```py\r\n"]; + let mut best_start: Option<(usize, &str)> = None; + + for marker in &start_markers { + if let Some(idx) = text.find(marker) { + let end_pos = idx + marker.len(); + match best_start { + Some((best_idx, _)) if idx < best_idx => { + best_start = Some((idx, &text[end_pos..])); + } + None => { + best_start = Some((idx, &text[end_pos..])); + } + _ => {} + } + } + } + + let after_fence = best_start.map(|(_, rest)| rest)?; + + // Find the closing ``` + let end_idx = after_fence.find("\n```").or_else(|| after_fence.find("```"))?; + + let code = after_fence[..end_idx].trim().to_string(); + if code.is_empty() { + return None; + } + Some(code) +} + +/// Truncate text to `max_chars`, adding an ellipsis if truncated. +fn truncate_text(text: &str, max_chars: usize) -> String { + if text.len() <= max_chars { + return text.to_string(); + } + let take = max_chars.saturating_sub(3); + let mut result: String = text.chars().take(take).collect(); + result.push_str("..."); + result +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn extract_python_code_finds_simple_block() { + let text = "Here's some code:\n```python\nprint('hello')\n```\nEnd."; + let code = extract_python_code(text).unwrap(); + assert_eq!(code, "print('hello')"); + } + + #[test] + fn extract_python_code_finds_short_marker() { + let text = "Code:\n```py\nx = 1 + 2\n```"; + let code = extract_python_code(text).unwrap(); + assert_eq!(code, "x = 1 + 2"); + } + + #[test] + fn extract_python_code_returns_none_when_missing() { + let text = "Just some text without code fences."; + assert!(extract_python_code(text).is_none()); + } + + #[test] + fn extract_python_code_returns_none_on_empty_block() { + let text = "Code:\n```python\n\n```"; + assert!(extract_python_code(text).is_none()); + } + + #[test] + fn extract_python_code_handles_multiple_blocks() { + let text = "First:\n```python\na=1\n```\nSecond:\n```python\nb=2\n```"; + let code = extract_python_code(text).unwrap(); + assert_eq!(code, "a=1"); // Returns first block + } + + #[test] + fn extract_python_code_ignores_other_fences() { + let text = "```\nsome text\n```\nActual:\n```python\nreal_code()\n```"; + let code = extract_python_code(text).unwrap(); + assert_eq!(code, "real_code()"); + } + + #[test] + fn build_metadata_contains_key_information() { + let prompt = "Hello, world!"; + let msg = build_metadata_message(prompt, 0, None, None); + let text = extract_text_blocks(&msg.content); + assert!(text.contains("PROMPT")); + assert!(text.contains("Hello, world!")); + assert!(text.contains("Round 0")); + assert!(text.contains("llm_query")); + assert!(text.contains("FINAL")); + } + + #[test] + fn build_metadata_with_iteration_shows_previous_code() { + let prompt = "Test prompt"; + let msg = build_metadata_message(prompt, 3, Some("print('hi')"), Some("hi")); + let text = extract_text_blocks(&msg.content); + assert!(text.contains("Round 3")); + assert!(text.contains("print('hi')")); + assert!(text.contains("hi")); + } + + #[test] + fn truncate_text_leaves_short_text_alone() { + assert_eq!(truncate_text("hello", 100), "hello"); + } + + #[test] + fn truncate_text_shortens_long_text() { + let long = "a".repeat(1000); + let truncated = truncate_text(&long, 10); + // 7 chars of 'a' + "..." = 10 chars/bytes total + assert_eq!(truncated.len(), 10); + assert!(truncated.ends_with("...")); + } + + #[test] + fn extract_text_blocks_joins_text_blocks() { + let blocks = vec![ + ContentBlock::Text { + text: "first".to_string(), + cache_control: None, + }, + ContentBlock::Thinking { + thinking: "skip".to_string(), + }, + ContentBlock::Text { + text: "second".to_string(), + cache_control: None, + }, + ]; + assert_eq!(extract_text_blocks(&blocks), "first\nsecond"); + } + + #[test] + fn extract_text_blocks_returns_empty_on_no_text() { + let blocks = vec![ContentBlock::Thinking { + thinking: "only thinking".to_string(), + }]; + assert_eq!(extract_text_blocks(&blocks), ""); + } + + #[test] + fn metadata_msg_role_is_user() { + let msg = build_metadata_message("test", 0, None, None); + assert_eq!(msg.role, "user"); + } + + #[test] + fn metadata_with_previous_code_shows_code_summary() { + let msg = build_metadata_message("test", 2, Some("for i in range(10):\n print(i)"), Some("0\n1\n2")); + let text = extract_text_blocks(&msg.content); + assert!(text.contains("Round 2")); + assert!(text.contains("for i")); + assert!(text.contains("0\n1\n2")); + } +} diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index ee9152e6..6792c6ef 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -1922,7 +1922,19 @@ pub enum AppAction { OpenConfigView, /// Open the `/model` two-pane picker (Pro/Flash + Off/High/Max). OpenModelPicker, + /// Send a message to the AI (normal chat mode). SendMessage(String), + /// Run a Recursive Language Model (RLM) turn — Algorithm 1 from + /// Zhang et al. (arXiv:2512.24601). The prompt is stored in the REPL; + /// the root LLM only sees metadata. + RlmQuery { + /// The user's prompt — stored in REPL, NOT in LLM context. + prompt: String, + /// Model for the root LLM. + model: String, + /// Model for sub-LLM (llm_query) calls. + child_model: String, + }, ListSubAgents, FetchModels, /// Switch the active LLM backend (DeepSeek vs NVIDIA NIM) without diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 773419de..fcf3c0f1 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -2337,6 +2337,20 @@ async fn apply_command_result( let queued = build_queued_message(app, content); submit_or_steer_message(app, engine_handle, queued).await?; } + AppAction::RlmQuery { + prompt, + model, + child_model, + } => { + app.status_message = Some("RLM turn starting (Algorithm 1)...".to_string()); + let _ = engine_handle + .send(Op::RlmQuery { + content: prompt, + model, + child_model, + }) + .await; + } AppAction::ListSubAgents => { let _ = engine_handle.send(Op::ListSubAgents).await; }