feat(rlm): implement true RLM loop per Algorithm 1 (Zhang et al., arXiv:2512.24601)

Adds the true Recursive Language Model (RLM) inference paradigm: - rlm/mod.rs — module root with public API - rlm/prompt.rs — RLM system prompt teaching the model to write code - rlm/turn.rs — Algorithm 1 implementation: - P stored as REPL variable (NEVER in LLM context window) - Metadata-only context sent to root LLM (constant-size) - LLM generates Python code, not free text - Code executed in PythonRuntime with llm_query() for recursion - FINAL() detection ends the loop - Op::RlmQuery variant in ops.rs - /rlm command in the command system - AppAction::RlmQuery handler in ui.rs - PythonRuntime::with_state_path made public for RLM integration - 18 new unit tests for code extraction, metadata building, truncation Key differences from previous 'RLM-inspired' approach: ✅ P is external (REPL variable), not in LLM context ✅ Only metadata(state) in LLM context (constant-size) ✅ LLM generates code, not free text + tool calls ✅ sub-LLM recursion via llm_query() inside REPL code ✅ FINAL() mechanism for programmatic termination
2026-04-26 23:34:17 -05:00
parent ac8a882be5
commit 42c684367f
10 changed files with 945 additions and 3 deletions
@@ -312,6 +312,13 @@ pub const COMMANDS: &[CommandInfo] = &[
        description: "Run a structured code review on a file, diff, or PR",
        usage: "/review <target>",
    },
+    // RLM command
+    CommandInfo {
+        name: "rlm",
+        aliases: &["recursive"],
+        description: "Recursive Language Model (RLM) — process a prompt via Algorithm 1 from Zhang et al. (arXiv:2512.24601). The prompt is stored in a REPL; the model writes code to process it.",
+        usage: "/rlm <prompt>",
+    },
    // Debug/cost command
    CommandInfo {
        name: "cost",
@@ -377,6 +384,9 @@ pub fn execute(cmd: &str, app: &mut App) -> CommandResult {
        "skill" => skills::run_skill(app, arg),
        "review" => review::review(app, arg),

+        // RLM command
+        "rlm" | "recursive" => rlm(app, arg),
+
        // Legacy command migrations (kept out of registry/autocomplete intentionally).
        "set" => CommandResult::error(
            "The /set command was retired. Use /config to edit settings and /settings to inspect current values.",
@@ -411,6 +421,52 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) ->
    config::set_config_value(app, key, value, persist)
 }

+/// Execute a Recursive Language Model (RLM) turn — Algorithm 1 from
+/// Zhang et al. (arXiv:2512.24601).
+///
+/// `arg` is the raw text typed after `/rlm`. After trimming, it becomes the
+/// prompt carried by `AppAction::RlmQuery`, together with the app's current
+/// model (root LLM) and a fixed child model for sub-LLM calls.
+///
+/// Returns:
+/// - a usage error when `arg` is missing or blank,
+/// - an informational tip (and no action) when the prompt is shorter than
+///   `MIN_PROMPT_CHARS` characters,
+/// - otherwise a status message plus the `RlmQuery` action.
+pub fn rlm(app: &mut App, arg: Option<&str>) -> CommandResult {
+    /// Prompts shorter than this (in characters) are answered with a tip
+    /// instead of starting an RLM turn.
+    const MIN_PROMPT_CHARS: usize = 50;
+    /// Model used for sub-LLM (`llm_query`) calls.
+    const CHILD_MODEL: &str = "deepseek-v4-flash";
+
+    let prompt = match arg.map(str::trim) {
+        Some(p) if !p.is_empty() => p.to_string(),
+        _ => {
+            return CommandResult::error(
+                "Usage: /rlm <prompt>\n\n\
+                 Process a prompt using a Recursive Language Model (RLM).\n\
+                 The prompt is stored in a REPL and the model writes code\n\
+                 to decompose and process it recursively."
+                    .to_string(),
+            );
+        }
+    };
+
+    // Count characters rather than bytes so the threshold and the numbers
+    // shown to the user mean the same thing for non-ASCII prompts.
+    let prompt_chars = prompt.chars().count();
+
+    // Sanity-check: RLM is most useful for longer prompts. The tip quotes the
+    // threshold that is actually enforced.
+    if prompt_chars < MIN_PROMPT_CHARS {
+        return CommandResult::message(format!(
+            "Tip: RLM is designed for processing LONG prompts (at least {MIN_PROMPT_CHARS} chars). \
+             For short queries, just type the message directly."
+        ));
+    }
+
+    let model = app.model.clone();
+
+    CommandResult::with_message_and_action(
+        format!(
+            "Starting RLM turn (Algorithm 1) for {} chars of prompt using {}...",
+            prompt_chars, model
+        ),
+        AppAction::RlmQuery {
+            prompt,
+            model,
+            child_model: CHILD_MODEL.to_string(),
+        },
+    )
+}
+
 /// Get command info by name or alias
 pub fn get_command_info(name: &str) -> Option<&'static CommandInfo> {
    let name = name.strip_prefix('/').unwrap_or(name);
@@ -1311,6 +1311,14 @@ impl Engine {
                Op::CompactContext => {
                    self.handle_manual_compaction().await;
                }
+                Op::RlmQuery {
+                    content,
+                    model,
+                    child_model,
+                } => {
+                    self.handle_rlm_query(content, model, child_model)
+                        .await;
+                }
                Op::Shutdown => {
                    break;
                }
@@ -1645,6 +1653,94 @@ impl Engine {
            .await;
    }

+    /// Run one RLM turn (Algorithm 1, Zhang et al., arXiv:2512.24601) for an
+    /// `Op::RlmQuery` and report the outcome over `tx_event`.
+    ///
+    /// Without a configured API client only an error event is sent. Otherwise
+    /// the turn is delegated to `run_rlm_turn`; afterwards any error is
+    /// reported, a non-empty answer is appended to the session as an
+    /// assistant message and emitted as a message delta, and the turn is
+    /// closed with `TurnComplete`.
+    async fn handle_rlm_query(
+        &mut self,
+        content: String,
+        model: String,
+        child_model: String,
+    ) {
+        use crate::rlm::turn::run_rlm_turn;
+
+        let client = match self.deepseek_client {
+            Some(ref configured) => configured,
+            None => {
+                // Prefer the recorded client error; fall back to a generic one.
+                let reason = self.deepseek_client_error.as_deref().map_or_else(
+                    || "API client not configured".to_string(),
+                    |s| s.to_string(),
+                );
+                let _ = self
+                    .tx_event
+                    .send(Event::error(format!("RLM error: {reason}"), false))
+                    .await;
+                return;
+            }
+        };
+
+        let _ = self
+            .tx_event
+            .send(Event::status("RLM turn started (Algorithm 1)".to_string()))
+            .await;
+
+        let outcome = run_rlm_turn(
+            client,
+            model,
+            content,
+            child_model,
+            self.tx_event.clone(),
+        )
+        .await;
+
+        let status = match outcome.error {
+            Some(_) => crate::core::events::TurnOutcomeStatus::Failed,
+            None => crate::core::events::TurnOutcomeStatus::Completed,
+        };
+
+        if let Some(reason) = outcome.error.as_ref() {
+            let _ = self
+                .tx_event
+                .send(Event::error(format!("RLM error: {reason}"), true))
+                .await;
+        }
+
+        if !outcome.answer.is_empty() {
+            // Record the final answer in the session before showing it.
+            let assistant_reply = crate::models::Message {
+                role: "assistant".to_string(),
+                content: vec![crate::models::ContentBlock::Text {
+                    text: outcome.answer.clone(),
+                    cache_control: None,
+                }],
+            };
+            self.add_session_message(assistant_reply).await;
+
+            let _ = self
+                .tx_event
+                .send(Event::MessageDelta {
+                    index: 0,
+                    content: outcome.answer.clone(),
+                })
+                .await;
+            let _ = self
+                .tx_event
+                .send(Event::MessageComplete { index: 0 })
+                .await;
+        }
+
+        let _ = self
+            .tx_event
+            .send(Event::TurnComplete {
+                usage: outcome.usage,
+                status,
+                error: outcome.error,
+            })
+            .await;
+    }
+
    fn estimated_input_tokens(&self) -> usize {
        estimate_input_tokens_conservative(
            &self.session.messages,
@@ -65,6 +65,18 @@ pub enum Op {
    /// Run context compaction immediately.
    CompactContext,

+    /// Run a Recursive Language Model (RLM) turn per Algorithm 1 of
+    /// Zhang et al. (arXiv:2512.24601). The prompt is stored in the REPL
+    /// as the `PROMPT` variable; the root LLM only sees metadata.
+    RlmQuery {
+        /// The user's prompt — stored in REPL, NOT in the LLM context.
+        content: String,
+        /// The model to use for root LLM calls.
+        model: String,
+        /// The model to use for sub-LLM (llm_query) calls.
+        child_model: String,
+    },
+
    /// Shutdown the engine
    Shutdown,
 }
@@ -37,6 +37,7 @@ mod project_context;
 mod project_doc;
 mod prompts;
 pub mod repl;
+pub mod rlm;
 mod responses_api_proxy;
 mod runtime_api;
 mod runtime_threads;
@@ -137,9 +137,8 @@ impl PythonRuntime {
        })
    }

-    /// Create with a specific state path (for testing).
-    #[cfg(test)]
-    pub(crate) fn with_state_path(path: PathBuf) -> Self {
+    /// Create with a specific state path (for testing / RLM integration).
+    pub fn with_state_path(path: PathBuf) -> Self {
        Self {
            state_path: path,
            stdout_limit: DEFAULT_STDOUT_LIMIT,
@@ -0,0 +1,39 @@
+//! True Recursive Language Model (RLM) loop — paper-spec Algorithm 1.
+//!
+//! Implements the RLM inference paradigm from Zhang, Kraska, Khattab
+//! (arXiv:2512.24601, §2 Algorithm 1):
+//!
+//! ```text
+//! state ← InitREPL(prompt=P)
+//! state ← AddFunction(state, sub_RLM)
+//! hist ← [Metadata(state)]
+//! while True:
+//!     code ← LLM(hist)
+//!     (state, stdout) ← REPL(state, code)
+//!     hist ← hist ∥ code ∥ Metadata(stdout)
+//!     if state[Final] is set:
+//!         return state[Final]
+//! ```
+//!
+//! Key departure from our previous "RLM-inspired" approach:
+//! - P is stored as a REPL variable, NEVER in the LLM's context window
+//! - Only metadata about state/stdout goes to the LLM — constant-size context
+//! - The LLM generates Python code, not free text
+//! - Recursion happens via llm_query() inside the code, not as tool calls
+//!
+//! ## Architecture
+//!
+//! The RLM loop is a standalone async function that the engine calls from
+//! its event loop when it receives an `Op::RlmQuery`. It:
+//! 1. Initialises a PythonRuntime with the prompt stored as `PROMPT`
+//! 2. Builds a metadata-only context describing REPL state
+//! 3. Calls the root LLM to generate code
+//! 4. Executes the code in the REPL
+//! 5. Checks for FINAL — if found, returns it
+//! 6. Otherwise, feeds code + truncated stdout metadata back, loops
+
+pub mod prompt;
+pub mod turn;
+
+pub use prompt::rlm_system_prompt;
+pub use turn::run_rlm_turn;
@@ -0,0 +1,126 @@
+//! RLM system prompt — teaches the model to write code and use the REPL
+//! per Algorithm 1 of Zhang et al. (arXiv:2512.24601).
+
+use crate::models::SystemPrompt;
+
+/// Return the system prompt used for Recursive Language Model (RLM) root
+/// LLM calls.
+///
+/// The text tells the model to write Python that works on the `PROMPT`
+/// variable in the REPL, to recurse through `llm_query()`, and to finish by
+/// calling `FINAL()`. Leading and trailing whitespace of the template is
+/// removed before it is wrapped in `SystemPrompt::Text`.
+pub fn rlm_system_prompt() -> SystemPrompt {
+    let body = RLM_SYSTEM_PROMPT.trim();
+    SystemPrompt::Text(String::from(body))
+}
+
+/// Raw template for the RLM system prompt.
+///
+/// `rlm_system_prompt` trims this text and wraps it in `SystemPrompt::Text`.
+/// It is runtime text read by the model, so every edit changes behaviour;
+/// the unit tests in this file check that it still mentions `llm_query`,
+/// `FINAL` and a ```python fence.
+const RLM_SYSTEM_PROMPT: &str = r#"You are a Recursive Language Model (RLM).
+
+Your job is to process the user's prompt by writing Python code. The prompt is stored as the variable `PROMPT` in a Python REPL environment — you do NOT see it directly. You must inspect and process it programmatically.
+
+## REPL Environment
+
+The Python REPL starts each round with persistent state. Use these functions:
+
+  - `repl_get("PROMPT")` — Returns the full user prompt string.
+  - `repl_set(name, value)` — Stores a variable for future rounds.
+  - `repl_get(name)` — Retrieves a previously stored variable.
+  - `llm_query(prompt, model=None, max_tokens=None)` — Calls a sub-LLM with a
+    new prompt and returns the response text. Use this for complex processing
+    that requires an LLM — the sub-LLM is fast (deepseek-v4-flash) and runs
+    with its own REPL context.
+  - `FINAL(value)` — Sets the final answer and ends the RLM loop. Call this
+    when you have the complete answer.
+
+## How to operate
+
+1. PREVIEW the prompt first:
+   ```python
+   text = repl_get("PROMPT")
+   print(f"Length: {len(text)}")
+   print(text[:500])  # First 500 chars
+   ```
+
+2. DECOMPOSE the task into chunks. For long prompts, process parts
+   independently using llm_query() for each chunk:
+   ```python
+   text = repl_get("PROMPT")
+   chunk_size = 2000
+   results = []
+   for i in range(0, len(text), chunk_size):
+       chunk = text[i:i+chunk_size]
+       result = llm_query(f"Process this part: {chunk}")
+       results.append(result)
+   ```
+
+3. COMBINE results and call FINAL:
+   ```python
+   combined = "\n".join(results)
+   FINAL(combined)
+   ```
+
+## Rules
+
+- You MUST output Python code inside ```python blocks.
+- Only code inside ```python fences is executed. You can add commentary
+  outside the fences.
+- The PROMPT variable may be very large (millions of characters). Do not
+  print it in full — always truncate to a preview.
+- Use llm_query() for heavy lifting — it calls a sub-LLM that can process
+  snippets autonomously.
+- Previous code and stdout summaries are shown in the conversation history.
+  Build on them rather than repeating work.
+- Set `FINAL(value)` when you have the complete answer. The RLM loop ends
+  immediately.
+- If you don't need the REPL and want to return a direct answer, just
+  write a short response without code fences and the RLM loop will end.
+
+## Strategy hints
+
+- For code analysis: print structure, use llm_query for deeper understanding
+- For long document processing: chunk the PROMPT, process each chunk via
+  llm_query, then aggregate results
+- For research tasks: decompose the question, query sub-parts, synthesize
+- For iterative tasks: set intermediate results with repl_set, retrieve
+  them across rounds
+"#;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Build the system prompt and unwrap its text, failing the test for any
+    /// other `SystemPrompt` variant.
+    fn prompt_text() -> String {
+        match rlm_system_prompt() {
+            SystemPrompt::Text(text) => text,
+            _ => panic!("expected Text"),
+        }
+    }
+
+    #[test]
+    fn rlm_prompt_is_not_empty() {
+        assert!(!prompt_text().is_empty());
+    }
+
+    #[test]
+    fn rlm_prompt_mentions_llm_query() {
+        assert!(prompt_text().contains("llm_query"));
+    }
+
+    #[test]
+    fn rlm_prompt_mentions_final() {
+        assert!(prompt_text().contains("FINAL"));
+    }
+
+    #[test]
+    fn rlm_prompt_mentions_python_fence() {
+        assert!(prompt_text().contains("```python"));
+    }
+}
@@ -0,0 +1,587 @@
+//! True RLM turn loop — Algorithm 1 from Zhang et al. (arXiv:2512.24601).
+//!
+//! # Algorithm
+//!
+//! ```text
+//! state ← InitREPL(prompt=P)
+//! state ← AddFunction(state, sub_RLM)
+//! hist ← [Metadata(state)]
+//! while True:
+//!     code ← LLM(hist)
+//!     (state, stdout) ← REPL(state, code)
+//!     hist ← hist ∥ code ∥ Metadata(stdout)
+//!     if state[Final] is set:
+//!         return state[Final]
+//! ```
+//!
+//! Key invariants:
+//! 1. P is stored as `PROMPT` in the REPL — NEVER in the LLM context
+//! 2. Only metadata (prompt length, a short preview, previous code and truncated stdout) goes to LLM context
+//! 3. The LLM writes Python code, executed by the REPL
+//! 4. The REPL provides `llm_query()` for recursive sub-calls
+
+use std::time::{Duration, Instant};
+
+use serde_json::json;
+use tokio::sync::mpsc;
+
+use crate::client::DeepSeekClient;
+use crate::core::events::Event;
+use crate::llm_client::LlmClient;
+use crate::models::{
+    ContentBlock, Message, MessageRequest, Usage,
+};
+use crate::repl::runtime::PythonRuntime;
+use crate::repl::sandbox::parse_final;
+
+use super::prompt::rlm_system_prompt;
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+/// Maximum number of RLM iterations before the loop gives up.
+const MAX_RLM_ITERATIONS: u32 = 25;
+
+/// Max output tokens for the root LLM — just needs to generate code, not
+/// the full answer.
+const ROOT_MAX_TOKENS: u32 = 4096;
+
+/// Max chars of stdout shown as metadata to the root LLM in next iteration.
+/// Matches the paper's "only metadata about stdout" constraint.
+const STDOUT_METADATA_PREVIEW_LEN: usize = 800;
+
+/// Max chars of PROMPT shown as preview in metadata.
+const PROMPT_PREVIEW_LEN: usize = 500;
+
+/// Temperature for root LLM calls. Low to keep code generation focused.
+// NOTE(review): the name looks like a typo for `ROOT_TEMPERATURE` (compare
+// `ROOT_MAX_TOKENS`) — confirm before renaming, since other code uses it.
+const ROOM_TEMPERATURE: f32 = 0.3;
+
+/// Time budget for the whole RLM turn, despite the "round" in the name.
+///
+/// `run_rlm_turn` compares it against the time elapsed since the turn
+/// started, and only at the top of each iteration, so a call that is already
+/// in flight is not interrupted when the budget runs out.
+const ROUND_TIMEOUT: Duration = Duration::from_secs(180);
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/// Result of an RLM turn.
+///
+/// Returned by `run_rlm_turn` for both successful and failed turns; check
+/// `error` to tell them apart.
+#[derive(Debug, Clone)]
+pub struct RlmTurnResult {
+    /// The final answer (from FINAL(), or the model's raw text if no code).
+    /// Empty whenever `error` is set.
+    pub answer: String,
+    /// Number of iterations used. A timeout reports the iterations completed
+    /// before it; every other exit counts the iteration it happened in.
+    pub iterations: u32,
+    /// Total wall-clock duration, measured from the start of `run_rlm_turn`.
+    pub duration: Duration,
+    /// Error message if the turn failed.
+    pub error: Option<String>,
+    /// Usage from the root LLM calls (total across iterations). Only
+    /// `input_tokens` and `output_tokens` are accumulated.
+    pub usage: Usage,
+}
+
+/// Run a full RLM turn per Algorithm 1.
+///
+/// The user's `prompt` is written to a per-turn JSON state file as `PROMPT`
+/// and a `PythonRuntime` is created on top of that file. The root LLM is
+/// given only the messages produced by `build_metadata_message` plus the code
+/// it wrote in earlier rounds; the first Python block of each reply is
+/// executed in the REPL. The turn ends when
+/// - the executed code sets a FINAL value (success),
+/// - the model replies without a Python block (its text is the answer),
+/// - an LLM or REPL call fails, the `ROUND_TIMEOUT` budget is exceeded, or
+///   `MAX_RLM_ITERATIONS` rounds pass without FINAL (all reported through
+///   `RlmTurnResult::error`).
+///
+/// Progress (status lines, round headers, generated code) is sent over
+/// `tx_event`. The final answer is NOT emitted from here: it is returned in
+/// `RlmTurnResult::answer` and the caller decides how to present it, so it is
+/// never shown twice.
+///
+/// The state file is removed before returning so that prompts do not pile up
+/// in the temp directory.
+///
+/// `_child_model` is accepted for interface compatibility but is not yet
+/// forwarded to the REPL.
+pub async fn run_rlm_turn(
+    client: &DeepSeekClient,
+    model: String,
+    prompt: String,
+    _child_model: String,
+    tx_event: mpsc::Sender<Event>,
+) -> RlmTurnResult {
+    let start = Instant::now();
+
+    // ------------------------------------------------------------------
+    // 1. Initialise REPL with PROMPT variable
+    // ------------------------------------------------------------------
+    let state_dir = std::env::temp_dir().join("deepseek_rlm");
+    // A failure here surfaces through the write below, which reports it.
+    let _ = std::fs::create_dir_all(&state_dir);
+    let state_path = state_dir.join(format!("rlm_{}.json", uuid::Uuid::new_v4()));
+
+    // Write PROMPT into the REPL state before the REPL even starts.
+    // `Value::to_string` renders compact JSON and cannot fail.
+    let initial_vars = json!({"PROMPT": &prompt});
+    if let Err(e) = std::fs::write(&state_path, initial_vars.to_string()) {
+        // Best effort: do not leave a partially written file behind.
+        let _ = std::fs::remove_file(&state_path);
+        return rlm_failure(
+            format!("Failed to write REPL state: {e}"),
+            0,
+            start,
+            Usage::default(),
+        );
+    }
+
+    let repl = PythonRuntime::with_state_path(state_path.clone());
+
+    let _ = tx_event
+        .send(Event::status("RLM: REPL initialised with PROMPT variable".to_string()))
+        .await;
+
+    // ------------------------------------------------------------------
+    // 2–3. Metadata-only history + RLM loop (Algorithm 1)
+    // ------------------------------------------------------------------
+    // The runtime is moved into the loop and dropped when it returns, so the
+    // state file is no longer in use by the time it is deleted.
+    let result = run_rlm_loop(client, &model, &prompt, repl, &tx_event, start).await;
+
+    // The state file holds the full prompt; remove it on every exit path.
+    let _ = std::fs::remove_file(&state_path);
+
+    result
+}
+
+/// Build the `RlmTurnResult` for a failed turn (empty answer, `error` set).
+fn rlm_failure(error: String, iterations: u32, start: Instant, usage: Usage) -> RlmTurnResult {
+    RlmTurnResult {
+        answer: String::new(),
+        iterations,
+        duration: start.elapsed(),
+        error: Some(error),
+        usage,
+    }
+}
+
+/// Build the `RlmTurnResult` for a successful turn (`error` is `None`).
+fn rlm_success(answer: String, iterations: u32, start: Instant, usage: Usage) -> RlmTurnResult {
+    RlmTurnResult {
+        answer,
+        iterations,
+        duration: start.elapsed(),
+        error: None,
+        usage,
+    }
+}
+
+/// The iteration loop of Algorithm 1: ask the root LLM for code, run it in
+/// `repl`, and append code + stdout metadata to the history until a FINAL
+/// value, a direct text answer, a failure, the time budget or the iteration
+/// cap ends the turn.
+async fn run_rlm_loop(
+    client: &DeepSeekClient,
+    model: &str,
+    prompt: &str,
+    mut repl: PythonRuntime,
+    tx_event: &mpsc::Sender<Event>,
+    start: Instant,
+) -> RlmTurnResult {
+    /// Number of (assistant code, user metadata) pairs kept in the history
+    /// in addition to the initial metadata message.
+    const MAX_HISTORY_PAIRS: usize = 10;
+
+    let mut total_usage = Usage::default();
+    let system = rlm_system_prompt();
+
+    // The conversation history for the root LLM contains ONLY:
+    //   - Metadata(state) — initial
+    //   - code (assistant) + Metadata(stdout) (user) — for each iteration
+    let mut messages: Vec<Message> = vec![build_metadata_message(prompt, 0, None, None)];
+
+    for iteration in 0..MAX_RLM_ITERATIONS {
+        let round_no = iteration + 1;
+
+        // The budget covers the whole turn and is only checked between rounds.
+        if start.elapsed() > ROUND_TIMEOUT {
+            return rlm_failure(
+                format!("RLM turn timed out after {}s", ROUND_TIMEOUT.as_secs()),
+                iteration,
+                start,
+                total_usage,
+            );
+        }
+
+        let _ = tx_event
+            .send(Event::status(format!("RLM iteration {}/{}", round_no, MAX_RLM_ITERATIONS)))
+            .await;
+
+        // 3a. LLM generates code from metadata-only context
+        let request = MessageRequest {
+            model: model.to_string(),
+            messages: messages.clone(),
+            max_tokens: ROOT_MAX_TOKENS,
+            system: Some(system.clone()),
+            tools: None,
+            tool_choice: None,
+            metadata: None,
+            thinking: None,
+            reasoning_effort: None,
+            stream: Some(false),
+            temperature: Some(ROOM_TEMPERATURE),
+            top_p: Some(0.9_f32),
+        };
+
+        let response = match client.create_message(request).await {
+            Ok(r) => r,
+            Err(e) => {
+                return rlm_failure(
+                    format!("Root LLM call failed: {e}"),
+                    round_no,
+                    start,
+                    total_usage,
+                );
+            }
+        };
+
+        // Accumulate usage
+        total_usage.input_tokens = total_usage
+            .input_tokens
+            .saturating_add(response.usage.input_tokens);
+        total_usage.output_tokens = total_usage
+            .output_tokens
+            .saturating_add(response.usage.output_tokens);
+
+        let response_text = extract_text_blocks(&response.content);
+
+        let _ = tx_event
+            .send(Event::MessageDelta {
+                index: iteration as usize,
+                content: format!("\n[RLM iteration {}]\n", round_no),
+            })
+            .await;
+
+        // 3b. Extract Python code from the response
+        let Some(code_to_run) = extract_python_code(&response_text) else {
+            // No code block — the model answered directly, which is a valid
+            // exit. The text is returned as the answer and NOT streamed here:
+            // the caller emits the final answer, and sending it from both
+            // places would show it twice.
+            return rlm_success(response_text, round_no, start, total_usage);
+        };
+
+        let _ = tx_event
+            .send(Event::MessageDelta {
+                index: iteration as usize,
+                content: format!("```python\n{code_to_run}\n```\n"),
+            })
+            .await;
+
+        // 3c. Execute code in REPL
+        let round = match repl.execute(&code_to_run).await {
+            Ok(r) => r,
+            Err(e) => {
+                let _ = tx_event
+                    .send(Event::status(format!("RLM REPL error: {e}")))
+                    .await;
+                return rlm_failure(
+                    format!("REPL execution failed: {e}"),
+                    round_no,
+                    start,
+                    total_usage,
+                );
+            }
+        };
+
+        // 3d. Check for FINAL
+        if let Some(final_val) = &round.final_value {
+            let _ = tx_event
+                .send(Event::status("RLM: FINAL detected, ending loop".to_string()))
+                .await;
+            return rlm_success(final_val.clone(), round_no, start, total_usage);
+        }
+
+        // Also check raw stdout for FINAL (in case the parse missed it)
+        let (_cleaned, raw_final) = parse_final(&round.full_stdout);
+        if let Some(final_val) = raw_final {
+            let _ = tx_event
+                .send(Event::status("RLM: FINAL detected (raw parse), ending loop".to_string()))
+                .await;
+            return rlm_success(final_val, round_no, start, total_usage);
+        }
+
+        // 3e. Build metadata for next iteration and append to history
+        //     hist ← hist ∥ code ∥ Metadata(stdout)
+        let stdout_display = if round.stdout.is_empty() && !round.stderr.is_empty() {
+            format!("[stderr]\n{}", truncate_text(&round.stderr, STDOUT_METADATA_PREVIEW_LEN))
+        } else {
+            truncate_text(&round.stdout, STDOUT_METADATA_PREVIEW_LEN)
+        };
+
+        // Assistant message: the code the model wrote
+        messages.push(Message {
+            role: "assistant".to_string(),
+            content: vec![ContentBlock::Text {
+                text: format!("```python\n{code_to_run}\n```"),
+                cache_control: None,
+            }],
+        });
+
+        // User message: metadata about stdout + current REPL state
+        messages.push(build_metadata_message(
+            prompt,
+            round_no,
+            Some(&code_to_run),
+            Some(&stdout_display),
+        ));
+
+        // Emit stdout size as a status update
+        let _ = tx_event
+            .send(Event::status(format!(
+                "REPL round {}: {} bytes output{}",
+                round_no,
+                round.full_stdout.len(),
+                if round.has_error { " (error)" } else { "" },
+            )))
+            .await;
+
+        // Bound the history as a practical guard (the paper's Algorithm 1
+        // does not trim iterations). The initial metadata message is always
+        // kept, and the oldest COMPLETE (assistant, user) pair is dropped so
+        // that roles keep alternating. Each round adds exactly one pair, so a
+        // single drain restores the bound.
+        if messages.len() > 1 + 2 * MAX_HISTORY_PAIRS {
+            messages.drain(1..3);
+        }
+    }
+
+    // Loop exhausted without FINAL
+    rlm_failure(
+        format!("RLM loop exhausted after {MAX_RLM_ITERATIONS} iterations without FINAL"),
+        MAX_RLM_ITERATIONS,
+        start,
+        total_usage,
+    )
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// Build a metadata message describing the current REPL state.
+///
+/// This is what the paper calls `Metadata(state)`. The resulting user
+/// message contains:
+/// - the length of PROMPT in characters (not the content itself),
+/// - a short preview of PROMPT (at most `PROMPT_PREVIEW_LEN` characters),
+/// - the current iteration number,
+/// - for `iteration > 0`: the previous code (first 4 and last 3 lines when it
+///   is longer than 8 lines) and the previous stdout text, when given,
+/// - the list of available REPL functions.
+///
+/// `previous_stdout` is inserted as passed in (only trimmed); callers are
+/// expected to truncate it beforehand.
+fn build_metadata_message(
+    prompt: &str,
+    iteration: u32,
+    previous_code: Option<&str>,
+    previous_stdout: Option<&str>,
+) -> Message {
+    // Count characters, not bytes: the model compares this number with
+    // Python's `len(PROMPT)`, which counts code points.
+    let prompt_len = prompt.chars().count();
+    let prompt_preview = truncate_text(prompt, PROMPT_PREVIEW_LEN);
+
+    let mut parts = Vec::new();
+
+    parts.push(format!("## REPL State (Round {iteration})"));
+    parts.push(String::new());
+    parts.push("**PROMPT** — stored as REPL variable `PROMPT`".to_string());
+    parts.push(format!("- Length: {prompt_len} characters"));
+    parts.push(format!("- Preview: \"{prompt_preview}\""));
+    parts.push(String::new());
+
+    if iteration > 0 {
+        parts.push("**Previous Round**".to_string());
+        if let Some(code) = previous_code {
+            // Only show the first/last lines as metadata
+            let code_lines: Vec<&str> = code.lines().collect();
+            let code_summary = if code_lines.len() > 8 {
+                let first_few = code_lines[..4].join("\n");
+                let last_few = code_lines[code_lines.len() - 3..].join("\n");
+                format!("{} lines: {} ... {}", code_lines.len(), first_few, last_few)
+            } else {
+                code.to_string()
+            };
+            parts.push(format!("- Code: {code_summary}"));
+        }
+        if let Some(stdout) = previous_stdout {
+            let stdout_clean = stdout.trim();
+            if stdout_clean.is_empty() {
+                parts.push("- Stdout: (empty)".to_string());
+            } else {
+                parts.push(format!("- Stdout preview: \"{stdout_clean}\""));
+            }
+        }
+        parts.push(String::new());
+    }
+
+    parts.push("**Available functions**: `repl_get()`, `repl_set()`, `llm_query(prompt)`".to_string());
+    parts.push("**End the loop with**: `FINAL(value)`".to_string());
+
+    Message {
+        role: "user".to_string(),
+        content: vec![ContentBlock::Text {
+            text: parts.join("\n"),
+            cache_control: None,
+        }],
+    }
+}
+
+/// Concatenate the text of every `ContentBlock::Text` in `blocks`, separated
+/// by single newlines. Blocks of any other kind are skipped.
+fn extract_text_blocks(blocks: &[ContentBlock]) -> String {
+    let mut joined = String::new();
+    let mut seen_text = false;
+    for block in blocks {
+        if let ContentBlock::Text { text, .. } = block {
+            // Separator goes between text blocks, never before the first one.
+            if seen_text {
+                joined.push('\n');
+            }
+            joined.push_str(text);
+            seen_text = true;
+        }
+    }
+    joined
+}
+
+/// Return the trimmed body of the earliest ```` ```python ```` / ```` ```py ````
+/// fenced block in `text`.
+///
+/// The opening fence must be followed directly by a line break (LF or CRLF).
+/// Yields `None` when there is no such fence, when the fence is never closed,
+/// or when the block body is empty after trimming.
+fn extract_python_code(text: &str) -> Option<String> {
+    const FENCE_OPENERS: [&str; 4] = ["```python\n", "```py\n", "```python\r\n", "```py\r\n"];
+
+    // For each opener take its first occurrence, then keep whichever starts
+    // earliest in the text; the body begins right after that opener.
+    let body = FENCE_OPENERS
+        .iter()
+        .filter_map(|opener| text.find(opener).map(|at| (at, at + opener.len())))
+        .min_by_key(|&(at, _)| at)
+        .map(|(_, body_start)| &text[body_start..])?;
+
+    // Prefer a closing fence at the start of a line; fall back to any fence.
+    let close_at = match body.find("\n```") {
+        Some(pos) => pos,
+        None => body.find("```")?,
+    };
+
+    let snippet = body[..close_at].trim();
+    if snippet.is_empty() {
+        None
+    } else {
+        Some(snippet.to_string())
+    }
+}
+
+/// Truncate `text` to at most `max_chars` characters, ending in `...` if shortened.
+///
+/// Length is measured in `char`s (Unicode scalar values), not bytes, so
+/// multi-byte text that already fits the budget is returned unchanged.
+///
+/// # Arguments
+/// * `text` - the string to shorten.
+/// * `max_chars` - upper bound on the number of characters in the result.
+///
+/// # Returns
+/// `text` unchanged when it has at most `max_chars` characters; otherwise its
+/// first `max_chars - 3` characters followed by `...`. When `max_chars` is
+/// below 3 the ellipsis itself is shortened so the result never exceeds the
+/// budget.
+fn truncate_text(text: &str, max_chars: usize) -> String {
+    // `nth(max_chars)` yields a char only if the text is longer than the budget.
+    // Comparing `text.len()` here would count bytes and misjudge non-ASCII text.
+    if text.chars().nth(max_chars).is_none() {
+        return text.to_string();
+    }
+
+    let ellipsis_len = max_chars.min(3);
+    let keep = max_chars - ellipsis_len;
+
+    // Byte offset of the first character to drop; always on a char boundary.
+    let cut = text
+        .char_indices()
+        .nth(keep)
+        .map_or(text.len(), |(offset, _)| offset);
+
+    let mut result = String::with_capacity(cut + ellipsis_len);
+    result.push_str(&text[..cut]);
+    result.push_str(&"..."[..ellipsis_len]);
+    result
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A ```python fence surrounded by prose yields only the code inside it.
+    #[test]
+    fn extract_python_code_finds_simple_block() {
+        let extracted =
+            extract_python_code("Here's some code:\n```python\nprint('hello')\n```\nEnd.");
+        assert_eq!(extracted.as_deref(), Some("print('hello')"));
+    }
+
+    /// The abbreviated ```py opener is recognised as well.
+    #[test]
+    fn extract_python_code_finds_short_marker() {
+        let extracted = extract_python_code("Code:\n```py\nx = 1 + 2\n```");
+        assert_eq!(extracted.as_deref(), Some("x = 1 + 2"));
+    }
+
+    /// Text without any fence produces no code.
+    #[test]
+    fn extract_python_code_returns_none_when_missing() {
+        let extracted = extract_python_code("Just some text without code fences.");
+        assert_eq!(extracted, None);
+    }
+
+    /// A fence containing only whitespace counts as "no code".
+    #[test]
+    fn extract_python_code_returns_none_on_empty_block() {
+        let extracted = extract_python_code("Code:\n```python\n\n```");
+        assert_eq!(extracted, None);
+    }
+
+    /// With several python fences, only the first one is returned.
+    #[test]
+    fn extract_python_code_handles_multiple_blocks() {
+        let extracted =
+            extract_python_code("First:\n```python\na=1\n```\nSecond:\n```python\nb=2\n```");
+        assert_eq!(extracted.as_deref(), Some("a=1"));
+    }
+
+    /// Untagged fences ahead of the python fence are skipped.
+    #[test]
+    fn extract_python_code_ignores_other_fences() {
+        let extracted =
+            extract_python_code("```\nsome text\n```\nActual:\n```python\nreal_code()\n```");
+        assert_eq!(extracted.as_deref(), Some("real_code()"));
+    }
+
+    /// The first-round metadata message mentions the prompt, the round number
+    /// and the REPL helpers.
+    #[test]
+    fn build_metadata_contains_key_information() {
+        let rendered =
+            extract_text_blocks(&build_metadata_message("Hello, world!", 0, None, None).content);
+        for needle in ["PROMPT", "Hello, world!", "Round 0", "llm_query", "FINAL"] {
+            assert!(rendered.contains(needle), "metadata is missing {needle:?}");
+        }
+    }
+
+    /// Later rounds echo the previous code and its stdout.
+    #[test]
+    fn build_metadata_with_iteration_shows_previous_code() {
+        let message = build_metadata_message("Test prompt", 3, Some("print('hi')"), Some("hi"));
+        let rendered = extract_text_blocks(&message.content);
+        for needle in ["Round 3", "print('hi')", "hi"] {
+            assert!(rendered.contains(needle), "metadata is missing {needle:?}");
+        }
+    }
+
+    /// Text within the budget is returned untouched.
+    #[test]
+    fn truncate_text_leaves_short_text_alone() {
+        let untouched = truncate_text("hello", 100);
+        assert_eq!(untouched, "hello");
+    }
+
+    /// Over-long text is cut to the budget and ends with an ellipsis.
+    #[test]
+    fn truncate_text_shortens_long_text() {
+        let shortened = truncate_text(&"a".repeat(1000), 10);
+        // Seven 'a's plus "..." make exactly ten bytes (and ten chars).
+        assert!(shortened.ends_with("..."));
+        assert_eq!(shortened.len(), 10);
+    }
+
+    /// Text blocks are joined with newlines; non-text blocks are dropped.
+    #[test]
+    fn extract_text_blocks_joins_text_blocks() {
+        let first = ContentBlock::Text {
+            text: "first".to_string(),
+            cache_control: None,
+        };
+        let skipped = ContentBlock::Thinking {
+            thinking: "skip".to_string(),
+        };
+        let second = ContentBlock::Text {
+            text: "second".to_string(),
+            cache_control: None,
+        };
+        let joined = extract_text_blocks(&[first, skipped, second]);
+        assert_eq!(joined, "first\nsecond");
+    }
+
+    /// With no text blocks at all the result is the empty string.
+    #[test]
+    fn extract_text_blocks_returns_empty_on_no_text() {
+        let only_thinking = [ContentBlock::Thinking {
+            thinking: "only thinking".to_string(),
+        }];
+        assert!(extract_text_blocks(&only_thinking).is_empty());
+    }
+
+    /// The metadata message is always sent with the `user` role.
+    #[test]
+    fn metadata_msg_role_is_user() {
+        let message = build_metadata_message("test", 0, None, None);
+        assert_eq!(message.role, "user");
+    }
+
+    /// Multi-line code and stdout from the previous round both show up.
+    #[test]
+    fn metadata_with_previous_code_shows_code_summary() {
+        let previous_code = "for i in range(10):\n    print(i)";
+        let previous_stdout = "0\n1\n2";
+        let message = build_metadata_message("test", 2, Some(previous_code), Some(previous_stdout));
+        let rendered = extract_text_blocks(&message.content);
+        for needle in ["Round 2", "for i", "0\n1\n2"] {
+            assert!(rendered.contains(needle), "metadata is missing {needle:?}");
+        }
+    }
+}
@@ -1922,7 +1922,19 @@ pub enum AppAction {
    OpenConfigView,
    /// Open the `/model` two-pane picker (Pro/Flash + Off/High/Max).
    OpenModelPicker,
+    /// Send a message to the AI (normal chat mode).
    SendMessage(String),
+    /// Run a Recursive Language Model (RLM) turn — Algorithm 1 from
+    /// Zhang et al. (arXiv:2512.24601). The prompt is stored in the REPL;
+    /// the root LLM only sees metadata.
+    RlmQuery {
+        /// The user's prompt — stored in REPL, NOT in LLM context.
+        prompt: String,
+        /// Model for the root LLM.
+        model: String,
+        /// Model for sub-LLM (llm_query) calls.
+        child_model: String,
+    },
    ListSubAgents,
    FetchModels,
    /// Switch the active LLM backend (DeepSeek vs NVIDIA NIM) without
@@ -2337,6 +2337,20 @@ async fn apply_command_result(
                let queued = build_queued_message(app, content);
                submit_or_steer_message(app, engine_handle, queued).await?;
            }
+            AppAction::RlmQuery {
+                prompt,
+                model,
+                child_model,
+            } => {
+                app.status_message = Some("RLM turn starting (Algorithm 1)...".to_string());
+                let _ = engine_handle
+                    .send(Op::RlmQuery {
+                        content: prompt,
+                        model,
+                        child_model,
+                    })
+                    .await;
+            }
            AppAction::ListSubAgents => {
                let _ = engine_handle.send(Op::ListSubAgents).await;
            }