feat(rlm): implement true RLM loop per Algorithm 1 (Zhang et al., arXiv:2512.24601)

Adds the true Recursive Language Model (RLM) inference paradigm:

- rlm/mod.rs — module root with public API
- rlm/prompt.rs — RLM system prompt teaching the model to write code
- rlm/turn.rs — Algorithm 1 implementation:
  - P stored as REPL variable (NEVER in LLM context window)
  - Metadata-only context sent to root LLM (constant-size)
  - LLM generates Python code, not free text
  - Code executed in PythonRuntime with llm_query() for recursion
  - FINAL() detection ends the loop
- Op::RlmQuery variant in ops.rs
- /rlm command in the command system
- AppAction::RlmQuery handler in ui.rs
- PythonRuntime::with_state_path made public for RLM integration
- 18 new unit tests for code extraction, metadata building, truncation

Key differences from previous 'RLM-inspired' approach:
- P is external (REPL variable), not in LLM context
- Only metadata(state) in LLM context (constant-size)
- LLM generates code, not free text + tool calls
- Sub-LLM recursion via llm_query() inside REPL code
- FINAL() mechanism for programmatic termination
This commit is contained in:
Hunter Bown
2026-04-26 23:34:17 -05:00
parent ac8a882be5
commit 42c684367f
10 changed files with 945 additions and 3 deletions
+56
View File
@@ -312,6 +312,13 @@ pub const COMMANDS: &[CommandInfo] = &[
description: "Run a structured code review on a file, diff, or PR",
usage: "/review <target>",
},
// RLM command
CommandInfo {
name: "rlm",
aliases: &["recursive"],
description: "Recursive Language Model (RLM) — process a prompt via Algorithm 1 from Zhang et al. (arXiv:2512.24601). The prompt is stored in a REPL; the model writes code to process it.",
usage: "/rlm <prompt>",
},
// Debug/cost command
CommandInfo {
name: "cost",
@@ -377,6 +384,9 @@ pub fn execute(cmd: &str, app: &mut App) -> CommandResult {
"skill" => skills::run_skill(app, arg),
"review" => review::review(app, arg),
// RLM command
"rlm" | "recursive" => rlm(app, arg),
// Legacy command migrations (kept out of registry/autocomplete intentionally).
"set" => CommandResult::error(
"The /set command was retired. Use /config to edit settings and /settings to inspect current values.",
@@ -411,6 +421,52 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) ->
config::set_config_value(app, key, value, persist)
}
/// Execute a Recursive Language Model (RLM) turn — Algorithm 1 from
/// Zhang et al. (arXiv:2512.24601).
///
/// `arg` is the user's prompt text (surrounding whitespace is trimmed).
///
/// Returns:
/// - an error result with usage help when `arg` is missing or blank;
/// - a plain message (no action) when the prompt is shorter than the
///   minimum length, pointing the user at normal chat instead;
/// - otherwise a status message plus an `AppAction::RlmQuery` carrying the
///   prompt, the app's current model as the root model, and the sub-LLM
///   model name.
pub fn rlm(app: &mut App, arg: Option<&str>) -> CommandResult {
    // Prompts with fewer characters than this are not worth an RLM turn.
    // The tip text below is derived from this value so the two cannot drift.
    const MIN_PROMPT_CHARS: usize = 50;

    let prompt = match arg.map(str::trim) {
        Some(p) if !p.is_empty() => p.to_string(),
        _ => {
            return CommandResult::error(
                "Usage: /rlm <prompt>\n\n\
                 Process a prompt using a Recursive Language Model (RLM).\n\
                 The prompt is stored in a REPL and the model writes code\n\
                 to decompose and process it recursively."
                    .to_string(),
            );
        }
    };

    // Count characters, not bytes: both user-facing strings below say "chars",
    // and byte length over-reports any non-ASCII prompt.
    let prompt_chars = prompt.chars().count();

    // Sanity-check: RLM is most useful for longer prompts.
    if prompt_chars < MIN_PROMPT_CHARS {
        return CommandResult::message(format!(
            "Tip: RLM is designed for processing LONG prompts (at least {MIN_PROMPT_CHARS} chars). \
             For short queries, just type the message directly."
        ));
    }

    let model = app.model.clone();
    // Model used for sub-LLM (`llm_query`) calls; fixed here rather than configurable.
    let child_model = "deepseek-v4-flash".to_string();

    CommandResult::with_message_and_action(
        format!(
            "Starting RLM turn (Algorithm 1) for {prompt_chars} chars of prompt using {model}..."
        ),
        AppAction::RlmQuery {
            prompt,
            model,
            child_model,
        },
    )
}
/// Get command info by name or alias
pub fn get_command_info(name: &str) -> Option<&'static CommandInfo> {
let name = name.strip_prefix('/').unwrap_or(name);
+96
View File
@@ -1311,6 +1311,14 @@ impl Engine {
Op::CompactContext => {
self.handle_manual_compaction().await;
}
Op::RlmQuery {
content,
model,
child_model,
} => {
self.handle_rlm_query(content, model, child_model)
.await;
}
Op::Shutdown => {
break;
}
@@ -1645,6 +1653,94 @@ impl Engine {
.await;
}
/// Handle a Recursive Language Model (RLM) query — Algorithm 1 from
/// Zhang et al. (arXiv:2512.24601).
///
/// The prompt is stored as PROMPT in a REPL variable. The root LLM
/// only sees metadata about the REPL state, never the prompt text
/// directly. The model generates Python code, which is executed by
/// the REPL. When FINAL() is called, the loop ends.
async fn handle_rlm_query(
&mut self,
content: String,
model: String,
child_model: String,
) {
use crate::rlm::turn::run_rlm_turn;
let Some(ref client) = self.deepseek_client else {
let err = self
.deepseek_client_error
.as_deref()
.map(|s| s.to_string())
.unwrap_or_else(|| "API client not configured".to_string());
let _ = self
.tx_event
.send(Event::error(format!("RLM error: {err}"), false))
.await;
return;
};
let _ = self
.tx_event
.send(Event::status("RLM turn started (Algorithm 1)".to_string()))
.await;
let result = run_rlm_turn(
client,
model,
content,
child_model,
self.tx_event.clone(),
)
.await;
let has_error = result.error.is_some();
if let Some(ref err) = result.error {
let _ = self
.tx_event
.send(Event::error(format!("RLM error: {err}"), true))
.await;
}
if !result.answer.is_empty() {
// Add the final answer as an assistant message in the session.
self.add_session_message(crate::models::Message {
role: "assistant".to_string(),
content: vec![crate::models::ContentBlock::Text {
text: result.answer.clone(),
cache_control: None,
}],
})
.await;
let _ = self
.tx_event
.send(Event::MessageDelta {
index: 0,
content: result.answer.clone(),
})
.await;
let _ = self
.tx_event
.send(Event::MessageComplete { index: 0 })
.await;
}
let _ = self
.tx_event
.send(Event::TurnComplete {
usage: result.usage,
status: if has_error {
crate::core::events::TurnOutcomeStatus::Failed
} else {
crate::core::events::TurnOutcomeStatus::Completed
},
error: result.error,
})
.await;
}
fn estimated_input_tokens(&self) -> usize {
estimate_input_tokens_conservative(
&self.session.messages,
+12
View File
@@ -65,6 +65,18 @@ pub enum Op {
/// Run context compaction immediately.
CompactContext,
/// Run a Recursive Language Model (RLM) turn per Algorithm 1 of
/// Zhang et al. (arXiv:2512.24601). The prompt is stored in the REPL
/// as the `PROMPT` variable; the root LLM only sees metadata.
RlmQuery {
/// The user's prompt — stored in REPL, NOT in the LLM context.
content: String,
/// The model to use for root LLM calls.
model: String,
/// The model to use for sub-LLM (llm_query) calls.
child_model: String,
},
/// Shutdown the engine
Shutdown,
}
+1
View File
@@ -37,6 +37,7 @@ mod project_context;
mod project_doc;
mod prompts;
pub mod repl;
pub mod rlm;
mod responses_api_proxy;
mod runtime_api;
mod runtime_threads;
+2 -3
View File
@@ -137,9 +137,8 @@ impl PythonRuntime {
})
}
/// Create with a specific state path (for testing).
#[cfg(test)]
pub(crate) fn with_state_path(path: PathBuf) -> Self {
/// Create with a specific state path (for testing / RLM integration).
pub fn with_state_path(path: PathBuf) -> Self {
Self {
state_path: path,
stdout_limit: DEFAULT_STDOUT_LIMIT,
+39
View File
@@ -0,0 +1,39 @@
//! True Recursive Language Model (RLM) loop — paper-spec Algorithm 1.
//!
//! Implements the RLM inference paradigm from Zhang, Kraska, Khattab
//! (arXiv:2512.24601, §2 Algorithm 1):
//!
//! ```text
//! state ← InitREPL(prompt=P)
//! state ← AddFunction(state, sub_RLM)
//! hist ← [Metadata(state)]
//! while True:
//! code ← LLM(hist)
//! (state, stdout) ← REPL(state, code)
//! hist ← hist ∥ code ∥ Metadata(stdout)
//! if state[Final] is set:
//! return state[Final]
//! ```
//!
//! Key departure from our previous "RLM-inspired" approach:
//! - P is stored as a REPL variable, NEVER in the LLM's context window
//! - Only metadata about state/stdout goes to the LLM — constant-size context
//! - The LLM generates Python code, not free text
//! - Recursion happens via llm_query() inside the code, not as tool calls
//!
//! ## Architecture
//!
//! The RLM loop is a standalone async function that the engine calls from
//! its event loop when it receives an `Op::RlmQuery`. It:
//! 1. Initialises a PythonRuntime with the prompt stored as `PROMPT`
//! 2. Builds a metadata-only context describing REPL state
//! 3. Calls the root LLM to generate code
//! 4. Executes the code in the REPL
//! 5. Checks for FINAL — if found, returns it
//! 6. Otherwise, feeds code + truncated stdout metadata back, loops
// Submodules: `prompt` holds the root-LLM system prompt, `turn` holds the
// Algorithm 1 loop and its helpers.
pub mod prompt;
pub mod turn;
// Re-export the two entry points so callers can use `rlm::rlm_system_prompt`
// and `rlm::run_rlm_turn` without naming the submodules.
pub use prompt::rlm_system_prompt;
pub use turn::run_rlm_turn;
+126
View File
@@ -0,0 +1,126 @@
//! RLM system prompt — teaches the model to write code and use the REPL
//! per Algorithm 1 of Zhang et al. (arXiv:2512.24601).
use crate::models::SystemPrompt;
/// Returns the system prompt for the root LLM call of a Recursive Language
/// Model (RLM) turn.
///
/// The text comes from `RLM_SYSTEM_PROMPT` with leading and trailing
/// whitespace removed, wrapped in `SystemPrompt::Text`. It tells the model to
/// write Python that works on the `PROMPT` REPL variable, to call
/// `llm_query()` for recursive sub-calls, and to call `FINAL()` to return the
/// answer.
pub fn rlm_system_prompt() -> SystemPrompt {
    let body = RLM_SYSTEM_PROMPT.trim();
    SystemPrompt::Text(String::from(body))
}
/// Raw Markdown text of the RLM system prompt.
///
/// `rlm_system_prompt` trims this and wraps it in `SystemPrompt::Text`. The
/// text is sent to the model as-is, so every character here is runtime
/// behaviour: it names the REPL helpers (`repl_get`, `repl_set`, `llm_query`,
/// `FINAL`) and states that only code inside `python` fences is executed.
///
/// NOTE(review): several sentences read as if a dash or other punctuation was
/// lost (e.g. "environment you do NOT see it directly") — confirm against the
/// intended wording before editing.
const RLM_SYSTEM_PROMPT: &str = r#"You are a Recursive Language Model (RLM).
Your job is to process the user's prompt by writing Python code. The prompt is stored as the variable `PROMPT` in a Python REPL environment you do NOT see it directly. You must inspect and process it programmatically.
## REPL Environment
The Python REPL starts each round with persistent state. Use these functions:
- `repl_get("PROMPT")` Returns the full user prompt string.
- `repl_set(name, value)` Stores a variable for future rounds.
- `repl_get(name)` Retrieves a previously stored variable.
- `llm_query(prompt, model=None, max_tokens=None)` Calls a sub-LLM with a
new prompt and returns the response text. Use this for complex processing
that requires an LLM the sub-LLM is fast (deepseek-v4-flash) and runs
with its own REPL context.
- `FINAL(value)` Sets the final answer and ends the RLM loop. Call this
when you have the complete answer.
## How to operate
1. PREVIEW the prompt first:
```python
text = repl_get("PROMPT")
print(f"Length: {len(text)}")
print(text[:500]) # First 500 chars
```
2. DECOMPOSE the task into chunks. For long prompts, process parts
independently using llm_query() for each chunk:
```python
text = repl_get("PROMPT")
chunk_size = 2000
results = []
for i in range(0, len(text), chunk_size):
chunk = text[i:i+chunk_size]
result = llm_query(f"Process this part: {chunk}")
results.append(result)
```
3. COMBINE results and call FINAL:
```python
combined = "\n".join(results)
FINAL(combined)
```
## Rules
- You MUST output Python code inside ```python blocks.
- Only code inside ```python fences is executed. You can add commentary
outside the fences.
- The PROMPT variable may be very large (millions of characters). Do not
print it in full always truncate to a preview.
- Use llm_query() for heavy lifting it calls a sub-LLM that can process
snippets autonomously.
- Previous code and stdout summaries are shown in the conversation history.
Build on them rather than repeating work.
- Set `FINAL(value)` when you have the complete answer. The RLM loop ends
immediately.
- If you don't need the REPL and want to return a direct answer, just
write a short response without code fences and the RLM loop will end.
## Strategy hints
- For code analysis: print structure, use llm_query for deeper understanding
- For long document processing: chunk the PROMPT, process each chunk via
llm_query, then aggregate results
- For research tasks: decompose the question, query sub-parts, synthesize
- For iterative tasks: set intermediate results with repl_set, retrieve
them across rounds
"#;
#[cfg(test)]
mod tests {
    use super::*;

    /// Unwraps the system prompt's text, failing the test for any other variant.
    fn prompt_text() -> String {
        match rlm_system_prompt() {
            SystemPrompt::Text(body) => body,
            _ => panic!("expected Text"),
        }
    }

    #[test]
    fn rlm_prompt_is_not_empty() {
        assert!(!prompt_text().is_empty());
    }

    #[test]
    fn rlm_prompt_mentions_llm_query() {
        assert!(prompt_text().contains("llm_query"));
    }

    #[test]
    fn rlm_prompt_mentions_final() {
        assert!(prompt_text().contains("FINAL"));
    }

    #[test]
    fn rlm_prompt_mentions_python_fence() {
        assert!(prompt_text().contains("```python"));
    }
}
+587
View File
@@ -0,0 +1,587 @@
//! True RLM turn loop — Algorithm 1 from Zhang et al. (arXiv:2512.24601).
//!
//! # Algorithm
//!
//! ```text
//! state ← InitREPL(prompt=P)
//! state ← AddFunction(state, sub_RLM)
//! hist ← [Metadata(state)]
//! while True:
//! code ← LLM(hist)
//! (state, stdout) ← REPL(state, code)
//! hist ← hist ∥ code ∥ Metadata(stdout)
//! if state[Final] is set:
//! return state[Final]
//! ```
//!
//! Key invariants:
//! 1. P is stored as `PROMPT` in the REPL — NEVER in the LLM context
//! 2. Only metadata (length, preview, variable names) goes to LLM context
//! 3. The LLM writes Python code, executed by the REPL
//! 4. The REPL provides `llm_query()` for recursive sub-calls
use std::time::{Duration, Instant};
use serde_json::json;
use tokio::sync::mpsc;
use crate::client::DeepSeekClient;
use crate::core::events::Event;
use crate::llm_client::LlmClient;
use crate::models::{
ContentBlock, Message, MessageRequest, Usage,
};
use crate::repl::runtime::PythonRuntime;
use crate::repl::sandbox::parse_final;
use super::prompt::rlm_system_prompt;
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/// Maximum number of RLM iterations before the loop gives up. When the loop
/// runs this many rounds without a FINAL value, `run_rlm_turn` returns an
/// error result.
const MAX_RLM_ITERATIONS: u32 = 25;
/// Max output tokens for the root LLM — just needs to generate code, not
/// the full answer.
const ROOT_MAX_TOKENS: u32 = 4096;
/// Max chars of stdout (or stderr, when stdout is empty) shown as metadata to
/// the root LLM in the next iteration.
/// Matches the paper's "only metadata about stdout" constraint.
const STDOUT_METADATA_PREVIEW_LEN: usize = 800;
/// Max chars of PROMPT shown as preview in metadata.
const PROMPT_PREVIEW_LEN: usize = 500;
/// Temperature for root LLM calls. Low to keep code generation focused.
// NOTE(review): the name reads like a typo for `ROOT_TEMPERATURE` (compare
// `ROOT_MAX_TOKENS`) — confirm before renaming, since `run_rlm_turn` uses it.
const ROOM_TEMPERATURE: f32 = 0.3;
/// Wall-clock budget for the whole turn. Despite the name, `run_rlm_turn`
/// compares this against the time elapsed since the turn started, and only at
/// the top of each iteration — so it is not a per-round limit and it does not
/// interrupt an LLM call or REPL execution that is already running.
const ROUND_TIMEOUT: Duration = Duration::from_secs(180);
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/// Result of an RLM turn, as returned by `run_rlm_turn`.
///
/// Exactly one of two shapes is produced: a success (`error` is `None` and
/// `answer` holds the result) or a failure (`error` is `Some` and `answer` is
/// empty).
#[derive(Debug, Clone)]
pub struct RlmTurnResult {
/// The final answer (from FINAL(), or the model's raw text if no code).
/// Empty on every failure path.
pub answer: String,
/// Number of iterations used. On timeout this is the number of rounds
/// completed before the check; on other exits it includes the round in
/// which the turn ended.
pub iterations: u32,
/// Total wall-clock duration, measured from entry into `run_rlm_turn`.
pub duration: Duration,
/// Error message if the turn failed.
pub error: Option<String>,
/// Usage from the root LLM calls (total across iterations). Only
/// `input_tokens` and `output_tokens` are accumulated; other fields keep
/// their default values.
pub usage: Usage,
}
/// Run a full RLM turn per Algorithm 1.
///
/// The user's `prompt` is stored as `PROMPT` in the REPL and never placed
/// into the LLM's context window. The LLM receives only metadata about the
/// REPL state and generates code, which is then executed. When `FINAL()` is
/// called inside the code, the loop ends and the value is returned.
pub async fn run_rlm_turn(
client: &DeepSeekClient,
model: String,
prompt: String,
_child_model: String,
tx_event: mpsc::Sender<Event>,
) -> RlmTurnResult {
let start = Instant::now();
let mut total_usage = Usage::default();
// ------------------------------------------------------------------
// 1. Initialise REPL with PROMPT variable
// ------------------------------------------------------------------
let state_dir = std::env::temp_dir().join("deepseek_rlm");
let _ = std::fs::create_dir_all(&state_dir);
let state_path = state_dir.join(format!("rlm_{}.json", uuid::Uuid::new_v4()));
// Write PROMPT into the REPL state before the REPL even starts.
let initial_vars = json!({"PROMPT": &prompt});
if let Err(e) = std::fs::write(&state_path, serde_json::to_string(&initial_vars).unwrap()) {
return RlmTurnResult {
answer: String::new(),
iterations: 0,
duration: start.elapsed(),
error: Some(format!("Failed to write REPL state: {e}")),
usage: total_usage,
};
}
let mut repl = PythonRuntime::with_state_path(state_path.clone());
let _ = tx_event
.send(Event::status("RLM: REPL initialised with PROMPT variable".to_string()))
.await;
// ------------------------------------------------------------------
// 2. Build metadata-only conversation history
// ------------------------------------------------------------------
let system = rlm_system_prompt();
let metadata_msg = build_metadata_message(&prompt, 0, None, None);
// The conversation history for the root LLM contains ONLY:
// - Metadata(state) — initial
// - code (assistant) + Metadata(stdout) (user) — for each iteration
// This keeps the root LLM context constant-size regardless of PROMPT size.
let mut messages: Vec<Message> = vec![metadata_msg];
// ------------------------------------------------------------------
// 3. RLM loop (Algorithm 1)
// ------------------------------------------------------------------
for iteration in 0..MAX_RLM_ITERATIONS {
if start.elapsed() > ROUND_TIMEOUT {
return RlmTurnResult {
answer: String::new(),
iterations: iteration,
duration: start.elapsed(),
error: Some(format!("RLM turn timed out after {}s", ROUND_TIMEOUT.as_secs())),
usage: total_usage,
};
}
let _ = tx_event
.send(Event::status(format!("RLM iteration {}/{}", iteration + 1, MAX_RLM_ITERATIONS)))
.await;
// 3a. LLM generates code from metadata-only context
let request = MessageRequest {
model: model.clone(),
messages: messages.clone(),
max_tokens: ROOT_MAX_TOKENS,
system: Some(system.clone()),
tools: None,
tool_choice: None,
metadata: None,
thinking: None,
reasoning_effort: None,
stream: Some(false),
temperature: Some(ROOM_TEMPERATURE),
top_p: Some(0.9_f32),
};
let response = match client.create_message(request).await {
Ok(r) => r,
Err(e) => {
return RlmTurnResult {
answer: String::new(),
iterations: iteration + 1,
duration: start.elapsed(),
error: Some(format!("Root LLM call failed: {e}")),
usage: total_usage,
};
}
};
// Accumulate usage
total_usage.input_tokens = total_usage.input_tokens.saturating_add(response.usage.input_tokens);
total_usage.output_tokens = total_usage.output_tokens.saturating_add(response.usage.output_tokens);
// Extract text from response
let response_text = extract_text_blocks(&response.content);
let _ = tx_event
.send(Event::MessageDelta {
index: iteration as usize,
content: format!("\n[RLM iteration {}]\n", iteration + 1),
})
.await;
// 3b. Extract Python code from the response
let code = extract_python_code(&response_text);
let (code_to_run, _is_direct_answer) = match code {
Some(c) => (c, false),
None => {
// No code block — the model gave a direct text answer.
// This is a valid exit: the model decided it doesn't need
// the REPL and is returning a final answer directly.
let _ = tx_event
.send(Event::MessageDelta {
index: iteration as usize,
content: response_text.clone(),
})
.await;
return RlmTurnResult {
answer: response_text,
iterations: iteration + 1,
duration: start.elapsed(),
error: None,
usage: total_usage,
};
}
};
let _ = tx_event
.send(Event::MessageDelta {
index: iteration as usize,
content: format!("```python\n{code_to_run}\n```\n"),
})
.await;
// 3c. Execute code in REPL
let round = match repl.execute(&code_to_run).await {
Ok(r) => r,
Err(e) => {
let _ = tx_event
.send(Event::status(format!("RLM REPL error: {e}")))
.await;
return RlmTurnResult {
answer: String::new(),
iterations: iteration + 1,
duration: start.elapsed(),
error: Some(format!("REPL execution failed: {e}")),
usage: total_usage,
};
}
};
// 3d. Check for FINAL
if let Some(final_val) = &round.final_value {
let _ = tx_event
.send(Event::status("RLM: FINAL detected, ending loop".to_string()))
.await;
return RlmTurnResult {
answer: final_val.clone(),
iterations: iteration + 1,
duration: start.elapsed(),
error: None,
usage: total_usage,
};
}
// Also check raw stdout for FINAL (in case the parse missed it)
let (_cleaned, raw_final) = parse_final(&round.full_stdout);
if let Some(final_val) = raw_final {
let _ = tx_event
.send(Event::status("RLM: FINAL detected (raw parse), ending loop".to_string()))
.await;
return RlmTurnResult {
answer: final_val,
iterations: iteration + 1,
duration: start.elapsed(),
error: None,
usage: total_usage,
};
}
// 3e. Build metadata for next iteration and append to history
// hist ← hist ∥ code ∥ Metadata(stdout)
let stdout_display = if round.stdout.is_empty() && !round.stderr.is_empty() {
format!("[stderr]\n{}", truncate_text(&round.stderr, STDOUT_METADATA_PREVIEW_LEN))
} else {
truncate_text(&round.stdout, STDOUT_METADATA_PREVIEW_LEN)
};
// Assistant message: the code the model wrote
messages.push(Message {
role: "assistant".to_string(),
content: vec![ContentBlock::Text {
text: format!("```python\n{code_to_run}\n```"),
cache_control: None,
}],
});
// User message: metadata about stdout + current REPL state
let next_metadata = build_metadata_message(&prompt, iteration + 1, Some(&code_to_run), Some(&stdout_display));
messages.push(next_metadata);
// Emit stdout preview as a status update
let _ = tx_event
.send(Event::status(format!(
"REPL round {}: {} bytes output{}",
iteration + 1,
round.full_stdout.len(),
if round.has_error { " (error)" } else { "" },
)))
.await;
// Limit the messages vector to prevent unbounded growth.
// Keep at most 10 metadata+code pairs (the context is already small
// since each is just metadata, but we should still bound it).
// The paper's Algorithm 1 only trims per-iteration tokens, not
// iterations themselves, but we add this as a practical guard.
const MAX_HISTORY_PAIRS: usize = 20; // 10 iterations × 2 messages each
if messages.len() > MAX_HISTORY_PAIRS {
// Remove oldest pair but keep the first metadata message.
let mut kept = vec![messages[0].clone()];
kept.extend(messages.drain(messages.len() - MAX_HISTORY_PAIRS + 1..));
messages = kept;
}
}
// Loop exhausted without FINAL
RlmTurnResult {
answer: String::new(),
iterations: MAX_RLM_ITERATIONS,
duration: start.elapsed(),
error: Some(format!(
"RLM loop exhausted after {MAX_RLM_ITERATIONS} iterations without FINAL"
)),
usage: total_usage,
}
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Build a metadata message describing the current REPL state.
///
/// This is what the paper calls `Metadata(state)`. The returned message has
/// role `"user"` and a single text block containing:
/// - the length of PROMPT in characters (not the content itself);
/// - a preview of PROMPT, truncated to `PROMPT_PREVIEW_LEN`;
/// - the round number;
/// - for rounds after the first: the previous code (summarised to its first
///   four and last three lines when longer than eight lines) and the previous
///   stdout preview, when provided;
/// - a reminder of the available REPL functions and of `FINAL(value)`.
///
/// `previous_code` and `previous_stdout` are ignored when `iteration` is 0.
fn build_metadata_message(
    prompt: &str,
    iteration: u32,
    previous_code: Option<&str>,
    previous_stdout: Option<&str>,
) -> Message {
    // Count characters rather than bytes: the label says "characters", and the
    // model will compare it with Python's `len(PROMPT)`, which counts code
    // points. Byte length disagrees with that for any non-ASCII prompt.
    let prompt_len = prompt.chars().count();
    let prompt_preview = truncate_text(prompt, PROMPT_PREVIEW_LEN);

    let mut parts: Vec<String> = vec![
        format!("## REPL State (Round {iteration})"),
        String::new(),
        "**PROMPT** — stored as REPL variable `PROMPT`".to_string(),
        format!("- Length: {prompt_len} characters"),
        format!("- Preview: \"{prompt_preview}\""),
        String::new(),
    ];

    if iteration > 0 {
        parts.push("**Previous Round**".to_string());

        if let Some(code) = previous_code {
            // Long code is shown only by its first/last lines to keep the
            // metadata small.
            let code_lines: Vec<&str> = code.lines().collect();
            let code_summary = if code_lines.len() > 8 {
                let head = &code_lines[..4];
                let tail = &code_lines[code_lines.len() - 3..];
                format!(
                    "{} lines: {} ... {}",
                    code_lines.len(),
                    head.join("\n"),
                    tail.join("\n")
                )
            } else {
                code.to_string()
            };
            parts.push(format!("- Code: {code_summary}"));
        }

        if let Some(stdout) = previous_stdout {
            // The caller passes an already-truncated preview; only trim it.
            let stdout_clean = stdout.trim();
            if stdout_clean.is_empty() {
                parts.push("- Stdout: (empty)".to_string());
            } else {
                parts.push(format!("- Stdout preview: \"{stdout_clean}\""));
            }
        }

        parts.push(String::new());
    }

    parts.push(
        "**Available functions**: `repl_get()`, `repl_set()`, `llm_query(prompt)`".to_string(),
    );
    parts.push("**End the loop with**: `FINAL(value)`".to_string());

    Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: parts.join("\n"),
            cache_control: None,
        }],
    }
}
/// Concatenates the text of every `ContentBlock::Text` in `blocks`, in order,
/// separated by newlines. Blocks of any other kind are skipped; the result is
/// an empty string when there are no text blocks.
fn extract_text_blocks(blocks: &[ContentBlock]) -> String {
    let mut pieces: Vec<&str> = Vec::new();
    for block in blocks {
        if let ContentBlock::Text { text, .. } = block {
            pieces.push(text.as_str());
        }
    }
    pieces.join("\n")
}
/// Returns the contents of the earliest fenced Python block in `text`.
///
/// A block opens with "```python" or "```py" followed directly by a line
/// break (LF or CRLF) and closes at the next "```", preferring one that
/// starts a new line. The contents are trimmed. `None` is returned when there
/// is no opening fence, no closing fence, or the block is blank.
fn extract_python_code(text: &str) -> Option<String> {
    const OPENERS: [&str; 4] = ["```python\n", "```py\n", "```python\r\n", "```py\r\n"];

    // Among all opener variants present, use the one that occurs first and
    // keep everything after it.
    let body = OPENERS
        .iter()
        .filter_map(|opener| text.find(opener).map(|at| (at, at + opener.len())))
        .min_by_key(|&(at, _)| at)
        .map(|(_, body_start)| &text[body_start..])?;

    // Prefer a closing fence on its own line; fall back to any closing fence.
    let close_at = match body.find("\n```") {
        Some(pos) => pos,
        None => body.find("```")?,
    };

    let code = body[..close_at].trim();
    if code.is_empty() {
        None
    } else {
        Some(code.to_string())
    }
}
/// Truncate `text` to at most `max_chars` characters, adding an ellipsis if
/// anything was cut.
///
/// Text with `max_chars` characters or fewer is returned unchanged. Longer
/// text is cut to its first `max_chars - 3` characters followed by `"..."`.
/// Lengths are measured in characters (Unicode scalar values), never bytes, so
/// multi-byte text is neither truncated early nor given a spurious ellipsis.
///
/// When `max_chars` is below 3 and the text is too long, the result is just
/// `"..."`, which is longer than `max_chars`.
fn truncate_text(text: &str, max_chars: usize) -> String {
    // Cheap exit: byte length is an upper bound on the character count.
    if text.len() <= max_chars {
        return text.to_string();
    }

    // The text exceeds the limit only if a character exists at index `max_chars`.
    if text.char_indices().nth(max_chars).is_none() {
        return text.to_string();
    }

    let keep = max_chars.saturating_sub(3);
    // Byte offset at which the first `keep` characters end (a char boundary).
    let cut = text
        .char_indices()
        .nth(keep)
        .map_or(text.len(), |(offset, _)| offset);

    let mut result = String::with_capacity(cut + 3);
    result.push_str(&text[..cut]);
    result.push_str("...");
    result
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders a metadata message to plain text so tests can search it.
    fn metadata_text(
        prompt: &str,
        round: u32,
        code: Option<&str>,
        stdout: Option<&str>,
    ) -> String {
        extract_text_blocks(&build_metadata_message(prompt, round, code, stdout).content)
    }

    // --- extract_python_code -------------------------------------------

    #[test]
    fn extract_python_code_finds_simple_block() {
        let reply = "Here's some code:\n```python\nprint('hello')\n```\nEnd.";
        assert_eq!(extract_python_code(reply).as_deref(), Some("print('hello')"));
    }

    #[test]
    fn extract_python_code_finds_short_marker() {
        let reply = "Code:\n```py\nx = 1 + 2\n```";
        assert_eq!(extract_python_code(reply).as_deref(), Some("x = 1 + 2"));
    }

    #[test]
    fn extract_python_code_returns_none_when_missing() {
        let reply = "Just some text without code fences.";
        assert_eq!(extract_python_code(reply), None);
    }

    #[test]
    fn extract_python_code_returns_none_on_empty_block() {
        let reply = "Code:\n```python\n\n```";
        assert_eq!(extract_python_code(reply), None);
    }

    #[test]
    fn extract_python_code_handles_multiple_blocks() {
        let reply = "First:\n```python\na=1\n```\nSecond:\n```python\nb=2\n```";
        // Only the first block is returned.
        assert_eq!(extract_python_code(reply).as_deref(), Some("a=1"));
    }

    #[test]
    fn extract_python_code_ignores_other_fences() {
        let reply = "```\nsome text\n```\nActual:\n```python\nreal_code()\n```";
        assert_eq!(extract_python_code(reply).as_deref(), Some("real_code()"));
    }

    // --- build_metadata_message ----------------------------------------

    #[test]
    fn build_metadata_contains_key_information() {
        let rendered = metadata_text("Hello, world!", 0, None, None);
        for needle in &["PROMPT", "Hello, world!", "Round 0", "llm_query", "FINAL"] {
            assert!(rendered.contains(*needle));
        }
    }

    #[test]
    fn build_metadata_with_iteration_shows_previous_code() {
        let rendered = metadata_text("Test prompt", 3, Some("print('hi')"), Some("hi"));
        for needle in &["Round 3", "print('hi')", "hi"] {
            assert!(rendered.contains(*needle));
        }
    }

    // --- truncate_text -------------------------------------------------

    #[test]
    fn truncate_text_leaves_short_text_alone() {
        let untouched = truncate_text("hello", 100);
        assert_eq!(untouched, "hello");
    }

    #[test]
    fn truncate_text_shortens_long_text() {
        let input = "a".repeat(1000);
        let shortened = truncate_text(&input, 10);
        // Seven 'a' characters plus "..." makes ten bytes in total.
        assert!(shortened.ends_with("..."));
        assert_eq!(shortened.len(), 10);
    }

    // --- extract_text_blocks -------------------------------------------

    #[test]
    fn extract_text_blocks_joins_text_blocks() {
        let text_block = |s: &str| ContentBlock::Text {
            text: s.to_string(),
            cache_control: None,
        };
        let blocks = vec![
            text_block("first"),
            ContentBlock::Thinking {
                thinking: "skip".to_string(),
            },
            text_block("second"),
        ];
        assert_eq!(extract_text_blocks(&blocks), "first\nsecond");
    }

    #[test]
    fn extract_text_blocks_returns_empty_on_no_text() {
        let only_thinking = vec![ContentBlock::Thinking {
            thinking: "only thinking".to_string(),
        }];
        assert!(extract_text_blocks(&only_thinking).is_empty());
    }

    // --- message shape -------------------------------------------------

    #[test]
    fn metadata_msg_role_is_user() {
        let message = build_metadata_message("test", 0, None, None);
        assert_eq!(message.role, "user");
    }

    #[test]
    fn metadata_with_previous_code_shows_code_summary() {
        let rendered = metadata_text(
            "test",
            2,
            Some("for i in range(10):\n print(i)"),
            Some("0\n1\n2"),
        );
        for needle in &["Round 2", "for i", "0\n1\n2"] {
            assert!(rendered.contains(*needle));
        }
    }
}
+12
View File
@@ -1922,7 +1922,19 @@ pub enum AppAction {
OpenConfigView,
/// Open the `/model` two-pane picker (Pro/Flash + Off/High/Max).
OpenModelPicker,
/// Send a message to the AI (normal chat mode).
SendMessage(String),
/// Run a Recursive Language Model (RLM) turn — Algorithm 1 from
/// Zhang et al. (arXiv:2512.24601). The prompt is stored in the REPL;
/// the root LLM only sees metadata.
RlmQuery {
/// The user's prompt — stored in REPL, NOT in LLM context.
prompt: String,
/// Model for the root LLM.
model: String,
/// Model for sub-LLM (llm_query) calls.
child_model: String,
},
ListSubAgents,
FetchModels,
/// Switch the active LLM backend (DeepSeek vs NVIDIA NIM) without
+14
View File
@@ -2337,6 +2337,20 @@ async fn apply_command_result(
let queued = build_queued_message(app, content);
submit_or_steer_message(app, engine_handle, queued).await?;
}
AppAction::RlmQuery {
prompt,
model,
child_model,
} => {
app.status_message = Some("RLM turn starting (Algorithm 1)...".to_string());
let _ = engine_handle
.send(Op::RlmQuery {
content: prompt,
model,
child_model,
})
.await;
}
AppAction::ListSubAgents => {
let _ = engine_handle.send(Op::ListSubAgents).await;
}