feat(rlm): implement true RLM loop per Algorithm 1 (Zhang et al., arXiv:2512.24601)
Adds the true Recursive Language Model (RLM) inference paradigm: - rlm/mod.rs — module root with public API - rlm/prompt.rs — RLM system prompt teaching the model to write code - rlm/turn.rs — Algorithm 1 implementation: - P stored as REPL variable (NEVER in LLM context window) - Metadata-only context sent to root LLM (constant-size) - LLM generates Python code, not free text - Code executed in PythonRuntime with llm_query() for recursion - FINAL() detection ends the loop - Op::RlmQuery variant in ops.rs - /rlm command in the command system - AppAction::RlmQuery handler in ui.rs - PythonRuntime::with_state_path made public for RLM integration - 18 new unit tests for code extraction, metadata building, truncation Key differences from previous 'RLM-inspired' approach: ✅ P is external (REPL variable), not in LLM context ✅ Only metadata(state) in LLM context (constant-size) ✅ LLM generates code, not free text + tool calls ✅ sub-LLM recursion via llm_query() inside REPL code ✅ FINAL() mechanism for programmatic termination
This commit is contained in:
@@ -312,6 +312,13 @@ pub const COMMANDS: &[CommandInfo] = &[
|
||||
description: "Run a structured code review on a file, diff, or PR",
|
||||
usage: "/review <target>",
|
||||
},
|
||||
// RLM command
|
||||
CommandInfo {
|
||||
name: "rlm",
|
||||
aliases: &["recursive"],
|
||||
description: "Recursive Language Model (RLM) — process a prompt via Algorithm 1 from Zhang et al. (arXiv:2512.24601). The prompt is stored in a REPL; the model writes code to process it.",
|
||||
usage: "/rlm <prompt>",
|
||||
},
|
||||
// Debug/cost command
|
||||
CommandInfo {
|
||||
name: "cost",
|
||||
@@ -377,6 +384,9 @@ pub fn execute(cmd: &str, app: &mut App) -> CommandResult {
|
||||
"skill" => skills::run_skill(app, arg),
|
||||
"review" => review::review(app, arg),
|
||||
|
||||
// RLM command
|
||||
"rlm" | "recursive" => rlm(app, arg),
|
||||
|
||||
// Legacy command migrations (kept out of registry/autocomplete intentionally).
|
||||
"set" => CommandResult::error(
|
||||
"The /set command was retired. Use /config to edit settings and /settings to inspect current values.",
|
||||
@@ -411,6 +421,52 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) ->
|
||||
config::set_config_value(app, key, value, persist)
|
||||
}
|
||||
|
||||
/// Execute a Recursive Language Model (RLM) turn — Algorithm 1 from
|
||||
/// Zhang et al. (arXiv:2512.24601).
|
||||
///
|
||||
/// The user's prompt text is passed as the argument. It will be stored
|
||||
/// in the REPL as the `PROMPT` variable. The root LLM will only see
|
||||
/// metadata about the REPL state, never the prompt text directly.
|
||||
pub fn rlm(app: &mut App, arg: Option<&str>) -> CommandResult {
|
||||
let prompt = match arg {
|
||||
Some(p) if !p.trim().is_empty() => p.trim().to_string(),
|
||||
_ => {
|
||||
return CommandResult::error(
|
||||
"Usage: /rlm <prompt>\n\n\
|
||||
Process a prompt using a Recursive Language Model (RLM).\n\
|
||||
The prompt is stored in a REPL and the model writes code\n\
|
||||
to decompose and process it recursively."
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Sanity-check: RLM is most useful for longer prompts.
|
||||
if prompt.len() < 50 {
|
||||
return CommandResult::message(
|
||||
"Tip: RLM is designed for processing LONG prompts (>100 chars). \
|
||||
For short queries, just type the message directly."
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let model = app.model.clone();
|
||||
let child_model = "deepseek-v4-flash".to_string();
|
||||
|
||||
CommandResult::with_message_and_action(
|
||||
format!(
|
||||
"Starting RLM turn (Algorithm 1) for {} chars of prompt using {}...",
|
||||
prompt.len(),
|
||||
model
|
||||
),
|
||||
AppAction::RlmQuery {
|
||||
prompt,
|
||||
model,
|
||||
child_model,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Get command info by name or alias
|
||||
pub fn get_command_info(name: &str) -> Option<&'static CommandInfo> {
|
||||
let name = name.strip_prefix('/').unwrap_or(name);
|
||||
|
||||
@@ -1311,6 +1311,14 @@ impl Engine {
|
||||
Op::CompactContext => {
|
||||
self.handle_manual_compaction().await;
|
||||
}
|
||||
Op::RlmQuery {
|
||||
content,
|
||||
model,
|
||||
child_model,
|
||||
} => {
|
||||
self.handle_rlm_query(content, model, child_model)
|
||||
.await;
|
||||
}
|
||||
Op::Shutdown => {
|
||||
break;
|
||||
}
|
||||
@@ -1645,6 +1653,94 @@ impl Engine {
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Handle a Recursive Language Model (RLM) query — Algorithm 1 from
|
||||
/// Zhang et al. (arXiv:2512.24601).
|
||||
///
|
||||
/// The prompt is stored as PROMPT in a REPL variable. The root LLM
|
||||
/// only sees metadata about the REPL state, never the prompt text
|
||||
/// directly. The model generates Python code, which is executed by
|
||||
/// the REPL. When FINAL() is called, the loop ends.
|
||||
async fn handle_rlm_query(
|
||||
&mut self,
|
||||
content: String,
|
||||
model: String,
|
||||
child_model: String,
|
||||
) {
|
||||
use crate::rlm::turn::run_rlm_turn;
|
||||
|
||||
let Some(ref client) = self.deepseek_client else {
|
||||
let err = self
|
||||
.deepseek_client_error
|
||||
.as_deref()
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| "API client not configured".to_string());
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::error(format!("RLM error: {err}"), false))
|
||||
.await;
|
||||
return;
|
||||
};
|
||||
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::status("RLM turn started (Algorithm 1)".to_string()))
|
||||
.await;
|
||||
|
||||
let result = run_rlm_turn(
|
||||
client,
|
||||
model,
|
||||
content,
|
||||
child_model,
|
||||
self.tx_event.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
let has_error = result.error.is_some();
|
||||
if let Some(ref err) = result.error {
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::error(format!("RLM error: {err}"), true))
|
||||
.await;
|
||||
}
|
||||
|
||||
if !result.answer.is_empty() {
|
||||
// Add the final answer as an assistant message in the session.
|
||||
self.add_session_message(crate::models::Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![crate::models::ContentBlock::Text {
|
||||
text: result.answer.clone(),
|
||||
cache_control: None,
|
||||
}],
|
||||
})
|
||||
.await;
|
||||
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::MessageDelta {
|
||||
index: 0,
|
||||
content: result.answer.clone(),
|
||||
})
|
||||
.await;
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::MessageComplete { index: 0 })
|
||||
.await;
|
||||
}
|
||||
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::TurnComplete {
|
||||
usage: result.usage,
|
||||
status: if has_error {
|
||||
crate::core::events::TurnOutcomeStatus::Failed
|
||||
} else {
|
||||
crate::core::events::TurnOutcomeStatus::Completed
|
||||
},
|
||||
error: result.error,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
fn estimated_input_tokens(&self) -> usize {
|
||||
estimate_input_tokens_conservative(
|
||||
&self.session.messages,
|
||||
|
||||
@@ -65,6 +65,18 @@ pub enum Op {
|
||||
/// Run context compaction immediately.
|
||||
CompactContext,
|
||||
|
||||
/// Run a Recursive Language Model (RLM) turn per Algorithm 1 of
|
||||
/// Zhang et al. (arXiv:2512.24601). The prompt is stored in the REPL
|
||||
/// as the `PROMPT` variable; the root LLM only sees metadata.
|
||||
RlmQuery {
|
||||
/// The user's prompt — stored in REPL, NOT in the LLM context.
|
||||
content: String,
|
||||
/// The model to use for root LLM calls.
|
||||
model: String,
|
||||
/// The model to use for sub-LLM (llm_query) calls.
|
||||
child_model: String,
|
||||
},
|
||||
|
||||
/// Shutdown the engine
|
||||
Shutdown,
|
||||
}
|
||||
|
||||
@@ -37,6 +37,7 @@ mod project_context;
|
||||
mod project_doc;
|
||||
mod prompts;
|
||||
pub mod repl;
|
||||
pub mod rlm;
|
||||
mod responses_api_proxy;
|
||||
mod runtime_api;
|
||||
mod runtime_threads;
|
||||
|
||||
@@ -137,9 +137,8 @@ impl PythonRuntime {
|
||||
})
|
||||
}
|
||||
|
||||
/// Create with a specific state path (for testing).
|
||||
#[cfg(test)]
|
||||
pub(crate) fn with_state_path(path: PathBuf) -> Self {
|
||||
/// Create with a specific state path (for testing / RLM integration).
|
||||
pub fn with_state_path(path: PathBuf) -> Self {
|
||||
Self {
|
||||
state_path: path,
|
||||
stdout_limit: DEFAULT_STDOUT_LIMIT,
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
//! True Recursive Language Model (RLM) loop — paper-spec Algorithm 1.
|
||||
//!
|
||||
//! Implements the RLM inference paradigm from Zhang, Kraska, Khattab
|
||||
//! (arXiv:2512.24601, §2 Algorithm 1):
|
||||
//!
|
||||
//! ```text
|
||||
//! state ← InitREPL(prompt=P)
|
||||
//! state ← AddFunction(state, sub_RLM)
|
||||
//! hist ← [Metadata(state)]
|
||||
//! while True:
|
||||
//! code ← LLM(hist)
|
||||
//! (state, stdout) ← REPL(state, code)
|
||||
//! hist ← hist ∥ code ∥ Metadata(stdout)
|
||||
//! if state[Final] is set:
|
||||
//! return state[Final]
|
||||
//! ```
|
||||
//!
|
||||
//! Key departure from our previous "RLM-inspired" approach:
|
||||
//! - P is stored as a REPL variable, NEVER in the LLM's context window
|
||||
//! - Only metadata about state/stdout goes to the LLM — constant-size context
|
||||
//! - The LLM generates Python code, not free text
|
||||
//! - Recursion happens via llm_query() inside the code, not as tool calls
|
||||
//!
|
||||
//! ## Architecture
|
||||
//!
|
||||
//! The RLM loop is a standalone async function that the engine calls from
|
||||
//! its event loop when it receives an `Op::RlmQuery`. It:
|
||||
//! 1. Initialises a PythonRuntime with the prompt stored as `PROMPT`
|
||||
//! 2. Builds a metadata-only context describing REPL state
|
||||
//! 3. Calls the root LLM to generate code
|
||||
//! 4. Executes the code in the REPL
|
||||
//! 5. Checks for FINAL — if found, returns it
|
||||
//! 6. Otherwise, feeds code + truncated stdout metadata back, loops
|
||||
|
||||
pub mod prompt;
|
||||
pub mod turn;
|
||||
|
||||
pub use prompt::rlm_system_prompt;
|
||||
pub use turn::run_rlm_turn;
|
||||
@@ -0,0 +1,126 @@
|
||||
//! RLM system prompt — teaches the model to write code and use the REPL
|
||||
//! per Algorithm 1 of Zhang et al. (arXiv:2512.24601).
|
||||
|
||||
use crate::models::SystemPrompt;
|
||||
|
||||
/// Build the system prompt for a Recursive Language Model (RLM) root LLM call.
|
||||
///
|
||||
/// This prompt instructs the root LLM to generate Python code that
|
||||
/// manipulates the `PROMPT` variable in the REPL environment, using
|
||||
/// `llm_query()` for recursive sub-calls and `FINAL()` to return the
|
||||
/// final answer.
|
||||
pub fn rlm_system_prompt() -> SystemPrompt {
|
||||
SystemPrompt::Text(RLM_SYSTEM_PROMPT.trim().to_string())
|
||||
}
|
||||
|
||||
/// Raw text of the RLM system prompt.
///
/// `rlm_system_prompt()` trims this text and wraps it in
/// `SystemPrompt::Text`. The unit tests in this file assert that it is
/// non-empty and mentions `llm_query`, `FINAL` and a Python code fence.
///
/// NOTE(review): this string is sent to the model, so any wording change
/// is a behavior change; the sub-LLM model name is spelled out in the text
/// and is presumably meant to match the child model chosen by the caller —
/// confirm when changing either.
const RLM_SYSTEM_PROMPT: &str = r#"You are a Recursive Language Model (RLM).
|
||||
|
||||
Your job is to process the user's prompt by writing Python code. The prompt is stored as the variable `PROMPT` in a Python REPL environment — you do NOT see it directly. You must inspect and process it programmatically.
|
||||
|
||||
## REPL Environment
|
||||
|
||||
The Python REPL starts each round with persistent state. Use these functions:
|
||||
|
||||
- `repl_get("PROMPT")` — Returns the full user prompt string.
|
||||
- `repl_set(name, value)` — Stores a variable for future rounds.
|
||||
- `repl_get(name)` — Retrieves a previously stored variable.
|
||||
- `llm_query(prompt, model=None, max_tokens=None)` — Calls a sub-LLM with a
|
||||
new prompt and returns the response text. Use this for complex processing
|
||||
that requires an LLM — the sub-LLM is fast (deepseek-v4-flash) and runs
|
||||
with its own REPL context.
|
||||
- `FINAL(value)` — Sets the final answer and ends the RLM loop. Call this
|
||||
when you have the complete answer.
|
||||
|
||||
## How to operate
|
||||
|
||||
1. PREVIEW the prompt first:
|
||||
```python
|
||||
text = repl_get("PROMPT")
|
||||
print(f"Length: {len(text)}")
|
||||
print(text[:500]) # First 500 chars
|
||||
```
|
||||
|
||||
2. DECOMPOSE the task into chunks. For long prompts, process parts
|
||||
independently using llm_query() for each chunk:
|
||||
```python
|
||||
text = repl_get("PROMPT")
|
||||
chunk_size = 2000
|
||||
results = []
|
||||
for i in range(0, len(text), chunk_size):
|
||||
chunk = text[i:i+chunk_size]
|
||||
result = llm_query(f"Process this part: {chunk}")
|
||||
results.append(result)
|
||||
```
|
||||
|
||||
3. COMBINE results and call FINAL:
|
||||
```python
|
||||
combined = "\n".join(results)
|
||||
FINAL(combined)
|
||||
```
|
||||
|
||||
## Rules
|
||||
|
||||
- You MUST output Python code inside ```python blocks.
|
||||
- Only code inside ```python fences is executed. You can add commentary
|
||||
outside the fences.
|
||||
- The PROMPT variable may be very large (millions of characters). Do not
|
||||
print it in full — always truncate to a preview.
|
||||
- Use llm_query() for heavy lifting — it calls a sub-LLM that can process
|
||||
snippets autonomously.
|
||||
- Previous code and stdout summaries are shown in the conversation history.
|
||||
Build on them rather than repeating work.
|
||||
- Set `FINAL(value)` when you have the complete answer. The RLM loop ends
|
||||
immediately.
|
||||
- If you don't need the REPL and want to return a direct answer, just
|
||||
write a short response without code fences and the RLM loop will end.
|
||||
|
||||
## Strategy hints
|
||||
|
||||
- For code analysis: print structure, use llm_query for deeper understanding
|
||||
- For long document processing: chunk the PROMPT, process each chunk via
|
||||
llm_query, then aggregate results
|
||||
- For research tasks: decompose the question, query sub-parts, synthesize
|
||||
- For iterative tasks: set intermediate results with repl_set, retrieve
|
||||
them across rounds
|
||||
"#;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Unwraps the system prompt into its text, failing the test on any
    /// other `SystemPrompt` variant.
    fn prompt_text() -> String {
        match rlm_system_prompt() {
            SystemPrompt::Text(text) => text,
            _ => panic!("expected Text"),
        }
    }

    #[test]
    fn rlm_prompt_is_not_empty() {
        assert!(!prompt_text().is_empty());
    }

    #[test]
    fn rlm_prompt_mentions_llm_query() {
        assert!(prompt_text().contains("llm_query"));
    }

    #[test]
    fn rlm_prompt_mentions_final() {
        assert!(prompt_text().contains("FINAL"));
    }

    #[test]
    fn rlm_prompt_mentions_python_fence() {
        assert!(prompt_text().contains("```python"));
    }
}
|
||||
@@ -0,0 +1,587 @@
|
||||
//! True RLM turn loop — Algorithm 1 from Zhang et al. (arXiv:2512.24601).
|
||||
//!
|
||||
//! # Algorithm
|
||||
//!
|
||||
//! ```text
|
||||
//! state ← InitREPL(prompt=P)
|
||||
//! state ← AddFunction(state, sub_RLM)
|
||||
//! hist ← [Metadata(state)]
|
||||
//! while True:
|
||||
//! code ← LLM(hist)
|
||||
//! (state, stdout) ← REPL(state, code)
|
||||
//! hist ← hist ∥ code ∥ Metadata(stdout)
|
||||
//! if state[Final] is set:
|
||||
//! return state[Final]
|
||||
//! ```
|
||||
//!
|
||||
//! Key invariants:
|
||||
//! 1. P is stored as `PROMPT` in the REPL — NEVER in the LLM context
|
||||
//! 2. Only metadata (length, preview, variable names) goes to LLM context
|
||||
//! 3. The LLM writes Python code, executed by the REPL
|
||||
//! 4. The REPL provides `llm_query()` for recursive sub-calls
|
||||
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use serde_json::json;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::client::DeepSeekClient;
|
||||
use crate::core::events::Event;
|
||||
use crate::llm_client::LlmClient;
|
||||
use crate::models::{
|
||||
ContentBlock, Message, MessageRequest, Usage,
|
||||
};
|
||||
use crate::repl::runtime::PythonRuntime;
|
||||
use crate::repl::sandbox::parse_final;
|
||||
|
||||
use super::prompt::rlm_system_prompt;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Maximum number of RLM iterations before the loop gives up.
|
||||
const MAX_RLM_ITERATIONS: u32 = 25;
|
||||
|
||||
/// Max output tokens for the root LLM — just needs to generate code, not
|
||||
/// the full answer.
|
||||
const ROOT_MAX_TOKENS: u32 = 4096;
|
||||
|
||||
/// Max chars of stdout shown as metadata to the root LLM in next iteration.
|
||||
/// Matches the paper's "only metadata about stdout" constraint.
|
||||
const STDOUT_METADATA_PREVIEW_LEN: usize = 800;
|
||||
|
||||
/// Max chars of PROMPT shown as preview in metadata.
|
||||
const PROMPT_PREVIEW_LEN: usize = 500;
|
||||
|
||||
/// Temperature for root LLM calls. Low to keep code generation focused.
|
||||
const ROOM_TEMPERATURE: f32 = 0.3;
|
||||
|
||||
/// Wall-clock budget for the whole RLM turn; checked before each iteration against the time elapsed since the turn started.
|
||||
const ROUND_TIMEOUT: Duration = Duration::from_secs(180);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Result of an RLM turn.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RlmTurnResult {
|
||||
/// The final answer (from FINAL(), or the model's raw text if no code).
|
||||
pub answer: String,
|
||||
/// Number of iterations used.
|
||||
pub iterations: u32,
|
||||
/// Total wall-clock duration.
|
||||
pub duration: Duration,
|
||||
/// Error message if the turn failed.
|
||||
pub error: Option<String>,
|
||||
/// Usage from the root LLM calls (total across iterations).
|
||||
pub usage: Usage,
|
||||
}
|
||||
|
||||
/// Run a full RLM turn per Algorithm 1.
|
||||
///
|
||||
/// The user's `prompt` is stored as `PROMPT` in the REPL and never placed
|
||||
/// into the LLM's context window. The LLM receives only metadata about the
|
||||
/// REPL state and generates code, which is then executed. When `FINAL()` is
|
||||
/// called inside the code, the loop ends and the value is returned.
|
||||
pub async fn run_rlm_turn(
|
||||
client: &DeepSeekClient,
|
||||
model: String,
|
||||
prompt: String,
|
||||
_child_model: String,
|
||||
tx_event: mpsc::Sender<Event>,
|
||||
) -> RlmTurnResult {
|
||||
let start = Instant::now();
|
||||
let mut total_usage = Usage::default();
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// 1. Initialise REPL with PROMPT variable
|
||||
// ------------------------------------------------------------------
|
||||
let state_dir = std::env::temp_dir().join("deepseek_rlm");
|
||||
let _ = std::fs::create_dir_all(&state_dir);
|
||||
let state_path = state_dir.join(format!("rlm_{}.json", uuid::Uuid::new_v4()));
|
||||
|
||||
// Write PROMPT into the REPL state before the REPL even starts.
|
||||
let initial_vars = json!({"PROMPT": &prompt});
|
||||
if let Err(e) = std::fs::write(&state_path, serde_json::to_string(&initial_vars).unwrap()) {
|
||||
return RlmTurnResult {
|
||||
answer: String::new(),
|
||||
iterations: 0,
|
||||
duration: start.elapsed(),
|
||||
error: Some(format!("Failed to write REPL state: {e}")),
|
||||
usage: total_usage,
|
||||
};
|
||||
}
|
||||
|
||||
let mut repl = PythonRuntime::with_state_path(state_path.clone());
|
||||
|
||||
let _ = tx_event
|
||||
.send(Event::status("RLM: REPL initialised with PROMPT variable".to_string()))
|
||||
.await;
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// 2. Build metadata-only conversation history
|
||||
// ------------------------------------------------------------------
|
||||
let system = rlm_system_prompt();
|
||||
let metadata_msg = build_metadata_message(&prompt, 0, None, None);
|
||||
|
||||
// The conversation history for the root LLM contains ONLY:
|
||||
// - Metadata(state) — initial
|
||||
// - code (assistant) + Metadata(stdout) (user) — for each iteration
|
||||
// This keeps the root LLM context constant-size regardless of PROMPT size.
|
||||
let mut messages: Vec<Message> = vec![metadata_msg];
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// 3. RLM loop (Algorithm 1)
|
||||
// ------------------------------------------------------------------
|
||||
for iteration in 0..MAX_RLM_ITERATIONS {
|
||||
if start.elapsed() > ROUND_TIMEOUT {
|
||||
return RlmTurnResult {
|
||||
answer: String::new(),
|
||||
iterations: iteration,
|
||||
duration: start.elapsed(),
|
||||
error: Some(format!("RLM turn timed out after {}s", ROUND_TIMEOUT.as_secs())),
|
||||
usage: total_usage,
|
||||
};
|
||||
}
|
||||
|
||||
let _ = tx_event
|
||||
.send(Event::status(format!("RLM iteration {}/{}", iteration + 1, MAX_RLM_ITERATIONS)))
|
||||
.await;
|
||||
|
||||
// 3a. LLM generates code from metadata-only context
|
||||
let request = MessageRequest {
|
||||
model: model.clone(),
|
||||
messages: messages.clone(),
|
||||
max_tokens: ROOT_MAX_TOKENS,
|
||||
system: Some(system.clone()),
|
||||
tools: None,
|
||||
tool_choice: None,
|
||||
metadata: None,
|
||||
thinking: None,
|
||||
reasoning_effort: None,
|
||||
stream: Some(false),
|
||||
temperature: Some(ROOM_TEMPERATURE),
|
||||
top_p: Some(0.9_f32),
|
||||
};
|
||||
|
||||
let response = match client.create_message(request).await {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return RlmTurnResult {
|
||||
answer: String::new(),
|
||||
iterations: iteration + 1,
|
||||
duration: start.elapsed(),
|
||||
error: Some(format!("Root LLM call failed: {e}")),
|
||||
usage: total_usage,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// Accumulate usage
|
||||
total_usage.input_tokens = total_usage.input_tokens.saturating_add(response.usage.input_tokens);
|
||||
total_usage.output_tokens = total_usage.output_tokens.saturating_add(response.usage.output_tokens);
|
||||
|
||||
// Extract text from response
|
||||
let response_text = extract_text_blocks(&response.content);
|
||||
|
||||
let _ = tx_event
|
||||
.send(Event::MessageDelta {
|
||||
index: iteration as usize,
|
||||
content: format!("\n[RLM iteration {}]\n", iteration + 1),
|
||||
})
|
||||
.await;
|
||||
|
||||
// 3b. Extract Python code from the response
|
||||
let code = extract_python_code(&response_text);
|
||||
|
||||
let (code_to_run, _is_direct_answer) = match code {
|
||||
Some(c) => (c, false),
|
||||
None => {
|
||||
// No code block — the model gave a direct text answer.
|
||||
// This is a valid exit: the model decided it doesn't need
|
||||
// the REPL and is returning a final answer directly.
|
||||
let _ = tx_event
|
||||
.send(Event::MessageDelta {
|
||||
index: iteration as usize,
|
||||
content: response_text.clone(),
|
||||
})
|
||||
.await;
|
||||
return RlmTurnResult {
|
||||
answer: response_text,
|
||||
iterations: iteration + 1,
|
||||
duration: start.elapsed(),
|
||||
error: None,
|
||||
usage: total_usage,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
let _ = tx_event
|
||||
.send(Event::MessageDelta {
|
||||
index: iteration as usize,
|
||||
content: format!("```python\n{code_to_run}\n```\n"),
|
||||
})
|
||||
.await;
|
||||
|
||||
// 3c. Execute code in REPL
|
||||
let round = match repl.execute(&code_to_run).await {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
let _ = tx_event
|
||||
.send(Event::status(format!("RLM REPL error: {e}")))
|
||||
.await;
|
||||
return RlmTurnResult {
|
||||
answer: String::new(),
|
||||
iterations: iteration + 1,
|
||||
duration: start.elapsed(),
|
||||
error: Some(format!("REPL execution failed: {e}")),
|
||||
usage: total_usage,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// 3d. Check for FINAL
|
||||
if let Some(final_val) = &round.final_value {
|
||||
let _ = tx_event
|
||||
.send(Event::status("RLM: FINAL detected, ending loop".to_string()))
|
||||
.await;
|
||||
return RlmTurnResult {
|
||||
answer: final_val.clone(),
|
||||
iterations: iteration + 1,
|
||||
duration: start.elapsed(),
|
||||
error: None,
|
||||
usage: total_usage,
|
||||
};
|
||||
}
|
||||
|
||||
// Also check raw stdout for FINAL (in case the parse missed it)
|
||||
let (_cleaned, raw_final) = parse_final(&round.full_stdout);
|
||||
if let Some(final_val) = raw_final {
|
||||
let _ = tx_event
|
||||
.send(Event::status("RLM: FINAL detected (raw parse), ending loop".to_string()))
|
||||
.await;
|
||||
return RlmTurnResult {
|
||||
answer: final_val,
|
||||
iterations: iteration + 1,
|
||||
duration: start.elapsed(),
|
||||
error: None,
|
||||
usage: total_usage,
|
||||
};
|
||||
}
|
||||
|
||||
// 3e. Build metadata for next iteration and append to history
|
||||
// hist ← hist ∥ code ∥ Metadata(stdout)
|
||||
let stdout_display = if round.stdout.is_empty() && !round.stderr.is_empty() {
|
||||
format!("[stderr]\n{}", truncate_text(&round.stderr, STDOUT_METADATA_PREVIEW_LEN))
|
||||
} else {
|
||||
truncate_text(&round.stdout, STDOUT_METADATA_PREVIEW_LEN)
|
||||
};
|
||||
|
||||
// Assistant message: the code the model wrote
|
||||
messages.push(Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: format!("```python\n{code_to_run}\n```"),
|
||||
cache_control: None,
|
||||
}],
|
||||
});
|
||||
|
||||
// User message: metadata about stdout + current REPL state
|
||||
let next_metadata = build_metadata_message(&prompt, iteration + 1, Some(&code_to_run), Some(&stdout_display));
|
||||
messages.push(next_metadata);
|
||||
|
||||
// Emit stdout preview as a status update
|
||||
let _ = tx_event
|
||||
.send(Event::status(format!(
|
||||
"REPL round {}: {} bytes output{}",
|
||||
iteration + 1,
|
||||
round.full_stdout.len(),
|
||||
if round.has_error { " (error)" } else { "" },
|
||||
)))
|
||||
.await;
|
||||
|
||||
// Limit the messages vector to prevent unbounded growth.
|
||||
// Keep at most 10 metadata+code pairs (the context is already small
|
||||
// since each is just metadata, but we should still bound it).
|
||||
// The paper's Algorithm 1 only trims per-iteration tokens, not
|
||||
// iterations themselves, but we add this as a practical guard.
|
||||
const MAX_HISTORY_PAIRS: usize = 20; // 10 iterations × 2 messages each
|
||||
if messages.len() > MAX_HISTORY_PAIRS {
|
||||
// Remove oldest pair but keep the first metadata message.
|
||||
let mut kept = vec![messages[0].clone()];
|
||||
kept.extend(messages.drain(messages.len() - MAX_HISTORY_PAIRS + 1..));
|
||||
messages = kept;
|
||||
}
|
||||
}
|
||||
|
||||
// Loop exhausted without FINAL
|
||||
RlmTurnResult {
|
||||
answer: String::new(),
|
||||
iterations: MAX_RLM_ITERATIONS,
|
||||
duration: start.elapsed(),
|
||||
error: Some(format!(
|
||||
"RLM loop exhausted after {MAX_RLM_ITERATIONS} iterations without FINAL"
|
||||
)),
|
||||
usage: total_usage,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Build a metadata message describing the current REPL state.
|
||||
///
|
||||
/// This is what the paper calls `Metadata(state)` — it contains:
|
||||
/// - Length of PROMPT (not the content itself)
|
||||
/// - A short preview of PROMPT
|
||||
/// - Current iteration number
|
||||
/// - Previous code (if any)
|
||||
/// - Previous stdout summary (if any)
|
||||
fn build_metadata_message(
|
||||
prompt: &str,
|
||||
iteration: u32,
|
||||
previous_code: Option<&str>,
|
||||
previous_stdout: Option<&str>,
|
||||
) -> Message {
|
||||
let prompt_len = prompt.len();
|
||||
let prompt_preview = truncate_text(prompt, PROMPT_PREVIEW_LEN);
|
||||
|
||||
let mut parts = Vec::new();
|
||||
|
||||
parts.push(format!("## REPL State (Round {iteration})"));
|
||||
parts.push(String::new());
|
||||
parts.push(format!("**PROMPT** — stored as REPL variable `PROMPT`"));
|
||||
parts.push(format!("- Length: {prompt_len} characters"));
|
||||
parts.push(format!("- Preview: \"{prompt_preview}\""));
|
||||
parts.push(String::new());
|
||||
|
||||
if iteration > 0 {
|
||||
parts.push("**Previous Round**".to_string());
|
||||
if let Some(code) = previous_code {
|
||||
// Only show the first/last lines as metadata
|
||||
let code_lines: Vec<&str> = code.lines().collect();
|
||||
let code_summary = if code_lines.len() > 8 {
|
||||
let first_few: Vec<&str> = code_lines.iter().take(4).copied().collect();
|
||||
let last_few: Vec<&str> = code_lines.iter().rev().take(3).rev().copied().collect();
|
||||
format!(
|
||||
"{} lines: {} ... {}",
|
||||
code_lines.len(),
|
||||
first_few.join("\n"),
|
||||
last_few.join("\n")
|
||||
)
|
||||
} else {
|
||||
code.to_string()
|
||||
};
|
||||
parts.push(format!("- Code: {code_summary}"));
|
||||
}
|
||||
if let Some(stdout) = previous_stdout {
|
||||
// Only show truncated stdout
|
||||
let stdout_clean = stdout.trim();
|
||||
if !stdout_clean.is_empty() {
|
||||
parts.push(format!("- Stdout preview: \"{stdout_clean}\""));
|
||||
} else {
|
||||
parts.push("- Stdout: (empty)".to_string());
|
||||
}
|
||||
}
|
||||
parts.push(String::new());
|
||||
}
|
||||
|
||||
parts.push("**Available functions**: `repl_get()`, `repl_set()`, `llm_query(prompt)`".to_string());
|
||||
parts.push("**End the loop with**: `FINAL(value)`".to_string());
|
||||
|
||||
let text = parts.join("\n");
|
||||
|
||||
Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text,
|
||||
cache_control: None,
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract text from content blocks, joining all text blocks together.
|
||||
fn extract_text_blocks(blocks: &[ContentBlock]) -> String {
|
||||
blocks
|
||||
.iter()
|
||||
.filter_map(|b| match b {
|
||||
ContentBlock::Text { text, .. } => Some(text.as_str()),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
/// Extract the first Python fenced code block from `text`.
///
/// An opening fence is three backticks followed, on the same line, by the
/// info string `python` or `py`. The info string is compared
/// case-insensitively and may be surrounded by whitespace (so a trailing
/// space or a CRLF line ending is accepted). Fences with any other info
/// string, or none, are ignored.
///
/// The block ends at the first closing fence that starts a line, or
/// failing that at the first three-backtick sequence anywhere.
///
/// Returns `None` when there is no Python fence, when the fence is never
/// closed, or when the enclosed code is empty after trimming.
fn extract_python_code(text: &str) -> Option<String> {
    const FENCE: &str = "```";

    // Scan every position where a fence could start (including overlapping
    // ones, e.g. a run of four backticks) and stop at the first whose info
    // string names Python.
    let mut search_from = 0;
    let body = loop {
        let fence_at = search_from + text[search_from..].find(FENCE)?;
        let after_ticks = &text[fence_at + FENCE.len()..];

        if let Some(line_end) = after_ticks.find('\n') {
            let info = after_ticks[..line_end].trim();
            if info.eq_ignore_ascii_case("python") || info.eq_ignore_ascii_case("py") {
                break &after_ticks[line_end + 1..];
            }
        }

        // A backtick is one byte, so this stays on a char boundary.
        search_from = fence_at + 1;
    };

    // Prefer a closing fence at the start of a line; fall back to any.
    let close_at = match body.find("\n```") {
        Some(pos) => pos,
        None => body.find(FENCE)?,
    };

    let code = body[..close_at].trim();
    if code.is_empty() {
        None
    } else {
        Some(code.to_string())
    }
}
|
||||
|
||||
/// Truncate `text` to at most `max_chars` characters, ending with `...`
/// when something was cut.
///
/// Lengths are measured in characters (Unicode scalar values), not bytes,
/// so text that already fits is returned unchanged even when it contains
/// multi-byte characters. When truncation is needed the result is the
/// first `max_chars - 3` characters followed by `...`; for `max_chars`
/// below 3 the result is the bare `...`.
fn truncate_text(text: &str, max_chars: usize) -> String {
    // Cheap shortcut: a string never has more characters than bytes.
    // Otherwise look for a character at index `max_chars`; if there is
    // none, the text fits.
    if text.len() <= max_chars || text.char_indices().nth(max_chars).is_none() {
        return text.to_string();
    }

    let keep = max_chars.saturating_sub(3);
    // Byte offset at which the first `keep` characters end.
    let cut = text
        .char_indices()
        .nth(keep)
        .map_or(text.len(), |(offset, _)| offset);

    let mut result = String::with_capacity(cut + 3);
    result.push_str(&text[..cut]);
    result.push_str("...");
    result
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a text content block without cache control.
    fn text_block(body: &str) -> ContentBlock {
        ContentBlock::Text {
            text: body.to_string(),
            cache_control: None,
        }
    }

    /// Fixture: a thinking content block (never contributes to extracted text).
    fn thinking_block(body: &str) -> ContentBlock {
        ContentBlock::Thinking {
            thinking: body.to_string(),
        }
    }

    // -- extract_python_code ------------------------------------------------

    #[test]
    fn extract_python_code_finds_simple_block() {
        let input = "Here's some code:\n```python\nprint('hello')\n```\nEnd.";
        assert_eq!(extract_python_code(input).as_deref(), Some("print('hello')"));
    }

    #[test]
    fn extract_python_code_finds_short_marker() {
        let input = "Code:\n```py\nx = 1 + 2\n```";
        assert_eq!(extract_python_code(input).as_deref(), Some("x = 1 + 2"));
    }

    #[test]
    fn extract_python_code_returns_none_when_missing() {
        assert_eq!(
            extract_python_code("Just some text without code fences."),
            None
        );
    }

    #[test]
    fn extract_python_code_returns_none_on_empty_block() {
        assert_eq!(extract_python_code("Code:\n```python\n\n```"), None);
    }

    #[test]
    fn extract_python_code_handles_multiple_blocks() {
        let input = "First:\n```python\na=1\n```\nSecond:\n```python\nb=2\n```";
        // Only the first fenced block is returned.
        assert_eq!(extract_python_code(input).as_deref(), Some("a=1"));
    }

    #[test]
    fn extract_python_code_ignores_other_fences() {
        let input = "```\nsome text\n```\nActual:\n```python\nreal_code()\n```";
        assert_eq!(extract_python_code(input).as_deref(), Some("real_code()"));
    }

    // -- build_metadata_message ---------------------------------------------

    #[test]
    fn build_metadata_contains_key_information() {
        let message = build_metadata_message("Hello, world!", 0, None, None);
        let rendered = extract_text_blocks(&message.content);
        for needle in &["PROMPT", "Hello, world!", "Round 0", "llm_query", "FINAL"] {
            assert!(
                rendered.contains(*needle),
                "metadata is missing {:?}",
                needle
            );
        }
    }

    #[test]
    fn build_metadata_with_iteration_shows_previous_code() {
        let message = build_metadata_message("Test prompt", 3, Some("print('hi')"), Some("hi"));
        let rendered = extract_text_blocks(&message.content);
        for needle in &["Round 3", "print('hi')", "hi"] {
            assert!(
                rendered.contains(*needle),
                "metadata is missing {:?}",
                needle
            );
        }
    }

    #[test]
    fn metadata_msg_role_is_user() {
        let message = build_metadata_message("test", 0, None, None);
        assert_eq!(message.role, "user");
    }

    #[test]
    fn metadata_with_previous_code_shows_code_summary() {
        let previous_code = "for i in range(10):\n print(i)";
        let previous_output = "0\n1\n2";
        let message = build_metadata_message("test", 2, Some(previous_code), Some(previous_output));
        let rendered = extract_text_blocks(&message.content);
        for needle in &["Round 2", "for i", "0\n1\n2"] {
            assert!(
                rendered.contains(*needle),
                "metadata is missing {:?}",
                needle
            );
        }
    }

    // -- truncate_text ------------------------------------------------------

    #[test]
    fn truncate_text_leaves_short_text_alone() {
        assert_eq!(truncate_text("hello", 100), "hello");
    }

    #[test]
    fn truncate_text_shortens_long_text() {
        let input = "a".repeat(1000);
        let shortened = truncate_text(&input, 10);
        // Seven 'a's plus the three-character ellipsis: ten chars/bytes total.
        assert!(shortened.ends_with("..."));
        assert_eq!(shortened.len(), 10);
    }

    // -- extract_text_blocks ------------------------------------------------

    #[test]
    fn extract_text_blocks_joins_text_blocks() {
        let blocks = vec![
            text_block("first"),
            thinking_block("skip"),
            text_block("second"),
        ];
        assert_eq!(extract_text_blocks(&blocks), "first\nsecond");
    }

    #[test]
    fn extract_text_blocks_returns_empty_on_no_text() {
        let blocks = vec![thinking_block("only thinking")];
        assert_eq!(extract_text_blocks(&blocks), "");
    }
}
|
||||
@@ -1922,7 +1922,19 @@ pub enum AppAction {
|
||||
OpenConfigView,
|
||||
/// Open the `/model` two-pane picker (Pro/Flash + Off/High/Max).
|
||||
OpenModelPicker,
|
||||
/// Send a message to the AI (normal chat mode).
|
||||
SendMessage(String),
|
||||
/// Run a Recursive Language Model (RLM) turn — Algorithm 1 from
|
||||
/// Zhang et al. (arXiv:2512.24601). The prompt is stored in the REPL;
|
||||
/// the root LLM only sees metadata.
|
||||
RlmQuery {
|
||||
/// The user's prompt — stored in REPL, NOT in LLM context.
|
||||
prompt: String,
|
||||
/// Model for the root LLM.
|
||||
model: String,
|
||||
/// Model for sub-LLM (llm_query) calls.
|
||||
child_model: String,
|
||||
},
|
||||
ListSubAgents,
|
||||
FetchModels,
|
||||
/// Switch the active LLM backend (DeepSeek vs NVIDIA NIM) without
|
||||
|
||||
@@ -2337,6 +2337,20 @@ async fn apply_command_result(
|
||||
let queued = build_queued_message(app, content);
|
||||
submit_or_steer_message(app, engine_handle, queued).await?;
|
||||
}
|
||||
AppAction::RlmQuery {
|
||||
prompt,
|
||||
model,
|
||||
child_model,
|
||||
} => {
|
||||
app.status_message = Some("RLM turn starting (Algorithm 1)...".to_string());
|
||||
let _ = engine_handle
|
||||
.send(Op::RlmQuery {
|
||||
content: prompt,
|
||||
model,
|
||||
child_model,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
AppAction::ListSubAgents => {
|
||||
let _ = engine_handle.send(Op::ListSubAgents).await;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user