42c684367f
Adds the true Recursive Language Model (RLM) inference paradigm: - rlm/mod.rs — module root with public API - rlm/prompt.rs — RLM system prompt teaching the model to write code - rlm/turn.rs — Algorithm 1 implementation: - P stored as REPL variable (NEVER in LLM context window) - Metadata-only context sent to root LLM (constant-size) - LLM generates Python code, not free text - Code executed in PythonRuntime with llm_query() for recursion - FINAL() detection ends the loop - Op::RlmQuery variant in ops.rs - /rlm command in the command system - AppAction::RlmQuery handler in ui.rs - PythonRuntime::with_state_path made public for RLM integration - 18 new unit tests for code extraction, metadata building, truncation Key differences from previous 'RLM-inspired' approach: ✅ P is external (REPL variable), not in LLM context ✅ Only metadata(state) in LLM context (constant-size) ✅ LLM generates code, not free text + tool calls ✅ sub-LLM recursion via llm_query() inside REPL code ✅ FINAL() mechanism for programmatic termination
329 lines
10 KiB
Rust
329 lines
10 KiB
Rust
//! Python sandbox runtime for the REPL.
|
|
//!
|
|
//! Each code-execution round spawns a fresh `python3` process with all
|
|
//! state loaded from / saved to a JSON file. This is simpler and more
|
|
//! robust than trying to manage a long-lived subprocess with async
|
|
//! stdout re-attachment.
|
|
//!
|
|
//! State persistence across rounds:
|
|
//! - `_repl_vars` dict is serialized to a JSON file after each round
|
|
//! - The next round reads it back before executing new code
|
|
//! - This matches the paper's "persistent variable store" design
|
|
|
|
use std::path::PathBuf;
|
|
use std::time::{Duration, Instant};
|
|
|
|
use tokio::process::Command;
|
|
|
|
use super::sandbox::parse_final;
|
|
|
|
/// Python REPL runtime — executes code blocks in isolated processes
/// with persistent variable state via a JSON state file.
///
/// The runtime itself holds no Python state: every call to `execute`
/// spawns a new `python3` process and hands it `state_path` through the
/// `REPL_STATE_FILE` environment variable.
#[derive(Debug, Clone)]
pub struct PythonRuntime {
    /// Path to the state file for variable persistence.
    /// Exported to the child process as `REPL_STATE_FILE`.
    state_path: PathBuf,
    /// Max bytes of stdout to return per round.
    /// Applied to `ReplRound::stdout` only; `full_stdout` is never cut.
    stdout_limit: usize,
    /// Total rounds executed.
    /// Incremented at the start of every `execute` call, before spawning.
    round_count: u64,
    /// When the runtime was created.
    /// Basis for `uptime()`.
    started: Instant,
}
|
|
|
|
/// Result of executing one code block.
#[derive(Debug, Clone)]
pub struct ReplRound {
    /// Truncated stdout (for LLM feedback — paper's "metadata only").
    /// This is the output after `parse_final`, with protocol lines
    /// (`__REPL_*__`) removed and then truncated to the runtime's limit.
    pub stdout: String,
    /// Full stdout (for debugging).
    /// Lossily UTF-8 decoded, protocol lines included, never truncated.
    pub full_stdout: String,
    /// Stderr from this round (lossily UTF-8 decoded).
    pub stderr: String,
    /// Whether the round failed: the process exited unsuccessfully, or
    /// stdout contains the `__REPL_ERROR__::` marker that the wrapper
    /// prints when user code raises a Python `Exception`.
    pub has_error: bool,
    /// If a FINAL(answer) or FINAL_VAR(var) was detected.
    /// Produced by `sandbox::parse_final` from the full stdout.
    pub final_value: Option<String>,
    /// Wall-clock duration of the whole round, measured from entry into
    /// `execute` until the result is assembled.
    pub elapsed: Duration,
}
|
|
|
|
/// Default value for `PythonRuntime::stdout_limit`: the size cap applied
/// to the stdout handed back in `ReplRound::stdout`.
const DEFAULT_STDOUT_LIMIT: usize = 8_192;
/// Upper bound on the wall-clock time of a single `execute` round; when it
/// elapses, `execute` returns a "timed out" error.
const ROUND_TIMEOUT: Duration = Duration::from_secs(120);
|
|
|
|
/// Python bootstrap — loaded at the top of every execution round.
/// Provides `llm_query()`, `FINAL()`, `FINAL_VAR()`, `repl_get/set`,
/// and loads/saves the persistent variable state.
///
/// Notes on the embedded script:
/// - State is loaded from the file named by `REPL_STATE_FILE`. A missing
///   file means "start empty"; an unreadable or non-object file is reported
///   on stderr and also treated as empty.
/// - `_save_state()` serializes to a string *before* touching the file and
///   then swaps a temp file into place, so a value that cannot be encoded
///   as JSON leaves the previous state intact instead of truncating it.
/// - `llm_query()` is currently a stub: it prints a `__REPL_LLM_QUERY__::`
///   request line and returns a placeholder string.
/// - The script must never contain the sequence `"#`, which would end this
///   raw string literal.
const PYTHON_BOOTSTRAP: &str = r#"
import sys, json, os

# --- Persistent variable store ---
_repl_vars = {}
_STATE_FILE = os.environ.get('REPL_STATE_FILE', '')
if _STATE_FILE and os.path.exists(_STATE_FILE):
    try:
        with open(_STATE_FILE, 'r') as f:
            _loaded = json.load(f)
        if isinstance(_loaded, dict):
            _repl_vars = _loaded
        else:
            print('[repl] ignoring state file: top-level JSON value is not an object',
                  file=sys.stderr, flush=True)
    except Exception as _load_err:
        print(f'[repl] could not load state file, starting empty: {_load_err}',
              file=sys.stderr, flush=True)

# --- llm_query function ---
# This is a stub. It emits a request line on stdout that the Rust side can
# recognise, but nothing feeds a real answer back into this process yet.
def llm_query(prompt, model=None, max_tokens=None):
    """Query a sub-LLM (stub).

    Prints a __REPL_LLM_QUERY__ request line to stdout and returns a
    placeholder string derived from the first 100 characters of the prompt."""
    request = {
        'prompt': str(prompt),
        'model': model,
        'max_tokens': max_tokens,
    }
    # Signal to Rust that we want an LLM query.
    print(f'__REPL_LLM_QUERY__::{json.dumps(request)}', flush=True)
    # Rust will inject the result. For now, return a stub.
    return f'[llm_query stub: {str(prompt)[:100]}...]'

# --- FINAL / FINAL_VAR ---
def FINAL(value):
    """Signal the REPL to stop with this final answer."""
    print(f'__REPL_FINAL__::{json.dumps(str(value))}', flush=True)

def FINAL_VAR(name):
    """Signal the REPL to stop, returning the named variable."""
    val = _repl_vars.get(str(name), f'<variable {name!r} not found>')
    print(f'__REPL_FINAL__::{json.dumps(str(val))}', flush=True)

# --- State helpers ---
def repl_get(name, default=None):
    return _repl_vars.get(str(name), default)

def repl_set(name, value):
    _repl_vars[str(name)] = value

# --- Save state after execution ---
def _save_state():
    if not _STATE_FILE:
        return
    # Serialize first: if a value is not JSON-encodable we must not have
    # opened (and thereby truncated) the existing state file yet.
    try:
        _payload = json.dumps(_repl_vars)
    except Exception as _err:
        print(f'[repl] state not saved (value is not JSON-serializable): {_err}',
              file=sys.stderr, flush=True)
        return
    # Write to a sibling temp file and swap it in, so a reader never
    # observes a half-written state file.
    _tmp = f'{_STATE_FILE}.{os.getpid()}.tmp'
    try:
        with open(_tmp, 'w') as f:
            f.write(_payload)
        os.replace(_tmp, _STATE_FILE)
    except OSError as _err:
        print(f'[repl] state not saved: {_err}', file=sys.stderr, flush=True)

# Import commonly needed modules
import re as _re
"#;
|
|
|
|
/// Code suffix — appended after user code to save state.
///
/// Calls the `_save_state()` helper defined in `PYTHON_BOOTSTRAP`, which
/// writes `_repl_vars` back to the state file. `execute` places this after
/// the `try`/`except` wrapper around the user code, so it also runs when
/// the user code raised an `Exception` that the wrapper caught.
const PYTHON_SUFFIX: &str = r#"
# --- Save state after execution ---
_save_state()
"#;
|
|
|
|
impl PythonRuntime {
|
|
/// Create a new Python REPL runtime.
|
|
pub async fn new() -> Result<Self, String> {
|
|
let dir = std::env::temp_dir().join("deepseek_repl");
|
|
std::fs::create_dir_all(&dir)
|
|
.map_err(|e| format!("Failed to create REPL temp dir: {e}"))?;
|
|
|
|
let state_path = dir.join(format!("state_{}.json", std::process::id()));
|
|
|
|
Ok(Self {
|
|
state_path,
|
|
stdout_limit: DEFAULT_STDOUT_LIMIT,
|
|
round_count: 0,
|
|
started: Instant::now(),
|
|
})
|
|
}
|
|
|
|
/// Create with a specific state path (for testing / RLM integration).
|
|
pub fn with_state_path(path: PathBuf) -> Self {
|
|
Self {
|
|
state_path: path,
|
|
stdout_limit: DEFAULT_STDOUT_LIMIT,
|
|
round_count: 0,
|
|
started: Instant::now(),
|
|
}
|
|
}
|
|
|
|
/// Execute a block of Python code.
|
|
///
|
|
/// Spawns a `python3 -u` process with the bootstrap, the user code,
|
|
/// and the suffix, then collects stdout/stderr.
|
|
pub async fn execute(&mut self, code: &str) -> Result<ReplRound, String> {
|
|
let round_start = Instant::now();
|
|
self.round_count += 1;
|
|
|
|
// Build the full script: bootstrap + user code + suffix.
|
|
let full_script = format!(
|
|
"{}\n\n# --- User code (round {}) ---\ntry:\n{}\nexcept Exception as _repl_err:\n print(f'__REPL_ERROR__::{{_repl_err}}', flush=True)\n\n{}",
|
|
PYTHON_BOOTSTRAP,
|
|
self.round_count,
|
|
indent_code(code, 4),
|
|
PYTHON_SUFFIX,
|
|
);
|
|
|
|
let output = tokio::time::timeout(ROUND_TIMEOUT, async {
|
|
Command::new("python3")
|
|
.arg("-u") // unbuffered
|
|
.arg("-c")
|
|
.arg(&full_script)
|
|
.env(
|
|
"REPL_STATE_FILE",
|
|
self.state_path.to_string_lossy().as_ref(),
|
|
)
|
|
.output()
|
|
.await
|
|
.map_err(|e| format!("Failed to execute python3: {e}"))
|
|
})
|
|
.await
|
|
.map_err(|_| {
|
|
format!(
|
|
"Python REPL round timed out after {}s",
|
|
ROUND_TIMEOUT.as_secs()
|
|
)
|
|
})??;
|
|
|
|
let full_stdout = String::from_utf8_lossy(&output.stdout).to_string();
|
|
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
|
|
let has_error = !output.status.success() || full_stdout.contains("__REPL_ERROR__::");
|
|
|
|
// Parse FINAL markers and clean up protocol lines.
|
|
let (display_stdout, final_value) = parse_final(&full_stdout);
|
|
let display_stdout = clean_repl_output(&display_stdout);
|
|
let display_stdout = truncate_stdout(&display_stdout, self.stdout_limit);
|
|
|
|
Ok(ReplRound {
|
|
stdout: display_stdout,
|
|
full_stdout,
|
|
stderr,
|
|
has_error,
|
|
final_value,
|
|
elapsed: round_start.elapsed(),
|
|
})
|
|
}
|
|
|
|
/// Total rounds executed.
|
|
pub fn round_count(&self) -> u64 {
|
|
self.round_count
|
|
}
|
|
|
|
/// Wall-clock uptime.
|
|
pub fn uptime(&self) -> Duration {
|
|
self.started.elapsed()
|
|
}
|
|
}
|
|
|
|
/// Strip REPL protocol lines from captured stdout.
///
/// A line is dropped when it *starts with* one of the protocol markers
/// (`__REPL_LLM_QUERY__::`, `__REPL_FINAL__::`, `__REPL_ERROR__::`,
/// `__REPL_DONE__`, `__REPL_READY__`). The surviving lines are re-joined
/// with `\n`, so a trailing newline in `raw` is not preserved.
fn clean_repl_output(raw: &str) -> String {
    const PROTOCOL_PREFIXES: [&str; 5] = [
        "__REPL_LLM_QUERY__::",
        "__REPL_FINAL__::",
        "__REPL_ERROR__::",
        "__REPL_DONE__",
        "__REPL_READY__",
    ];

    let mut visible: Vec<&str> = Vec::new();
    for line in raw.lines() {
        let is_protocol = PROTOCOL_PREFIXES
            .iter()
            .any(|prefix| line.starts_with(*prefix));
        if is_protocol {
            continue;
        }
        visible.push(line);
    }
    visible.join("\n")
}
|
|
|
|
/// Prefix every non-empty line of `code` with `spaces` space characters.
///
/// Empty lines stay empty (no trailing whitespace is introduced), lines are
/// re-joined with `\n`, and a trailing newline in `code` is not preserved.
fn indent_code(code: &str, spaces: usize) -> String {
    let pad = " ".repeat(spaces);
    let mut indented = String::new();

    for (idx, line) in code.lines().enumerate() {
        // Separator goes between lines, never after the last one.
        if idx > 0 {
            indented.push('\n');
        }
        if !line.is_empty() {
            indented.push_str(&pad);
            indented.push_str(line);
        }
    }

    indented
}
|
|
|
|
/// Cap `stdout` at roughly `limit` bytes, appending a truncation notice.
///
/// Output that already fits in `limit` bytes is returned unchanged.
/// Otherwise the first `limit - 80` bytes are kept (80 bytes are reserved
/// for the notice), backing off to the nearest UTF-8 character boundary so
/// a multi-byte character is never split, and a notice stating how many
/// bytes were dropped is appended.
///
/// All sizes are in bytes. Truncating by character count instead would let
/// multi-byte output exceed the budget and make the "bytes omitted" figure
/// wrong.
fn truncate_stdout(stdout: &str, limit: usize) -> String {
    if stdout.len() <= limit {
        return stdout.to_string();
    }

    let budget = limit.saturating_sub(80);

    // `budget < stdout.len()` here; walk back to a char boundary.
    // Index 0 is always a boundary, so this terminates.
    let mut cut = budget;
    while !stdout.is_char_boundary(cut) {
        cut -= 1;
    }

    let omitted = stdout.len() - cut;
    let mut out = String::with_capacity(cut + 80);
    out.push_str(&stdout[..cut]);
    out.push_str(&format!(
        "\n\n[... REPL output truncated: {omitted} bytes omitted ...]\n"
    ));
    out
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain `print` reaches the LLM-facing stdout with no error or FINAL.
    /// Requires a `python3` executable on `PATH`.
    #[tokio::test]
    async fn repl_executes_simple_code() {
        let mut rt = PythonRuntime::new().await.expect("create runtime");
        let round = rt
            .execute("print('hello from repl')")
            .await
            .expect("execute");
        assert!(round.stdout.contains("hello from repl"));
        assert!(!round.has_error);
        assert!(round.final_value.is_none());
    }

    /// `FINAL(...)` in user code is surfaced as `final_value`.
    #[tokio::test]
    async fn repl_handles_final() {
        let mut rt = PythonRuntime::new().await.expect("create runtime");
        let round = rt
            .execute("FINAL('the answer is 42')")
            .await
            .expect("execute");
        assert_eq!(round.final_value.as_deref(), Some("the answer is 42"));
    }

    /// Values stored with `repl_set` survive across separate processes.
    /// Uses its own state file so it cannot interfere with other tests.
    #[tokio::test]
    async fn repl_persists_variables_across_rounds() {
        let dir = std::env::temp_dir().join("deepseek_repl_test");
        std::fs::create_dir_all(&dir).ok();
        let state_path = dir.join(format!("test_state_{}.json", std::process::id()));
        let _ = std::fs::remove_file(&state_path);

        let mut rt = PythonRuntime::with_state_path(state_path.clone());

        // Round 1: set a variable.
        rt.execute("repl_set('count', 41)").await.expect("round 1");
        // Round 2: read it back and increment.
        let round = rt
            .execute(
                "val = repl_get('count', 0); repl_set('count', val + 1); print(f'count={val+1}')",
            )
            .await
            .expect("round 2");
        assert!(round.stdout.contains("count=42"));

        // Round 3: verify via FINAL_VAR.
        let round = rt.execute("FINAL_VAR('count')").await.expect("round 3");
        assert_eq!(round.final_value.as_deref(), Some("42"));

        let _ = std::fs::remove_file(&state_path);
    }

    /// Protocol marker lines are filtered; ordinary lines are kept.
    #[test]
    fn clean_output_removes_protocol_lines() {
        let raw = "hello\n__REPL_FINAL__::\"done\"\nworld\n__REPL_LLM_QUERY__::{}";
        let cleaned = clean_repl_output(raw);
        assert!(cleaned.contains("hello"));
        assert!(cleaned.contains("world"));
        assert!(!cleaned.contains("__REPL_FINAL__"));
        assert!(!cleaned.contains("__REPL_LLM_QUERY__"));
    }

    /// Non-empty lines get the full four-space indent; blank lines stay
    /// empty.
    #[test]
    fn indent_preserves_empty_lines() {
        let code = "print(1)\n\nprint(2)";
        let result = indent_code(code, 4);
        assert_eq!(result, "    print(1)\n\n    print(2)");
    }
}
|