perf(capacity): let scan_canonical_inputs early-exit without verified-user lookup
The build_canonical_state path never reads CanonicalStateScan::latest_verified_user_idx, but the previous patch required is_complete() to find a verified user message before it would short-circuit. On a long history with no verification replay — the common case — the scan walked the entire message list looking for a match that could not exist. Add a find_verified: bool parameter to scan_canonical_inputs and CanonicalStateScan::is_complete. build_canonical_state now passes false, so the loop stops as soon as the goal and CANONICAL_SCAN_MAX_FACTS facts are found. The replan path (apply_verify_and_replan) keeps the existing true behavior so it still locates the latest verified user message. Test calls are updated to match; no behavior change for any test.
This commit is contained in:
@@ -659,8 +659,10 @@ impl Engine {
|
||||
|
||||
// The replan path needs the *full* messages, not summaries.
|
||||
// `scan_canonical_inputs` already located the indices in a single
|
||||
// reverse pass; clone from the live `messages` slice once.
|
||||
let scan = scan_canonical_inputs(&self.session.messages);
|
||||
// reverse pass; clone from the live `messages` slice once. We
|
||||
// pass `true` because the replan path consumes
|
||||
// `latest_verified_user_idx` below.
|
||||
let scan = scan_canonical_inputs(&self.session.messages, true);
|
||||
let latest_user = scan
|
||||
.latest_user_text_idx
|
||||
.and_then(|idx| self.session.messages.get(idx).cloned());
|
||||
@@ -751,8 +753,11 @@ impl Engine {
|
||||
// message index. Previously this function did two reverse
|
||||
// `.iter().rev().find_map()` walks and a third for facts; the
|
||||
// dedicated scan below replaces all three with one pass that
|
||||
// also early-exits once every collector is satisfied.
|
||||
let scan = scan_canonical_inputs(&self.session.messages);
|
||||
// also early-exits once every collector is satisfied. We pass
|
||||
// `false` here because build_canonical_state does not consume
|
||||
// `latest_verified_user_idx`, so we don't need the scan to keep
|
||||
// looking for it.
|
||||
let scan = scan_canonical_inputs(&self.session.messages, false);
|
||||
let goal = scan
|
||||
.goal
|
||||
.unwrap_or_else(|| "Continue current task from compact state".to_string());
|
||||
@@ -968,11 +973,16 @@ struct CanonicalStateScan {
|
||||
}
|
||||
|
||||
impl CanonicalStateScan {
|
||||
/// `true` once every collector is satisfied. The single-pass
|
||||
/// caller can use this to break out of the reverse iteration.
|
||||
fn is_complete(&self) -> bool {
|
||||
/// `true` once every collector the caller actually needs is satisfied.
|
||||
///
|
||||
/// `find_verified` controls whether `latest_verified_user_idx` is part
|
||||
/// of the early-exit gate. The build_canonical_state path does not
|
||||
/// consume that field, so passing `false` lets the scan stop as soon
|
||||
/// as the goal and `CANONICAL_SCAN_MAX_FACTS` facts are found — a
|
||||
/// huge win on long histories with no verification replay.
|
||||
fn is_complete(&self, find_verified: bool) -> bool {
|
||||
self.goal.is_some()
|
||||
&& self.latest_verified_user_idx.is_some()
|
||||
&& (!find_verified || self.latest_verified_user_idx.is_some())
|
||||
&& self.facts_collected >= CANONICAL_SCAN_MAX_FACTS
|
||||
}
|
||||
}
|
||||
@@ -981,20 +991,25 @@ impl CanonicalStateScan {
|
||||
/// state and re-plan paths need. Replaces the previous pattern of three
|
||||
/// independent reverse scans: one for the goal, one for confirmed facts,
|
||||
/// and one for the latest verified user message.
|
||||
fn scan_canonical_inputs(messages: &[Message]) -> CanonicalStateScan {
|
||||
///
|
||||
/// `find_verified` controls whether the scan bothers locating the
|
||||
/// latest verified user message. Callers that don't need it (e.g.
|
||||
/// `build_canonical_state`) should pass `false` so the early-exit
|
||||
/// condition can fire as soon as the goal + facts are gathered.
|
||||
fn scan_canonical_inputs(messages: &[Message], find_verified: bool) -> CanonicalStateScan {
|
||||
let mut scan = CanonicalStateScan::default();
|
||||
for (idx, msg) in messages.iter().enumerate().rev() {
|
||||
if msg.role == "user" {
|
||||
if scan.goal.is_none() {
|
||||
if let Some(text) = msg.content.iter().find_map(|b| match b {
|
||||
if scan.goal.is_none()
|
||||
&& let Some(text) = msg.content.iter().find_map(|b| match b {
|
||||
ContentBlock::Text { text, .. } => Some(text.as_str()),
|
||||
_ => None,
|
||||
}) {
|
||||
scan.goal = Some(summarize_text(text, 220));
|
||||
scan.latest_user_text_idx = Some(idx);
|
||||
}
|
||||
})
|
||||
{
|
||||
scan.goal = Some(summarize_text(text, 220));
|
||||
scan.latest_user_text_idx = Some(idx);
|
||||
}
|
||||
if scan.latest_verified_user_idx.is_none() {
|
||||
if find_verified && scan.latest_verified_user_idx.is_none() {
|
||||
let verified = msg.content.iter().any(|b| match b {
|
||||
ContentBlock::ToolResult { content, .. } => {
|
||||
content.contains("[verification replay]")
|
||||
@@ -1019,7 +1034,7 @@ fn scan_canonical_inputs(messages: &[Message]) -> CanonicalStateScan {
|
||||
}
|
||||
}
|
||||
}
|
||||
if scan.is_complete() {
|
||||
if scan.is_complete(find_verified) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1044,15 +1059,18 @@ mod canonical_scan_tests {
|
||||
fn user_with_verified_replay(text: &str) -> Message {
|
||||
Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: text.to_string(),
|
||||
cache_control: None,
|
||||
}, ContentBlock::ToolResult {
|
||||
tool_use_id: "x".to_string(),
|
||||
content: "[verification replay] pass=true".to_string(),
|
||||
is_error: None,
|
||||
content_blocks: None,
|
||||
}],
|
||||
content: vec![
|
||||
ContentBlock::Text {
|
||||
text: text.to_string(),
|
||||
cache_control: None,
|
||||
},
|
||||
ContentBlock::ToolResult {
|
||||
tool_use_id: "x".to_string(),
|
||||
content: "[verification replay] pass=true".to_string(),
|
||||
is_error: None,
|
||||
content_blocks: None,
|
||||
},
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1077,10 +1095,13 @@ mod canonical_scan_tests {
|
||||
tool_result_msg("b"),
|
||||
user_text_msg("third"),
|
||||
];
|
||||
let scan = scan_canonical_inputs(&messages);
|
||||
let scan = scan_canonical_inputs(&messages, false);
|
||||
// Goal should be the most recent user text.
|
||||
let goal = scan.goal.expect("goal");
|
||||
assert!(goal.contains("third"), "expected the most recent, got {goal}");
|
||||
assert!(
|
||||
goal.contains("third"),
|
||||
"expected the most recent, got {goal}"
|
||||
);
|
||||
assert_eq!(scan.latest_user_text_idx, Some(4));
|
||||
}
|
||||
|
||||
@@ -1093,7 +1114,7 @@ mod canonical_scan_tests {
|
||||
tool_result_msg("fact-D"),
|
||||
tool_result_msg("fact-E"),
|
||||
];
|
||||
let scan = scan_canonical_inputs(&messages);
|
||||
let scan = scan_canonical_inputs(&messages, false);
|
||||
assert_eq!(scan.confirmed_facts.len(), 4);
|
||||
// The four most recent (newest first) are E, D, C, B.
|
||||
assert!(scan.confirmed_facts[0].contains("fact-E"));
|
||||
@@ -1109,7 +1130,7 @@ mod canonical_scan_tests {
|
||||
tool_result_msg("Error: bad"),
|
||||
tool_result_msg("good-B"),
|
||||
];
|
||||
let scan = scan_canonical_inputs(&messages);
|
||||
let scan = scan_canonical_inputs(&messages, false);
|
||||
assert_eq!(scan.confirmed_facts.len(), 2);
|
||||
assert!(scan.confirmed_facts[0].contains("good-B"));
|
||||
assert!(scan.confirmed_facts[1].contains("good-A"));
|
||||
@@ -1122,7 +1143,7 @@ mod canonical_scan_tests {
|
||||
user_with_verified_replay("verified"),
|
||||
user_text_msg("third"),
|
||||
];
|
||||
let scan = scan_canonical_inputs(&messages);
|
||||
let scan = scan_canonical_inputs(&messages, true);
|
||||
// The verified marker is on the *middle* message, not the most
|
||||
// recent. The scan should report its actual position.
|
||||
assert_eq!(scan.latest_verified_user_idx, Some(1));
|
||||
@@ -1132,7 +1153,7 @@ mod canonical_scan_tests {
|
||||
|
||||
#[test]
|
||||
fn scan_handles_empty_input() {
|
||||
let scan = scan_canonical_inputs(&[]);
|
||||
let scan = scan_canonical_inputs(&[], false);
|
||||
assert!(scan.goal.is_none());
|
||||
assert!(scan.latest_verified_user_idx.is_none());
|
||||
assert!(scan.latest_user_text_idx.is_none());
|
||||
@@ -1144,13 +1165,15 @@ mod canonical_scan_tests {
|
||||
// 1000 tool results — the scan should stop walking once the
|
||||
// first 4 facts and a goal are found. We can't directly assert
|
||||
// "didn't visit every element" without instrumentation, but the
|
||||
// call must return promptly with the right slice.
|
||||
// call must return promptly with the right slice. We pass
|
||||
// `find_verified=false` so the scan does not have to keep
|
||||
// walking looking for a verified user message that isn't there.
|
||||
let mut messages: Vec<Message> = (0..1000)
|
||||
.map(|i| tool_result_msg(&format!("fact-{i}")))
|
||||
.collect();
|
||||
// Most recent user message comes last.
|
||||
messages.push(user_text_msg("goal"));
|
||||
let scan = scan_canonical_inputs(&messages);
|
||||
let scan = scan_canonical_inputs(&messages, false);
|
||||
assert!(scan.goal.as_deref().unwrap_or("").contains("goal"));
|
||||
assert_eq!(scan.confirmed_facts.len(), 4);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user