chore(tui): harden exec harness signals

This commit is contained in:
Hunter B
2026-06-06 22:55:23 -07:00
parent fde931ee89
commit 3d676c2509
7 changed files with 190 additions and 10 deletions
+105 -1
View File
@@ -561,10 +561,59 @@ impl ConfigToml {
) -> Option<&HarnessProfile> {
self.harness_profiles
.iter()
.chain(built_in_harness_profiles().iter())
.find(|profile| profile.matches_route(provider_route, model))
}
}
/// Built-in profile seeds for common provider/model families.
///
/// User-configured profiles are always checked first; these seeds only provide
/// a stable resolver result when config has no narrower match.
#[must_use]
pub fn built_in_harness_profiles() -> &'static [HarnessProfile] {
static PROFILES: OnceLock<Vec<HarnessProfile>> = OnceLock::new();
PROFILES.get_or_init(|| {
vec![
HarnessProfile {
provider_route: "deepseek".to_string(),
model_pattern: "deepseek-v4*".to_string(),
posture: HarnessPosture::cache_heavy(),
},
HarnessProfile {
provider_route: "xiaomi-mimo".to_string(),
model_pattern: "mimo-v2.5*".to_string(),
posture: HarnessPosture::cache_heavy(),
},
HarnessProfile {
provider_route: "arcee".to_string(),
model_pattern: "trinity-large-thinking".to_string(),
posture: HarnessPosture::cache_heavy(),
},
HarnessProfile {
provider_route: "huggingface".to_string(),
model_pattern: "*".to_string(),
posture: HarnessPosture::lean(),
},
HarnessProfile {
provider_route: "sglang".to_string(),
model_pattern: "*".to_string(),
posture: HarnessPosture::lean(),
},
HarnessProfile {
provider_route: "vllm".to_string(),
model_pattern: "*".to_string(),
posture: HarnessPosture::lean(),
},
HarnessProfile {
provider_route: "ollama".to_string(),
model_pattern: "*".to_string(),
posture: HarnessPosture::lean(),
},
]
})
}
fn provider_routes_equal(expected: &str, actual: &str) -> bool {
match (ProviderKind::parse(expected), ProviderKind::parse(actual)) {
(Some(expected), Some(actual)) => expected == actual,
@@ -6022,6 +6071,56 @@ safety_posture = "strict"
assert_eq!(pro.posture.kind, HarnessPostureKind::CacheHeavy);
}
/// With an empty (default) config, resolution must fall through to the
/// built-in seed profiles.
#[test]
fn resolve_harness_profile_uses_built_in_seed_when_config_has_no_match() {
    let empty_config = ConfigToml::default();

    // The `xiaomi` route is expected to resolve to the `xiaomi-mimo` seed.
    let mimo_seed = empty_config
        .resolve_harness_profile("xiaomi", "mimo-v2.5-pro")
        .expect("direct Xiaomi MiMo seed should resolve");
    assert_eq!(mimo_seed.provider_route, "xiaomi-mimo");
    assert_eq!(mimo_seed.posture.kind, HarnessPostureKind::CacheHeavy);

    // Exact (non-wildcard) model pattern.
    let arcee_seed = empty_config
        .resolve_harness_profile("arcee", "trinity-large-thinking")
        .expect("direct Arcee seed should resolve");
    assert_eq!(arcee_seed.posture.kind, HarnessPostureKind::CacheHeavy);

    // Wildcard seed: any model on the `vllm` route gets the lean posture.
    let vllm_seed = empty_config
        .resolve_harness_profile("vllm", "Qwen/Qwen3.6-Coder")
        .expect("local seed should resolve");
    let vllm_posture = &vllm_seed.posture;
    assert_eq!(vllm_posture.kind, HarnessPostureKind::Lean);
    assert!(vllm_posture.prefer_codebase_search);
}
/// A user-configured profile that matches the same route/model as a built-in
/// seed must be the one returned by the resolver.
#[test]
fn configured_harness_profile_overrides_built_in_seed() {
    // A posture that is distinguishable from the seed's posture kind.
    let custom_posture = HarnessPosture {
        kind: HarnessPostureKind::Custom,
        max_subagents: 3,
        prefer_codebase_search: true,
        compaction_strategy: HarnessCompactionStrategy::Default,
        tool_surface: HarnessToolSurface::Auto,
        safety_posture: HarnessSafetyPosture::Strict,
    };
    let user_profile = HarnessProfile {
        provider_route: String::from("xiaomi-mimo"),
        model_pattern: String::from("mimo-v2.5-pro"),
        posture: custom_posture,
    };
    let config = ConfigToml {
        harness_profiles: vec![user_profile],
        ..ConfigToml::default()
    };

    let resolved = config
        .resolve_harness_profile("xiaomi-mimo", "mimo-v2.5-pro")
        .expect("configured profile should match first");

    // Every checked field must come from the configured profile.
    let posture = &resolved.posture;
    assert_eq!(posture.kind, HarnessPostureKind::Custom);
    assert_eq!(posture.max_subagents, 3);
    assert_eq!(posture.tool_surface, HarnessToolSurface::Auto);
    assert_eq!(posture.safety_posture, HarnessSafetyPosture::Strict);
}
#[test]
fn resolve_harness_profile_returns_none_when_route_or_model_misses() {
let config = ConfigToml {
@@ -6040,7 +6139,12 @@ safety_posture = "strict"
);
assert!(
config
.resolve_harness_profile("hf", "Qwen/Qwen3.6-Coder")
.resolve_harness_profile("deepseek", "Qwen/Qwen3.6-Coder")
.is_none()
);
assert!(
config
.resolve_harness_profile("openai", "mimo-v2.5-pro")
.is_none()
);
}
+45
View File
@@ -5630,6 +5630,9 @@ struct ExecStreamMeta {
input_tokens: u32,
output_tokens: u32,
session_id: String,
resume_command: String,
workspace: String,
message_count: usize,
status: Option<String>,
}
@@ -5665,6 +5668,14 @@ fn emit_exec_stream_event(event: &ExecStreamEvent) -> Result<()> {
Ok(())
}
/// Builds the CLI command that resumes the exec session `session_id`.
///
/// Returns an empty string when the id is blank (empty or whitespace-only).
/// Otherwise the id is interpolated exactly as given, without trimming.
fn exec_resume_command(session_id: &str) -> String {
    match session_id.trim() {
        "" => String::new(),
        _ => format!("codewhale exec --resume {session_id}"),
    }
}
fn persist_exec_session(
messages: &[Message],
model: &str,
@@ -6133,7 +6144,13 @@ async fn run_exec_agent(
model: latest_model.clone(),
input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
resume_command: saved_session_id
.as_deref()
.map(exec_resume_command)
.unwrap_or_default(),
session_id: saved_session_id.unwrap_or_default(),
workspace: latest_workspace.display().to_string(),
message_count: latest_messages.len(),
status: summary.status.clone(),
},
})?;
@@ -6792,6 +6809,34 @@ mod terminal_mode_tests {
assert_eq!(parsed["type"], "tool_result");
}
/// The metadata stream event must serialize to a single JSON line that
/// carries the session id, resume command, workspace and message count.
#[test]
fn exec_stream_metadata_includes_resume_breadcrumbs() {
    let meta = ExecStreamMeta {
        model: String::from("deepseek-v4-flash"),
        input_tokens: 123,
        output_tokens: 45,
        session_id: String::from("abc123"),
        resume_command: exec_resume_command("abc123"),
        workspace: String::from("/tmp/work"),
        message_count: 4,
        status: Some(String::from("completed")),
    };
    let event = ExecStreamEvent::Metadata { meta };

    // The serialized event must not contain an embedded newline.
    let line = serde_json::to_string(&event).expect("serializes");
    assert!(!line.contains('\n'));

    let value: serde_json::Value = serde_json::from_str(&line).expect("valid json");
    assert_eq!(value["type"], "metadata");

    let meta_json = &value["meta"];
    assert_eq!(meta_json["session_id"], "abc123");
    assert_eq!(
        meta_json["resume_command"],
        "codewhale exec --resume abc123"
    );
    assert_eq!(meta_json["workspace"], "/tmp/work");
    assert_eq!(meta_json["message_count"], 4);
}
#[test]
fn alternate_screen_defaults_on_in_auto_mode() {
let cli = parse_cli(&["codewhale"]);
+2
View File
@@ -48,6 +48,7 @@ fn spawn_minimal(
) -> anyhow::Result<(qa_harness::harness::SealedWorkspace, Harness)> {
let h = Harness::builder(Harness::cargo_bin("codewhale-tui"))
.cwd(ws.workspace())
.clear_env()
.seal_home(ws.home())
// Provide a stub key so the onboarding screen is bypassed and the TUI
// boots straight into the composer. The harness never makes a live
@@ -179,6 +180,7 @@ fn skills_menu_shows_local_and_global_skills() -> anyhow::Result<()> {
let mut h = Harness::builder(Harness::cargo_bin("codewhale-tui"))
.cwd(ws.workspace())
.clear_env()
.seal_home(ws.home())
.env("DEEPSEEK_API_KEY", "ci-test-key-not-real")
.env("DEEPSEEK_BASE_URL", "http://127.0.0.1:1")
@@ -79,8 +79,8 @@ impl HarnessBuilder {
self
}
/// Point `$HOME` (and `XDG_*` defaults) at a fresh dir so the spawned
/// binary cannot read or mutate the developer's real `~/.deepseek/`.
/// Point `$HOME` (and config/cache defaults) at a fresh dir so the spawned
/// binary cannot read or mutate the developer's real user config.
pub fn seal_home(mut self, home: impl Into<PathBuf>) -> Self {
self.seal_home = Some(home.into());
self
@@ -98,12 +98,16 @@ impl HarnessBuilder {
}
if let Some(home) = self.seal_home.as_deref() {
std::fs::create_dir_all(home).context("create sealed HOME")?;
let codewhale_config = home.join(".codewhale").join("config.toml");
let deepseek_config = home.join(".deepseek").join("config.toml");
builder = builder
.env("HOME", home.to_string_lossy())
.env("XDG_CONFIG_HOME", home.join(".config").to_string_lossy())
.env("XDG_DATA_HOME", home.join(".local/share").to_string_lossy())
.env("XDG_CACHE_HOME", home.join(".cache").to_string_lossy())
.env("USERPROFILE", home.to_string_lossy());
.env("USERPROFILE", home.to_string_lossy())
.env("CODEWHALE_CONFIG_PATH", codewhale_config.to_string_lossy())
.env("DEEPSEEK_CONFIG_PATH", deepseek_config.to_string_lossy());
}
for (k, v) in &self.env {
builder = builder.env(k, v);
@@ -247,6 +251,7 @@ pub fn make_sealed_workspace() -> Result<SealedWorkspace> {
let workspace = tmp.path().join("workspace");
let home = tmp.path().join("home");
std::fs::create_dir_all(&workspace).context("mkdir workspace")?;
std::fs::create_dir_all(home.join(".codewhale")).context("mkdir home/.codewhale")?;
std::fs::create_dir_all(home.join(".deepseek")).context("mkdir home/.deepseek")?;
Ok(SealedWorkspace {
_tmp: tmp,
@@ -107,6 +107,9 @@ impl<'a> PtySessionBuilder<'a> {
}
if self.clear_env {
cmd.env_clear();
if let Some(path) = std::env::var_os("PATH") {
cmd.env("PATH", path);
}
}
// TERM must be set to something xterm-ish so crossterm enables the
// capabilities the TUI assumes (256 color, bracketed paste, …).
+3
View File
@@ -383,6 +383,9 @@ If a profile is selected but missing, codewhale exits with an error listing avai
v0.9 adds a config data model for model-specific harness posture. This is a
preview schema: it can be parsed and tested, but runtime provider/model
selection and prompt/tool behavior are wired in later v0.9 slices.
When no configured profile matches, the resolver falls back to built-in seed
profiles for the model families listed in the cutline doc. Configured profiles
always take precedence over those seeds.
```toml
[[harness_profiles]]
+24 -6
View File
@@ -2,13 +2,31 @@
# verify_task.sh <task_id> <docker_image>
# Runs the DeepSWE verifier inside the task's Docker container.
# Expects model.patch at /tmp/deep-swe-verify/<task_id>/model.patch
set -euo pipefail
if [[ $# -ne 2 ]]; then
echo "Usage: $0 <task_id> <docker_image>" >&2
exit 64
fi
TASK_ID="$1"
IMAGE="$2"
TASKS_DIR="/Volumes/VIXinSSD/whalebro/codewhale/deep-swe/tasks"
WORK_DIR="/tmp/deep-swe-verify/$TASK_ID"
TASKS_DIR="${DEEPSWE_TASKS_DIR:-/Volumes/VIXinSSD/whalebro/codewhale/deep-swe/tasks}"
WORK_BASE="${DEEPSWE_VERIFY_DIR:-/tmp/deep-swe-verify}"
WORK_DIR="$WORK_BASE/$TASK_ID"
mkdir -p "$WORK_DIR"
RESULT_FILE="$WORK_DIR/result.txt"
MODEL_PATCH="$WORK_DIR/model.patch"
TEST_PATCH="$TASKS_DIR/$TASK_ID/tests/test.patch"
TEST_SCRIPT="$TASKS_DIR/$TASK_ID/tests/test.sh"
for required in "$MODEL_PATCH" "$TEST_PATCH" "$TEST_SCRIPT"; do
if [[ ! -f "$required" ]]; then
echo "missing required file: $required" >&2
exit 66
fi
done
echo "[$TASK_ID] Pulling image..."
docker pull "$IMAGE" 2>&1 | tail -1
@@ -16,9 +34,9 @@ docker pull "$IMAGE" 2>&1 | tail -1
echo "[$TASK_ID] Running verifier..."
docker run --rm \
--platform linux/amd64 \
-v "$WORK_DIR/model.patch:/model.patch:ro" \
-v "$TASKS_DIR/$TASK_ID/tests/test.patch:/tests/test.patch:ro" \
-v "$TASKS_DIR/$TASK_ID/tests/test.sh:/verify.sh:ro" \
-v "$MODEL_PATCH:/model.patch:ro" \
-v "$TEST_PATCH:/tests/test.patch:ro" \
-v "$TEST_SCRIPT:/verify.sh:ro" \
"$IMAGE" \
bash -c '
set -e
@@ -44,5 +62,5 @@ docker run --rm \
' > "$RESULT_FILE" 2>&1
echo "[$TASK_ID] Done. Result:"
cat "$RESULT_FILE" | grep -E 'REWARD|FAILED|PATCH_FAILED|passed'
grep -E 'REWARD|FAILED|PATCH_FAILED|passed' "$RESULT_FILE" || true
echo ""