Built-in harness profile seeds, exec resume breadcrumbs, sealed QA environment

Summary of the changes carried by this patch:
  * crates/config: `resolve_harness_profile` consults the built-in seed
    profiles after the user-configured ones, so configured profiles still win.
  * crates/tui: exec stream metadata gains `resume_command`, `workspace` and
    `message_count`.
  * QA harness: the PTY tests clear the inherited environment (PATH is kept)
    and the sealed HOME also sets CODEWHALE_CONFIG_PATH / DEEPSEEK_CONFIG_PATH.
  * scripts/verify_task.sh: strict mode, argument and input-file checks, and
    environment overrides for the task and work directories.

Review notes:
  * `exec_resume_command` trims the session id once, so the emptiness check
    and the rendered command use the same value.
  * verify_task.sh records the `docker run` exit status instead of letting
    `set -e` stop the script before the result summary is printed; the script
    still exits with that status.
  * Indentation of context lines was re-derived from rustfmt/shell conventions
    and the `<task_id>` / `<image>` placeholders are reconstructed; use
    `git apply --ignore-whitespace` if a hunk is rejected on whitespace. The
    `index` lines are those of the originally generated patch.

diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs
index bbad2352..d406fd96 100644
--- a/crates/config/src/lib.rs
+++ b/crates/config/src/lib.rs
@@ -561,10 +561,59 @@ impl ConfigToml {
     ) -> Option<&HarnessProfile> {
         self.harness_profiles
             .iter()
+            .chain(built_in_harness_profiles().iter())
             .find(|profile| profile.matches_route(provider_route, model))
     }
 }
 
+/// Built-in profile seeds for common provider/model families.
+///
+/// User-configured profiles are always checked first; these seeds only provide
+/// a stable resolver result when config has no narrower match.
+#[must_use]
+pub fn built_in_harness_profiles() -> &'static [HarnessProfile] {
+    static PROFILES: OnceLock<Vec<HarnessProfile>> = OnceLock::new();
+    PROFILES.get_or_init(|| {
+        vec![
+            HarnessProfile {
+                provider_route: "deepseek".to_string(),
+                model_pattern: "deepseek-v4*".to_string(),
+                posture: HarnessPosture::cache_heavy(),
+            },
+            HarnessProfile {
+                provider_route: "xiaomi-mimo".to_string(),
+                model_pattern: "mimo-v2.5*".to_string(),
+                posture: HarnessPosture::cache_heavy(),
+            },
+            HarnessProfile {
+                provider_route: "arcee".to_string(),
+                model_pattern: "trinity-large-thinking".to_string(),
+                posture: HarnessPosture::cache_heavy(),
+            },
+            HarnessProfile {
+                provider_route: "huggingface".to_string(),
+                model_pattern: "*".to_string(),
+                posture: HarnessPosture::lean(),
+            },
+            HarnessProfile {
+                provider_route: "sglang".to_string(),
+                model_pattern: "*".to_string(),
+                posture: HarnessPosture::lean(),
+            },
+            HarnessProfile {
+                provider_route: "vllm".to_string(),
+                model_pattern: "*".to_string(),
+                posture: HarnessPosture::lean(),
+            },
+            HarnessProfile {
+                provider_route: "ollama".to_string(),
+                model_pattern: "*".to_string(),
+                posture: HarnessPosture::lean(),
+            },
+        ]
+    })
+}
+
 fn provider_routes_equal(expected: &str, actual: &str) -> bool {
     match (ProviderKind::parse(expected), ProviderKind::parse(actual)) {
         (Some(expected), Some(actual)) => expected == actual,
@@ -6022,6 +6071,56 @@ safety_posture = "strict"
         assert_eq!(pro.posture.kind, HarnessPostureKind::CacheHeavy);
     }
 
+    #[test]
+    fn resolve_harness_profile_uses_built_in_seed_when_config_has_no_match() {
+        let config = ConfigToml::default();
+
+        let xiaomi = config
+            .resolve_harness_profile("xiaomi", "mimo-v2.5-pro")
+            .expect("direct Xiaomi MiMo seed should resolve");
+        assert_eq!(xiaomi.provider_route, "xiaomi-mimo");
+        assert_eq!(xiaomi.posture.kind, HarnessPostureKind::CacheHeavy);
+
+        let arcee = config
+            .resolve_harness_profile("arcee", "trinity-large-thinking")
+            .expect("direct Arcee seed should resolve");
+        assert_eq!(arcee.posture.kind, HarnessPostureKind::CacheHeavy);
+
+        let local = config
+            .resolve_harness_profile("vllm", "Qwen/Qwen3.6-Coder")
+            .expect("local seed should resolve");
+        assert_eq!(local.posture.kind, HarnessPostureKind::Lean);
+        assert!(local.posture.prefer_codebase_search);
+    }
+
+    #[test]
+    fn configured_harness_profile_overrides_built_in_seed() {
+        let config = ConfigToml {
+            harness_profiles: vec![HarnessProfile {
+                provider_route: "xiaomi-mimo".to_string(),
+                model_pattern: "mimo-v2.5-pro".to_string(),
+                posture: HarnessPosture {
+                    kind: HarnessPostureKind::Custom,
+                    max_subagents: 3,
+                    prefer_codebase_search: true,
+                    compaction_strategy: HarnessCompactionStrategy::Default,
+                    tool_surface: HarnessToolSurface::Auto,
+                    safety_posture: HarnessSafetyPosture::Strict,
+                },
+            }],
+            ..ConfigToml::default()
+        };
+
+        let profile = config
+            .resolve_harness_profile("xiaomi-mimo", "mimo-v2.5-pro")
+            .expect("configured profile should match first");
+
+        assert_eq!(profile.posture.kind, HarnessPostureKind::Custom);
+        assert_eq!(profile.posture.max_subagents, 3);
+        assert_eq!(profile.posture.tool_surface, HarnessToolSurface::Auto);
+        assert_eq!(profile.posture.safety_posture, HarnessSafetyPosture::Strict);
+    }
+
     #[test]
     fn resolve_harness_profile_returns_none_when_route_or_model_misses() {
         let config = ConfigToml {
@@ -6040,7 +6139,12 @@ safety_posture = "strict"
         );
         assert!(
             config
-                .resolve_harness_profile("hf", "Qwen/Qwen3.6-Coder")
+                .resolve_harness_profile("deepseek", "Qwen/Qwen3.6-Coder")
+                .is_none()
+        );
+        assert!(
+            config
+                .resolve_harness_profile("openai", "mimo-v2.5-pro")
                 .is_none()
         );
     }
diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs
index 4973c106..0c4f2f0a 100644
--- a/crates/tui/src/main.rs
+++ b/crates/tui/src/main.rs
@@ -5630,6 +5630,9 @@ struct ExecStreamMeta {
     input_tokens: u32,
     output_tokens: u32,
     session_id: String,
+    resume_command: String,
+    workspace: String,
+    message_count: usize,
     status: Option<String>,
 }
 
@@ -5665,6 +5668,20 @@ fn emit_exec_stream_event(event: &ExecStreamEvent) -> Result<()> {
     Ok(())
 }
 
+/// Builds the shell command that resumes the exec session `session_id`.
+///
+/// Returns an empty string when the id is blank, so callers can always emit
+/// the field.
+fn exec_resume_command(session_id: &str) -> String {
+    // Trim once so the emptiness check and the rendered command agree.
+    let session_id = session_id.trim();
+    if session_id.is_empty() {
+        String::new()
+    } else {
+        format!("codewhale exec --resume {session_id}")
+    }
+}
+
 fn persist_exec_session(
     messages: &[Message],
     model: &str,
@@ -6133,7 +6150,13 @@ async fn run_exec_agent(
                 model: latest_model.clone(),
                 input_tokens: usage.input_tokens,
                 output_tokens: usage.output_tokens,
+                resume_command: saved_session_id
+                    .as_deref()
+                    .map(exec_resume_command)
+                    .unwrap_or_default(),
                 session_id: saved_session_id.unwrap_or_default(),
+                workspace: latest_workspace.display().to_string(),
+                message_count: latest_messages.len(),
                 status: summary.status.clone(),
             },
         })?;
@@ -6792,6 +6815,34 @@ mod terminal_mode_tests {
         assert_eq!(parsed["type"], "tool_result");
     }
 
+    #[test]
+    fn exec_stream_metadata_includes_resume_breadcrumbs() {
+        let event = ExecStreamEvent::Metadata {
+            meta: ExecStreamMeta {
+                model: "deepseek-v4-flash".to_string(),
+                input_tokens: 123,
+                output_tokens: 45,
+                session_id: "abc123".to_string(),
+                resume_command: exec_resume_command("abc123"),
+                workspace: "/tmp/work".to_string(),
+                message_count: 4,
+                status: Some("completed".to_string()),
+            },
+        };
+
+        let json = serde_json::to_string(&event).expect("serializes");
+        assert!(!json.contains('\n'));
+        let parsed: serde_json::Value = serde_json::from_str(&json).expect("valid json");
+        assert_eq!(parsed["type"], "metadata");
+        assert_eq!(parsed["meta"]["session_id"], "abc123");
+        assert_eq!(
+            parsed["meta"]["resume_command"],
+            "codewhale exec --resume abc123"
+        );
+        assert_eq!(parsed["meta"]["workspace"], "/tmp/work");
+        assert_eq!(parsed["meta"]["message_count"], 4);
+    }
+
     #[test]
     fn alternate_screen_defaults_on_in_auto_mode() {
         let cli = parse_cli(&["codewhale"]);
diff --git a/crates/tui/tests/qa_pty.rs b/crates/tui/tests/qa_pty.rs
index 8e9a8644..130b84d4 100644
--- a/crates/tui/tests/qa_pty.rs
+++ b/crates/tui/tests/qa_pty.rs
@@ -48,6 +48,7 @@ fn spawn_minimal(
 ) -> anyhow::Result<(qa_harness::harness::SealedWorkspace, Harness)> {
     let h = Harness::builder(Harness::cargo_bin("codewhale-tui"))
         .cwd(ws.workspace())
+        .clear_env()
         .seal_home(ws.home())
         // Provide a stub key so the onboarding screen is bypassed and the TUI
         // boots straight into the composer. The harness never makes a live
@@ -179,6 +180,7 @@ fn skills_menu_shows_local_and_global_skills() -> anyhow::Result<()> {
 
     let mut h = Harness::builder(Harness::cargo_bin("codewhale-tui"))
         .cwd(ws.workspace())
+        .clear_env()
         .seal_home(ws.home())
         .env("DEEPSEEK_API_KEY", "ci-test-key-not-real")
         .env("DEEPSEEK_BASE_URL", "http://127.0.0.1:1")
diff --git a/crates/tui/tests/support/qa_harness/harness.rs b/crates/tui/tests/support/qa_harness/harness.rs
index a32b4c7b..83572655 100644
--- a/crates/tui/tests/support/qa_harness/harness.rs
+++ b/crates/tui/tests/support/qa_harness/harness.rs
@@ -79,8 +79,8 @@ impl HarnessBuilder {
         self
     }
 
-    /// Point `$HOME` (and `XDG_*` defaults) at a fresh dir so the spawned
-    /// binary cannot read or mutate the developer's real `~/.deepseek/`.
+    /// Point `$HOME` (and config/cache defaults) at a fresh dir so the spawned
+    /// binary cannot read or mutate the developer's real user config.
     pub fn seal_home(mut self, home: impl Into<PathBuf>) -> Self {
         self.seal_home = Some(home.into());
         self
@@ -98,12 +98,16 @@ impl HarnessBuilder {
         }
         if let Some(home) = self.seal_home.as_deref() {
             std::fs::create_dir_all(home).context("create sealed HOME")?;
+            let codewhale_config = home.join(".codewhale").join("config.toml");
+            let deepseek_config = home.join(".deepseek").join("config.toml");
             builder = builder
                 .env("HOME", home.to_string_lossy())
                 .env("XDG_CONFIG_HOME", home.join(".config").to_string_lossy())
                 .env("XDG_DATA_HOME", home.join(".local/share").to_string_lossy())
                 .env("XDG_CACHE_HOME", home.join(".cache").to_string_lossy())
-                .env("USERPROFILE", home.to_string_lossy());
+                .env("USERPROFILE", home.to_string_lossy())
+                .env("CODEWHALE_CONFIG_PATH", codewhale_config.to_string_lossy())
+                .env("DEEPSEEK_CONFIG_PATH", deepseek_config.to_string_lossy());
         }
         for (k, v) in &self.env {
             builder = builder.env(k, v);
@@ -247,6 +251,7 @@ pub fn make_sealed_workspace() -> Result<SealedWorkspace> {
     let workspace = tmp.path().join("workspace");
     let home = tmp.path().join("home");
     std::fs::create_dir_all(&workspace).context("mkdir workspace")?;
+    std::fs::create_dir_all(home.join(".codewhale")).context("mkdir home/.codewhale")?;
     std::fs::create_dir_all(home.join(".deepseek")).context("mkdir home/.deepseek")?;
     Ok(SealedWorkspace {
         _tmp: tmp,
diff --git a/crates/tui/tests/support/qa_harness/pty.rs b/crates/tui/tests/support/qa_harness/pty.rs
index 04bd72a1..ff2aa6fd 100644
--- a/crates/tui/tests/support/qa_harness/pty.rs
+++ b/crates/tui/tests/support/qa_harness/pty.rs
@@ -107,6 +107,9 @@ impl<'a> PtySessionBuilder<'a> {
         }
         if self.clear_env {
             cmd.env_clear();
+            if let Some(path) = std::env::var_os("PATH") {
+                cmd.env("PATH", path);
+            }
         }
         // TERM must be set to something xterm-ish so crossterm enables the
         // capabilities the TUI assumes (256 color, bracketed paste, …).
diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
index 13ee42bb..9ab09d9f 100644
--- a/docs/CONFIGURATION.md
+++ b/docs/CONFIGURATION.md
@@ -383,6 +383,9 @@ If a profile is selected but missing, codewhale exits with an error listing avai
 v0.9 adds a config data model for model-specific harness posture. This is a
 preview schema: it can be parsed and tested, but runtime provider/model
 selection and prompt/tool behavior are wired in later v0.9 slices.
+When no configured profile matches, the resolver falls back to built-in seed
+profiles for the model families listed in the cutline doc. Configured profiles
+always take precedence over those seeds.
 
 ```toml
 [[harness_profiles]]
diff --git a/scripts/verify_task.sh b/scripts/verify_task.sh
index 97689ebf..5b759174 100644
--- a/scripts/verify_task.sh
+++ b/scripts/verify_task.sh
@@ -2,13 +2,31 @@
 # verify_task.sh
 # Runs the DeepSWE verifier inside the task's Docker container.
 # Expects model.patch at /tmp/deep-swe-verify/<task_id>/model.patch
+set -euo pipefail
+
+if [[ $# -ne 2 ]]; then
+  echo "Usage: $0 <task_id> <image>" >&2
+  exit 64
+fi
+
 TASK_ID="$1"
 IMAGE="$2"
-TASKS_DIR="/Volumes/VIXinSSD/whalebro/codewhale/deep-swe/tasks"
-WORK_DIR="/tmp/deep-swe-verify/$TASK_ID"
+TASKS_DIR="${DEEPSWE_TASKS_DIR:-/Volumes/VIXinSSD/whalebro/codewhale/deep-swe/tasks}"
+WORK_BASE="${DEEPSWE_VERIFY_DIR:-/tmp/deep-swe-verify}"
+WORK_DIR="$WORK_BASE/$TASK_ID"
 
 mkdir -p "$WORK_DIR"
 RESULT_FILE="$WORK_DIR/result.txt"
+MODEL_PATCH="$WORK_DIR/model.patch"
+TEST_PATCH="$TASKS_DIR/$TASK_ID/tests/test.patch"
+TEST_SCRIPT="$TASKS_DIR/$TASK_ID/tests/test.sh"
+
+for required in "$MODEL_PATCH" "$TEST_PATCH" "$TEST_SCRIPT"; do
+  if [[ ! -f "$required" ]]; then
+    echo "missing required file: $required" >&2
+    exit 66
+  fi
+done
 
 echo "[$TASK_ID] Pulling image..."
 docker pull "$IMAGE" 2>&1 | tail -1
@@ -16,9 +34,9 @@ docker pull "$IMAGE" 2>&1 | tail -1
 echo "[$TASK_ID] Running verifier..."
 docker run --rm \
   --platform linux/amd64 \
-  -v "$WORK_DIR/model.patch:/model.patch:ro" \
-  -v "$TASKS_DIR/$TASK_ID/tests/test.patch:/tests/test.patch:ro" \
-  -v "$TASKS_DIR/$TASK_ID/tests/test.sh:/verify.sh:ro" \
+  -v "$MODEL_PATCH:/model.patch:ro" \
+  -v "$TEST_PATCH:/tests/test.patch:ro" \
+  -v "$TEST_SCRIPT:/verify.sh:ro" \
   "$IMAGE" \
   bash -c '
     set -e
@@ -44,5 +62,6 @@ docker run --rm \
-  ' > "$RESULT_FILE" 2>&1
+  ' > "$RESULT_FILE" 2>&1 || VERIFY_STATUS=$?
 
 echo "[$TASK_ID] Done. Result:"
-cat "$RESULT_FILE" | grep -E 'REWARD|FAILED|PATCH_FAILED|passed'
+grep -E 'REWARD|FAILED|PATCH_FAILED|passed' "$RESULT_FILE" || true
 echo ""
+exit "${VERIFY_STATUS:-0}"