chore: drop unused deps (tracing-appender, zeroize, rustls in release), orphaned vendor lockfile and one-off verify_task.sh
This commit is contained in:
Generated
-24
@@ -911,7 +911,6 @@ version = "0.8.56"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"reqwest",
|
||||
"rustls",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -1016,7 +1015,6 @@ dependencies = [
|
||||
"toml 0.9.11+spec-1.1.0",
|
||||
"tower-http",
|
||||
"tracing",
|
||||
"tracing-appender",
|
||||
"tracing-subscriber",
|
||||
"unicode-segmentation",
|
||||
"unicode-width 0.2.2",
|
||||
@@ -1025,7 +1023,6 @@ dependencies = [
|
||||
"wait-timeout",
|
||||
"windows",
|
||||
"wiremock",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1193,15 +1190,6 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.6"
|
||||
@@ -5483,18 +5471,6 @@ dependencies = [
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-appender"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"thiserror 2.0.18",
|
||||
"time",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-attributes"
|
||||
version = "0.1.31"
|
||||
|
||||
@@ -9,7 +9,6 @@ description = "Shared CodeWhale release discovery and version comparison helpers
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
reqwest = { workspace = true, features = ["blocking"] }
|
||||
rustls.workspace = true
|
||||
semver.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -58,14 +58,12 @@ chrono = { version = "0.4", features = ["serde"] }
|
||||
tempfile = "3.16"
|
||||
thiserror = "2.0"
|
||||
tracing = "0.1"
|
||||
tracing-appender = { workspace = true }
|
||||
tracing-subscriber = { workspace = true }
|
||||
tower-http = { version = "0.6", features = ["cors"] }
|
||||
wait-timeout = "0.2"
|
||||
multimap = "0.10.0"
|
||||
shlex = "1.3.0"
|
||||
tiny_http = "0.12"
|
||||
zeroize = "1.8.2"
|
||||
ignore = "0.4"
|
||||
image = { version = "0.25", default-features = false, features = ["png"] }
|
||||
lru = "0.16"
|
||||
|
||||
@@ -32,7 +32,7 @@ pub(super) const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB
|
||||
/// History: this used to be 300s (5 min) which was too aggressive — V4
|
||||
/// thinking turns on hard prompts legitimately exceed 5 minutes wall-clock
|
||||
/// while still emitting reasoning_content chunks the whole way. Bumped to
|
||||
/// 30 min in v0.6.6 to address `TODO_FIXES.md` #1. Codex defaults to a
|
||||
/// 30 min in v0.6.6 after long-reasoning turns hit the old cap. Codex defaults to a
|
||||
/// per-chunk idle of 300s with no wall-clock cap; we keep both layers but
|
||||
/// give the wall-clock a generous window so it never fires in practice.
|
||||
pub(super) const STREAM_MAX_DURATION_SECS: u64 = 1800; // 30 minutes (was 300s; #103/#1)
|
||||
|
||||
@@ -1,66 +0,0 @@
|
||||
#!/bin/bash
|
||||
# verify_task.sh <task_id> <docker_image>
|
||||
# Runs the DeepSWE verifier inside the task's Docker container.
|
||||
# Expects model.patch at /tmp/deep-swe-verify/<task_id>/model.patch
|
||||
set -euo pipefail
|
||||
|
||||
if [[ $# -ne 2 ]]; then
|
||||
echo "Usage: $0 <task_id> <docker_image>" >&2
|
||||
exit 64
|
||||
fi
|
||||
|
||||
TASK_ID="$1"
|
||||
IMAGE="$2"
|
||||
TASKS_DIR="${DEEPSWE_TASKS_DIR:-/Volumes/VIXinSSD/whalebro/codewhale/deep-swe/tasks}"
|
||||
WORK_BASE="${DEEPSWE_VERIFY_DIR:-/tmp/deep-swe-verify}"
|
||||
WORK_DIR="$WORK_BASE/$TASK_ID"
|
||||
|
||||
mkdir -p "$WORK_DIR"
|
||||
RESULT_FILE="$WORK_DIR/result.txt"
|
||||
MODEL_PATCH="$WORK_DIR/model.patch"
|
||||
TEST_PATCH="$TASKS_DIR/$TASK_ID/tests/test.patch"
|
||||
TEST_SCRIPT="$TASKS_DIR/$TASK_ID/tests/test.sh"
|
||||
|
||||
for required in "$MODEL_PATCH" "$TEST_PATCH" "$TEST_SCRIPT"; do
|
||||
if [[ ! -f "$required" ]]; then
|
||||
echo "missing required file: $required" >&2
|
||||
exit 66
|
||||
fi
|
||||
done
|
||||
|
||||
echo "[$TASK_ID] Pulling image..."
|
||||
docker pull "$IMAGE" 2>&1 | tail -1
|
||||
|
||||
echo "[$TASK_ID] Running verifier..."
|
||||
docker run --rm \
|
||||
--platform linux/amd64 \
|
||||
-v "$MODEL_PATCH:/model.patch:ro" \
|
||||
-v "$TEST_PATCH:/tests/test.patch:ro" \
|
||||
-v "$TEST_SCRIPT:/verify.sh:ro" \
|
||||
"$IMAGE" \
|
||||
bash -c '
|
||||
set -e
|
||||
mkdir -p /logs/verifier /logs/artifacts
|
||||
cd /app
|
||||
git apply --whitespace=nowarn /model.patch 2>/dev/null || { echo "PATCH_FAILED"; exit 2; }
|
||||
bash /verify.sh > /logs/verifier/output.txt 2>&1
|
||||
EC=$?
|
||||
if [ -f /logs/verifier/reward.txt ]; then
|
||||
REWARD=$(cat /logs/verifier/reward.txt)
|
||||
echo "REWARD=$REWARD"
|
||||
else
|
||||
# Extract from output
|
||||
if grep -q "New tests exit code: 0" /logs/verifier/output.txt && \
|
||||
grep -q "Baseline exit code: 0" /logs/verifier/output.txt; then
|
||||
echo "REWARD=1"
|
||||
else
|
||||
echo "REWARD=0"
|
||||
fi
|
||||
fi
|
||||
echo "---OUTPUT_TAIL---"
|
||||
tail -30 /logs/verifier/output.txt
|
||||
' > "$RESULT_FILE" 2>&1
|
||||
|
||||
echo "[$TASK_ID] Done. Result:"
|
||||
grep -E 'REWARD|FAILED|PATCH_FAILED|passed' "$RESULT_FILE" || true
|
||||
echo ""
|
||||
-2188
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user