fix: hash only API-visible tool fields, not internal metadata

Addresses chatgpt-codex review: the previous full serde_json::to_string
included internal-only fields (allowed_callers, defer_loading,
input_examples, cache_control) that are never sent to the chat API.
This caused spurious drift detection when those fields changed.

- New tool_to_api_json() helper mirrors tool_to_chat() serialization:
  only type, name, description, parameters, strict
- Doc comment fixed: 'sorted by name' → 'sorted lexicographically
  by JSON text' (greptile review)
This commit is contained in:
Justin Gao
2026-06-01 11:55:12 +08:00
committed by Hunter B
parent 37cfd97b5c
commit c22b60c2b0
+27 -7
View File
@@ -51,18 +51,18 @@ pub struct PrefixFingerprint {
impl PrefixFingerprint {
/// Compute a fingerprint from system prompt text and tool list.
///
/// Tools are serialized to JSON (name + description + schema), sorted
/// by name for deterministic ordering, then SHA-256 hashed. This
/// catches schema/description drift, not just name changes (#2264).
/// Tools are serialized to the same JSON shape the chat API receives
/// (`type`, `name`, `description`, `parameters`, `strict`), sorted
/// lexicographically by JSON text, then SHA-256 hashed. This catches
/// schema/description drift that actually affects the API prefix,
/// while ignoring internal-only fields like `allowed_callers` (#2264).
pub fn compute(system_text: &str, tools: Option<&[Tool]>) -> Self {
let system_sha256 = sha256_hex(system_text.as_bytes());
let tools_sha256 = match tools {
Some(tools) if !tools.is_empty() => {
let mut serialized: Vec<String> = tools
.iter()
.filter_map(|t| serde_json::to_string(t).ok())
.collect();
let mut serialized: Vec<String> =
tools.iter().filter_map(tool_to_api_json).collect();
serialized.sort();
let joined = serialized.join("\n");
sha256_hex(joined.as_bytes())
@@ -310,6 +310,26 @@ impl PrefixStabilityManager {
}
}
/// Render `tool` as the JSON text of the chat-API tool payload.
///
/// The output has the shape
/// `{"type": "function", "function": {name, description, parameters[, strict]}}`.
/// Only those keys are emitted, so internal-only fields such as
/// `allowed_callers`, `defer_loading`, `input_examples`, and `cache_control`
/// (which are never sent to DeepSeek) do not appear in the result.
///
/// Returns `None` if the assembled value cannot be serialized to a string.
fn tool_to_api_json(tool: &Tool) -> Option<String> {
    // Build the inner `function` object first so `strict` can be attached
    // directly, without reaching back into the outer envelope afterwards.
    let mut function = serde_json::json!({
        "name": tool.name,
        "description": tool.description,
        "parameters": tool.input_schema,
    });

    // `strict` is optional: when unset, the key is omitted entirely rather
    // than being emitted as `null`.
    if let Some(strict) = tool.strict {
        function["strict"] = serde_json::json!(strict);
    }

    let payload = serde_json::json!({
        "type": "function",
        "function": function,
    });
    serde_json::to_string(&payload).ok()
}
/// Compute the SHA-256 hex digest of a byte slice.
fn sha256_hex(bytes: &[u8]) -> String {
let mut hasher = Sha256::new();