ci: enforce mappable co-author credit

Add AUTHOR_MAP plus a lightweight co-author trailer checker so harvested commits use numeric GitHub noreply identities, reject bot/tool trailers, and require machine-readable credit when a commit says it was harvested from a PR.

Also normalize the local unpushed v0.9 harvest range so existing contributor authors/trailers for HUQIANTAO, Implementist, jrcjrcc, xyuai, cyq1017, idling11, and shenjackyuanjie use GitHub-mappable identities before the branch is published.

Validation: python3 scripts/check-coauthor-trailers.py --author-map .github/AUTHOR_MAP --range origin/main..HEAD --check-authors; python3 -m py_compile scripts/check-coauthor-trailers.py; ruby -e 'require "yaml"; YAML.load_file(".github/workflows/ci.yml")'; git diff --check; negative in-process validation for raw email, missing harvested credit, and bot author cases.
This commit is contained in:
Hunter B
2026-06-03 21:07:33 -07:00
parent fb86737a8c
commit 002f8f0ba1
8 changed files with 369 additions and 1 deletions
+90
View File
@@ -0,0 +1,90 @@
# Contributor credit identity map.
#
# Format:
# alias = Display Name <id+login@users.noreply.github.com>
#
# The right-hand side must use GitHub's numeric noreply address so harvested
# co-author credit lands in the contributor graph. The left-hand side may be a
# GitHub login, a commit author display name, an old-style noreply address, a
# raw email from a contributor commit, or a local machine email seen in older
# harvested history. Aliases are matched case-insensitively.
hmbown = Hmbown <101357273+Hmbown@users.noreply.github.com>
reidliu41 = reidliu41 <61492567+reidliu41@users.noreply.github.com>
reid201711@gmail.com = reidliu41 <61492567+reidliu41@users.noreply.github.com>
HUQIANTAO = HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com>
Hu Qiantao = HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com>
huqiantao@users.noreply.github.com = HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com>
huqiantao@HudeMacBook-Air.local = HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com>
tom_huu@qq.com = HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com>
merchloubna70-dot = merchloubna70-dot <258170091+merchloubna70-dot@users.noreply.github.com>
h3c-hexin = h3c-hexin <13790929+h3c-hexin@users.noreply.github.com>
he.xin@h3c.com = h3c-hexin <13790929+h3c-hexin@users.noreply.github.com>
axobase001 = axobase001 <138223345+axobase001@users.noreply.github.com>
donglovejava = donglovejava <211940267+donglovejava@users.noreply.github.com>
Oliver-ZPLiu = Oliver-ZPLiu <47081637+Oliver-ZPLiu@users.noreply.github.com>
idling11 = idling11 <8055620+idling11@users.noreply.github.com>
Hanmiao Li = idling11 <8055620+idling11@users.noreply.github.com>
894876246@qq.com = idling11 <8055620+idling11@users.noreply.github.com>
angziii = angziii <177907677+angziii@users.noreply.github.com>
aboimpinto = aboimpinto <1231687+aboimpinto@users.noreply.github.com>
Paulo Aboim Pinto = aboimpinto <1231687+aboimpinto@users.noreply.github.com>
aboimpinto@gmail.com = aboimpinto <1231687+aboimpinto@users.noreply.github.com>
encyc = encyc <62669951+encyc@users.noreply.github.com>
Duducoco = Duducoco <69681789+Duducoco@users.noreply.github.com>
cyq1017 = cyq1017 <61975706+cyq1017@users.noreply.github.com>
cyq = cyq1017 <61975706+cyq1017@users.noreply.github.com>
15000851237@163.com = cyq1017 <61975706+cyq1017@users.noreply.github.com>
zlh124 = zlh124 <56312993+zlh124@users.noreply.github.com>
THINKER-ONLY = THINKER-ONLY <181556007+THINKER-ONLY@users.noreply.github.com>
nightt5879 = nightt5879 <87569709+nightt5879@users.noreply.github.com>
Liu-Vince = Liu-Vince <56624166+Liu-Vince@users.noreply.github.com>
Vince = Liu-Vince <56624166+Liu-Vince@users.noreply.github.com>
liuwenchang.x@qq.com = Liu-Vince <56624166+Liu-Vince@users.noreply.github.com>
JiarenWang = JiarenWang <33421508+JiarenWang@users.noreply.github.com>
wdw8276 = wdw8276 <3972439+wdw8276@users.noreply.github.com>
pengyou200902 = pengyou200902 <35026241+pengyou200902@users.noreply.github.com>
linzhiqin2003 = linzhiqin2003 <123250980+linzhiqin2003@users.noreply.github.com>
LING71671 = LING71671 <231181387+LING71671@users.noreply.github.com>
JasonOA888 = JasonOA888 <101583541+JasonOA888@users.noreply.github.com>
Inference1 = Inference1 <68734681+Inference1@users.noreply.github.com>
hongqitai = hongqitai <188678175+hongqitai@users.noreply.github.com>
gordonlu = gordonlu <3125629+gordonlu@users.noreply.github.com>
gaord = gaord <9567937+gaord@users.noreply.github.com>
Ben Gao = gaord <9567937+gaord@users.noreply.github.com>
bengao168@msn.com = gaord <9567937+gaord@users.noreply.github.com>
zhuangbiaowei = zhuangbiaowei <93194+zhuangbiaowei@users.noreply.github.com>
yuanchenglu = yuanchenglu <4088730+yuanchenglu@users.noreply.github.com>
Vishnu1837 = Vishnu1837 <104626273+Vishnu1837@users.noreply.github.com>
sximelon = sximelon <15710511+sximelon@users.noreply.github.com>
Sskift = Sskift <163287349+Sskift@users.noreply.github.com>
New2Niu = New2Niu <19551155+New2Niu@users.noreply.github.com>
mvanhorn = mvanhorn <455140+mvanhorn@users.noreply.github.com>
MengZ-super = MengZ-super <121712068+MengZ-super@users.noreply.github.com>
membphis = membphis <6814606+membphis@users.noreply.github.com>
LeoAlex0 = LeoAlex0 <31839998+LeoAlex0@users.noreply.github.com>
Lee-take = Lee-take <210963840+Lee-take@users.noreply.github.com>
lbcheng888 = lbcheng888 <6716643+lbcheng888@users.noreply.github.com>
kunpeng-ai-lab = kunpeng-ai-lab <16793595+kunpeng-ai-lab@users.noreply.github.com>
elowen53 = elowen53 <88364845+elowen53@users.noreply.github.com>
Elowen = elowen53 <88364845+elowen53@users.noreply.github.com>
xrnc@outlook.com = elowen53 <88364845+elowen53@users.noreply.github.com>
CrepuscularIRIS = CrepuscularIRIS <126939795+CrepuscularIRIS@users.noreply.github.com>
chnjames = chnjames <44110547+chnjames@users.noreply.github.com>
ChaceLyee2101 = ChaceLyee2101 <95995339+ChaceLyee2101@users.noreply.github.com>
AresNing = AresNing <49557311+AresNing@users.noreply.github.com>
shenjackyuanjie = shenjackyuanjie <54507071+shenjackyuanjie@users.noreply.github.com>
shenjack = shenjackyuanjie <54507071+shenjackyuanjie@users.noreply.github.com>
3695888@qq.com = shenjackyuanjie <54507071+shenjackyuanjie@users.noreply.github.com>
xyuai = xyuai <281015099+xyuai@users.noreply.github.com>
Implementist = Implementist <24910011+Implementist@users.noreply.github.com>
implecao = Implementist <24910011+Implementist@users.noreply.github.com>
yuyuyu4993@qq.com = Implementist <24910011+Implementist@users.noreply.github.com>
jrcjrcc = jrcjrcc <192965070+jrcjrcc@users.noreply.github.com>
jrcjrcc@users.noreply.github.com = jrcjrcc <192965070+jrcjrcc@users.noreply.github.com>
RefuseOdd = RefuseOdd <192543033+RefuseOdd@users.noreply.github.com>
wywsoor = wywsoor <26341601+wywsoor@users.noreply.github.com>
hsdbeebou = hsdbeebou <284843096+hsdbeebou@users.noreply.github.com>
tdccccc = tdccccc <79492752+tdccccc@users.noreply.github.com>
greyfreedom = greyfreedom <11493871+greyfreedom@users.noreply.github.com>
greyfreedom@163.com = greyfreedom <11493871+greyfreedom@users.noreply.github.com>
+1
View File
@@ -11,3 +11,4 @@
- [ ] Updated docs or comments as needed
- [ ] Added or updated tests where relevant
- [ ] Verified TUI behavior manually if UI changes
- [ ] Harvested/co-authored credit uses a GitHub numeric noreply address
+18
View File
@@ -33,6 +33,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt, clippy
@@ -50,6 +52,22 @@ jobs:
run: cargo clippy --workspace --all-features --locked -- -D warnings
- name: Check provider registry drift
run: python3 scripts/check-provider-registry.py
- name: Check harvested contributor credit
  if: github.event_name != 'schedule'
  shell: bash
  # Pass workflow context through environment variables instead of splicing
  # `${{ ... }}` into the script text, so a ref name is always treated as data
  # by bash and never as shell syntax.
  env:
    EVENT_NAME: ${{ github.event_name }}
    BASE_REF: ${{ github.base_ref }}
    BEFORE_SHA: ${{ github.event.before }}
    HEAD_SHA: ${{ github.sha }}
  run: |
    if [[ "$EVENT_NAME" == "pull_request" ]]; then
      # Pull requests: check every commit that is not yet on the base branch.
      git fetch --no-tags origin "$BASE_REF"
      RANGE="origin/${BASE_REF}..HEAD"
    elif [[ "$BEFORE_SHA" != "0000000000000000000000000000000000000000" ]]; then
      # Pushes to an existing ref: check only the newly pushed commits.
      RANGE="${BEFORE_SHA}..${HEAD_SHA}"
    else
      # An all-zero "before" SHA means the ref was just created; check the tip commit.
      RANGE="HEAD~1..HEAD"
    fi
    python3 scripts/check-coauthor-trailers.py \
      --author-map .github/AUTHOR_MAP \
      --range "$RANGE" \
      --check-authors
- name: Linux clippy location
run: echo "Linux clippy/test gates run on CNB for mirrored fix/*, rebrand/*, work/v*, and main branches."
+3
View File
@@ -19,6 +19,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
explicit Rustls ring-provider installation for the no-provider TLS build, and
OHOS fallbacks for unsupported keyring, clipboard, sandbox, browser-open, TTY,
execpolicy Starlark parsing, and self-update surfaces.
- Added `.github/AUTHOR_MAP` and a CI co-author credit check so harvested
commits use GitHub-mappable numeric noreply identities instead of `.local`,
placeholder, bot/tool, or raw third-party emails.
### Changed
+5 -1
View File
@@ -98,8 +98,12 @@ When this happens:
- If the maintainer copies or adapts your code, the harvested commit also
keeps attribution with the original author identity when possible: either by
preserving the commit author on a cherry-pick or by adding a
`Co-authored-by: Name <email>` trailer from the original PR commit. This is
`Co-authored-by: Name <id+login@users.noreply.github.com>` trailer. This is
what lets GitHub's contribution surfaces recognize more than prose credit.
Maintainers should use `.github/AUTHOR_MAP`, or run
`gh api users/<login> --jq '"\(.id)+\(.login)@users.noreply.github.com"'`,
rather than copying raw, `.local`, or old-style noreply emails from a
contributor's machine.
- The `CHANGELOG.md` entry for the next release credits you by handle.
- The auto-close workflow closes your PR with a templated thank-you and
a link to the commit on `main`.
+6
View File
@@ -23,6 +23,12 @@ could not cover by ourselves.
issues, keep author/co-author attribution where possible, add
`Harvested from PR #N by @handle`, and credit the contributor in the
changelog or release notes.
- Make credit machine-readable. If a harvested commit cannot preserve the
contributor as the author, add a `Co-authored-by` trailer with the GitHub
numeric noreply address from `.github/AUTHOR_MAP` or
`gh api users/<login> --jq '"\(.id)+\(.login)@users.noreply.github.com"'`.
Do not use `.local`, placeholder, bot/tool, or raw third-party emails for
human contributor credit.
- Deferral is a maintainer action, not a dismissal. If a PR or issue is not
ready, say what is blocked, what evidence would change the decision, and
which part of the work remains valuable.
+1
View File
@@ -43,6 +43,7 @@ harvest/stewardship commits:
| #2708 Windows sub-agent completion halves TUI render width | Cherry-picked as `e933a11d7`; follow-up fix `72653f8ef` invalidates reused fanout-card rows. | `cargo test -p codewhale-tui --locked subagent`; `cargo test -p codewhale-tui --locked terminal_size`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. |
| #2627 Xiaomi MiMo Token Plan mode | Harvested only the auth-header behavior as `5aa68d986`; did not merge the conflicting mode/env changes. | `cargo test -p codewhale-tui --bin codewhale-tui --locked xiaomi_mimo`; `cargo test -p codewhale-secrets --locked xiaomi_mimo`; `cargo test -p codewhale-config --locked xiaomi_mimo`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. |
| #2730 canonical codewhale settings path | Already harvested as `9e15805f6`; follow-up reviewer assertion added on this branch. | Fixes #2664 by reading legacy DeepSeek settings fallbacks, migrating them into `~/.codewhale/settings.toml`, and ensuring `/config` displays the canonical CodeWhale path. `cargo test -p codewhale-tui --bin codewhale-tui --locked settings_ -- --nocapture` passed. |
| Contributor credit plumbing | Added locally after the co-author audit. | Normalized unpushed harvest author/trailer emails to numeric GitHub noreply identities, added `.github/AUTHOR_MAP`, and wired `scripts/check-coauthor-trailers.py` into CI so future `Harvested from PR #N by @handle` commits require machine-readable credit. |
| #2636 project-context mtime cache | Defer direct merge; harvest only after cache key/signature is widened. | Must include constitution changes, auto-generated context deletion, canonical path equivalence, and overwrite detection before landing. |
| #2634 HarmonyOS port | Locally harvested with additional Nix-chain clearance; keep credited and do not close until the integration branch is public. | User-supplied MatePad Edge demo (`https://bilibili.com/video/av116689597368905`) confirms real-device interest. Added env-driven OpenHarmony SDK setup, OHOS platform guards/fallbacks, self-update disablement, and OHOS target gating for Starlark execpolicy parsing plus PTY support so published OHOS builds do not pull `nix` 0.28 through `rustyline` or `portable-pty`. `cargo check --workspace --all-features --locked`, focused PTY/clipboard tests, and `cargo tree --locked -p codewhale-tui --target aarch64-unknown-linux-ohos -i nix@0.28.0` passed; full OHOS target check is blocked on this host because `OHOS_NATIVE_SDK`/target CC/sysroot are not configured and `ring` cannot find `assert.h`. |
| #2687 append-only mode/approval prompt | Defer direct merge; draft has compile failures and Plan-mode prompt correctness risks. | Any future harvest must keep stable `message[0]` genuinely mode-agnostic, preserve mode/approval suffixes after capacity replans, and distinguish external overrides from persisted generated prompts. |
+245
View File
@@ -0,0 +1,245 @@
#!/usr/bin/env python3
"""Validate that harvested contributor credit is GitHub-mappable.
The check is intentionally scoped to new commits. Historical commits may carry
raw or local emails, but new harvested commits should use GitHub's numeric
`id+login@users.noreply.github.com` address so co-author credit lands in the
contributor graph.
"""
from __future__ import annotations
import argparse
import re
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
# Parent of the directory that contains this script; used as the git working
# directory and as the base for the default author map path.
ROOT = Path(__file__).resolve().parents[1]
# Author map consulted when --author-map is not given.
DEFAULT_AUTHOR_MAP = ROOT / ".github" / "AUTHOR_MAP"
# Matches a whole "Name <email>" identity; the name must be non-empty.
IDENTITY_RE = re.compile(r"^\s*(?P<name>.+?)\s*<(?P<email>[^<>]+)>\s*$")
# Matches GitHub's numeric noreply form `<digits>+<login>@users.noreply.github.com`,
# ignoring case.
CANONICAL_NOREPLY_RE = re.compile(
    r"^[0-9]+\+[^@\s]+@users\.noreply\.github\.com$", re.IGNORECASE
)
# Finds `Co-authored-by: Name <email>` trailers in a commit message. The name
# may be empty; MULTILINE lets ^ and $ match at line boundaries.
COAUTHOR_RE = re.compile(
    r"^Co-authored-by:\s*(?P<name>.*?)\s*<(?P<email>[^<>]+)>\s*$",
    re.IGNORECASE | re.MULTILINE,
)
# Captures the login from a `Harvested from PR #N by @login` marker
# (case-sensitive; logins are limited to letters, digits, and hyphens).
HARVEST_RE = re.compile(r"Harvested from PR #[0-9]+ by @([A-Za-z0-9-]+)")
# Lowercase email addresses that are always treated as bot/tool identities.
BOT_EMAILS = {
    "codex@local",
    "codex@example.com",
    "cursoragent@cursor.com",
    "noreply@anthropic.com",
}
# Lowercase names treated as bot/tool identities, either as the whole name or
# as its first space-separated word (see is_bot_identity).
BOT_NAMES = ("claude", "codex", "cursor")
@dataclass(frozen=True)
class Identity:
    """A contributor identity: a display name paired with an email address."""

    name: str
    email: str

    def author(self) -> str:
        """Return the identity in git author form, ``Name <email>``."""
        return f"{self.name} <{self.email}>"

    def trailer(self) -> str:
        """Return the identity as a ``Co-authored-by`` commit trailer line."""
        return "Co-authored-by: " + self.author()
@dataclass(frozen=True)
class Commit:
    """One commit as read by ``git_log``.

    The field order mirrors the ``--format`` string used by ``git_log``, which
    builds instances positionally with ``Commit(*parts)``; do not reorder.
    """

    # Full commit hash (git ``%H``).
    sha: str
    # Author name (git ``%an``).
    author_name: str
    # Author email (git ``%ae``).
    author_email: str
    # Subject line of the commit message (git ``%s``).
    subject: str
    # Raw commit message, subject included (git ``%B``); trailers are parsed from this.
    body: str
def norm_key(value: str) -> str:
    """Return *value* trimmed and lowercased so alias lookups ignore case and padding."""
    trimmed = value.strip()
    return trimmed.lower()
def github_login_from_noreply(email: str) -> str | None:
    """Extract the login from a numeric GitHub noreply address.

    Returns the ``login`` part of ``id+login@users.noreply.github.com``, or
    ``None`` when *email* does not match ``CANONICAL_NOREPLY_RE``.
    """
    if CANONICAL_NOREPLY_RE.match(email):
        # The pattern guarantees "<digits>+<login>" before the "@".
        local_part = email.partition("@")[0]
        return local_part.partition("+")[2]
    return None
def parse_identity(raw: str, context: str) -> Identity:
    """Parse the right-hand side of an author map entry.

    Args:
        raw: text expected to look like ``Name <id+login@users.noreply.github.com>``.
        context: location prefix (for example ``path:lineno``) used in error messages.

    Returns:
        The parsed identity with surrounding whitespace removed from both parts.

    Raises:
        ValueError: if *raw* is not ``Name <email>`` or the email is not a
            numeric GitHub noreply address.
    """
    parsed = IDENTITY_RE.match(raw)
    if parsed is None:
        raise ValueError(f"{context}: expected 'Name <id+login@users.noreply.github.com>'")

    name = parsed.group("name").strip()
    email = parsed.group("email").strip()
    if CANONICAL_NOREPLY_RE.match(email) is None:
        raise ValueError(
            f"{context}: right-hand email must be numeric GitHub noreply, got {email}"
        )
    return Identity(name, email)
def load_author_map(path: Path) -> dict[str, Identity]:
    """Load the alias -> canonical identity table from an author map file.

    Each non-blank line has the form ``alias = Name <numeric noreply email>``;
    everything from ``#`` to the end of a line is ignored. Besides the explicit
    alias, each identity is also registered under its email, its display name,
    and its GitHub login, unless that key is already taken. All keys are
    normalized with ``norm_key``.

    Args:
        path: location of the author map file (read as UTF-8).

    Returns:
        Mapping from normalized alias to identity.

    Raises:
        ValueError: if a line has no ``=``, has an empty alias, has a malformed
            right-hand side, or reuses an alias for a different identity.
        OSError: if the file cannot be read.
    """
    aliases: dict[str, Identity] = {}
    lines = path.read_text(encoding="utf-8").splitlines()
    for lineno, raw_line in enumerate(lines, start=1):
        line = raw_line.split("#", 1)[0].strip()
        if not line:
            continue

        alias, separator, raw_identity = line.partition("=")
        alias = alias.strip()
        # An empty alias would register "" as a key, which could then match a
        # co-author trailer with an empty name; treat it as a malformed line.
        if not separator or not alias:
            raise ValueError(f"{path}:{lineno}: expected 'alias = Name <email>'")

        identity = parse_identity(raw_identity.strip(), f"{path}:{lineno}")
        key = norm_key(alias)
        if key in aliases and aliases[key] != identity:
            raise ValueError(f"{path}:{lineno}: duplicate alias {alias!r}")
        aliases[key] = identity

        # Implicit aliases never override an entry that already exists.
        aliases.setdefault(norm_key(identity.email), identity)
        aliases.setdefault(norm_key(identity.name), identity)
        if login := github_login_from_noreply(identity.email):
            aliases.setdefault(norm_key(login), identity)
    return aliases
def git_log(commit_range: str) -> list[Commit]:
    """Read the commits in *commit_range* from the repository at ``ROOT``.

    Fields are separated by NUL bytes and records by the ASCII record
    separator (0x1E), so subjects and bodies may contain arbitrary text.

    Args:
        commit_range: a revision range understood by ``git log``, for example
            ``origin/main..HEAD``.

    Returns:
        The commits in the order ``git log`` printed them.

    Raises:
        RuntimeError: if ``git log`` exits with a non-zero status or a record
            does not contain exactly five fields.
    """
    try:
        raw = subprocess.check_output(
            [
                "git",
                "log",
                "--format=%H%x00%an%x00%ae%x00%s%x00%B%x1e",
                commit_range,
            ],
            cwd=ROOT,
            text=True,
            # Decode explicitly instead of relying on the locale: git's default
            # log output encoding is UTF-8, and names or messages with non-ASCII
            # text would otherwise fail to decode under a non-UTF-8 locale.
            encoding="utf-8",
            errors="replace",
        )
    except subprocess.CalledProcessError as exc:
        raise RuntimeError(f"failed to read git range {commit_range!r}: {exc}") from exc

    commits: list[Commit] = []
    for record in raw.split("\x1e"):
        # `--format=` has tformat semantics: git writes a newline after every
        # entry, so each record after the first begins with "\n". Drop it so
        # the sha field (and the sha[:10] shown in messages) stays clean.
        record = record.lstrip("\n")
        if not record.strip():
            continue
        parts = record.split("\x00", 4)
        if len(parts) != 5:
            raise RuntimeError("failed to parse git log output")
        commits.append(Commit(*parts))
    return commits
def is_bot_identity(name: str, email: str) -> bool:
    """Report whether a name/email pair looks like a bot or tool identity.

    An identity is a bot when its lowercased email is listed in ``BOT_EMAILS``,
    or when its lowercased name equals an entry of ``BOT_NAMES`` or starts with
    that entry followed by a space.
    """
    if email.strip().lower() in BOT_EMAILS:
        return True

    candidate = name.strip().lower()
    for bot in BOT_NAMES:
        if candidate == bot or candidate.startswith(f"{bot} "):
            return True
    return False
def lookup_identity(aliases: dict[str, Identity], *values: str) -> Identity | None:
    """Return the identity for the first of *values* that is a known alias.

    Each value is normalized with ``norm_key`` before the lookup; ``None`` is
    returned when none of them is present in *aliases*.
    """
    # Both generators are lazy, so the scan stops at the first hit.
    candidates = (aliases.get(norm_key(value)) for value in values)
    return next((found for found in candidates if found is not None), None)
def validate(commits: list[Commit], aliases: dict[str, Identity], check_authors: bool) -> list[str]:
    """Check commits for contributor credit that GitHub cannot map.

    For every commit, in this order:

    1. When *check_authors* is set, flag a bot/tool author, or an author that
       matches an alias but does not use that alias's canonical email.
    2. Flag every ``Co-authored-by`` trailer whose email is not a numeric
       GitHub noreply address (bot/tool, known alias, or unknown).
    3. For every ``Harvested from PR #N by @login`` marker, require that the
       login is in *aliases* and that its canonical email is either the commit
       author email or one of the co-author trailer emails.

    Args:
        commits: commits to inspect.
        aliases: alias table as returned by ``load_author_map``.
        check_authors: whether to run the author checks in step 1.

    Returns:
        Human-readable error messages; empty when every commit passes.
    """
    problems: list[str] = []
    for commit in commits:
        label = f"{commit.sha[:10]} {commit.subject}"
        author_key = norm_key(commit.author_email)
        author_shown = f"{commit.author_name} <{commit.author_email}>"
        trailers = [
            Identity(found.group("name").strip(), found.group("email").strip())
            for found in COAUTHOR_RE.finditer(commit.body)
        ]

        # Step 1: commit author (opt-in).
        if check_authors:
            if is_bot_identity(commit.author_name, commit.author_email):
                problems.append(
                    f"{label}: author {author_shown} is a "
                    "bot/tool identity. Human harvested work should preserve the contributor "
                    "as author or use a human co-author trailer."
                )
            else:
                canonical = lookup_identity(aliases, commit.author_email, commit.author_name)
                if canonical and author_key != norm_key(canonical.email):
                    problems.append(
                        f"{label}: author {author_shown} "
                        f"matches AUTHOR_MAP but is not canonical. Use author {canonical.author()}."
                    )

        # Step 2: co-author trailers; canonical noreply emails are always accepted.
        for trailer in trailers:
            if CANONICAL_NOREPLY_RE.match(trailer.email):
                continue
            trailer_shown = f"{trailer.name} <{trailer.email}>"
            if is_bot_identity(trailer.name, trailer.email):
                problems.append(
                    f"{label}: remove bot/tool co-author trailer "
                    f"{trailer_shown}; contributor trailers are for humans."
                )
                continue
            canonical = lookup_identity(aliases, trailer.email, trailer.name)
            if canonical:
                problems.append(
                    f"{label}: co-author {trailer_shown} is not "
                    f"GitHub-mappable. Use `{canonical.trailer()}`."
                )
            else:
                problems.append(
                    f"{label}: co-author {trailer_shown} is not "
                    "numeric GitHub noreply and has no AUTHOR_MAP entry. Add an alias "
                    "or use `gh api users/<login> --jq '\"\\(.id)+\\(.login)@users.noreply.github.com\"'`."
                )

        # Step 3: harvested-from markers need matching machine-readable credit.
        trailer_keys = {norm_key(trailer.email) for trailer in trailers}
        for login in HARVEST_RE.findall(commit.body):
            canonical = lookup_identity(aliases, login)
            if canonical is None:
                problems.append(
                    f"{label}: harvested contributor @{login} is missing from .github/AUTHOR_MAP."
                )
                continue
            canonical_key = norm_key(canonical.email)
            credited = author_key == canonical_key or canonical_key in trailer_keys
            if not credited:
                problems.append(
                    f"{label}: `Harvested from PR ... by @{login}` needs machine-readable "
                    f"credit. Add `{canonical.trailer()}` or preserve the contributor as author."
                )
    return problems
def main(argv: list[str]) -> int:
    """Run the co-author credit check from the command line.

    Args:
        argv: command-line arguments without the program name.

    Returns:
        ``0`` when every commit passes, ``1`` when credit problems were found,
        and ``2`` when the check itself could not run (for example an
        unreadable author map or a failing ``git log``).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--author-map", type=Path, default=DEFAULT_AUTHOR_MAP)
    parser.add_argument("--range", default="origin/main..HEAD", help="git commit range to check")
    parser.add_argument(
        "--check-authors",
        action="store_true",
        help="also reject commit author emails that match known AUTHOR_MAP aliases",
    )
    options = parser.parse_args(argv)

    try:
        aliases = load_author_map(options.author_map)
        commits = git_log(options.range)
        problems = validate(commits, aliases, options.check_authors)
    except Exception as exc:
        # Top-level boundary: report any setup or parsing failure with a
        # distinct exit code instead of a traceback.
        print(f"co-author credit check failed to run: {exc}", file=sys.stderr)
        return 2

    if not problems:
        print(f"Co-author credit check passed for {len(commits)} commit(s).")
        return 0

    print("Co-author credit check failed:", file=sys.stderr)
    for problem in problems:
        print(f"- {problem}", file=sys.stderr)
    return 1


if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))