ci: enforce mappable co-author credit
Add AUTHOR_MAP plus a lightweight co-author trailer checker so harvested commits use numeric GitHub noreply identities, reject bot/tool trailers, and require machine-readable credit when a commit says it was harvested from a PR.
Also normalize the local unpushed v0.9 harvest range so existing contributor authors/trailers for HUQIANTAO, Implementist, jrcjrcc, xyuai, cyq1017, idling11, and shenjackyuanjie use GitHub-mappable identities before the branch is published.
Validation: python3 scripts/check-coauthor-trailers.py --author-map .github/AUTHOR_MAP --range origin/main..HEAD --check-authors; python3 -m py_compile scripts/check-coauthor-trailers.py; ruby -e 'require "yaml"; YAML.load_file(".github/workflows/ci.yml")'; git diff --check; negative in-process validation for raw email, missing harvested credit, and bot author cases.
This commit is contained in:
@@ -0,0 +1,90 @@
|
|||||||
|
# Contributor credit identity map.
|
||||||
|
#
|
||||||
|
# Format:
|
||||||
|
# alias = Display Name <id+login@users.noreply.github.com>
|
||||||
|
#
|
||||||
|
# The right-hand side must use GitHub's numeric noreply address so harvested
|
||||||
|
# co-author credit lands in the contributor graph. The left-hand side may be a
|
||||||
|
# GitHub login, commit author name, old-style noreply address, raw email from a contributor commit,
|
||||||
|
# or local machine email seen in older harvested history.
|
||||||
|
|
||||||
|
hmbown = Hmbown <101357273+Hmbown@users.noreply.github.com>
|
||||||
|
reidliu41 = reidliu41 <61492567+reidliu41@users.noreply.github.com>
|
||||||
|
reid201711@gmail.com = reidliu41 <61492567+reidliu41@users.noreply.github.com>
|
||||||
|
HUQIANTAO = HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com>
|
||||||
|
Hu Qiantao = HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com>
|
||||||
|
huqiantao@users.noreply.github.com = HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com>
|
||||||
|
huqiantao@HudeMacBook-Air.local = HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com>
|
||||||
|
tom_huu@qq.com = HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com>
|
||||||
|
merchloubna70-dot = merchloubna70-dot <258170091+merchloubna70-dot@users.noreply.github.com>
|
||||||
|
h3c-hexin = h3c-hexin <13790929+h3c-hexin@users.noreply.github.com>
|
||||||
|
he.xin@h3c.com = h3c-hexin <13790929+h3c-hexin@users.noreply.github.com>
|
||||||
|
axobase001 = axobase001 <138223345+axobase001@users.noreply.github.com>
|
||||||
|
donglovejava = donglovejava <211940267+donglovejava@users.noreply.github.com>
|
||||||
|
Oliver-ZPLiu = Oliver-ZPLiu <47081637+Oliver-ZPLiu@users.noreply.github.com>
|
||||||
|
idling11 = idling11 <8055620+idling11@users.noreply.github.com>
|
||||||
|
Hanmiao Li = idling11 <8055620+idling11@users.noreply.github.com>
|
||||||
|
894876246@qq.com = idling11 <8055620+idling11@users.noreply.github.com>
|
||||||
|
angziii = angziii <177907677+angziii@users.noreply.github.com>
|
||||||
|
aboimpinto = aboimpinto <1231687+aboimpinto@users.noreply.github.com>
|
||||||
|
Paulo Aboim Pinto = aboimpinto <1231687+aboimpinto@users.noreply.github.com>
|
||||||
|
aboimpinto@gmail.com = aboimpinto <1231687+aboimpinto@users.noreply.github.com>
|
||||||
|
encyc = encyc <62669951+encyc@users.noreply.github.com>
|
||||||
|
Duducoco = Duducoco <69681789+Duducoco@users.noreply.github.com>
|
||||||
|
cyq1017 = cyq1017 <61975706+cyq1017@users.noreply.github.com>
|
||||||
|
cyq = cyq1017 <61975706+cyq1017@users.noreply.github.com>
|
||||||
|
15000851237@163.com = cyq1017 <61975706+cyq1017@users.noreply.github.com>
|
||||||
|
zlh124 = zlh124 <56312993+zlh124@users.noreply.github.com>
|
||||||
|
THINKER-ONLY = THINKER-ONLY <181556007+THINKER-ONLY@users.noreply.github.com>
|
||||||
|
nightt5879 = nightt5879 <87569709+nightt5879@users.noreply.github.com>
|
||||||
|
Liu-Vince = Liu-Vince <56624166+Liu-Vince@users.noreply.github.com>
|
||||||
|
Vince = Liu-Vince <56624166+Liu-Vince@users.noreply.github.com>
|
||||||
|
liuwenchang.x@qq.com = Liu-Vince <56624166+Liu-Vince@users.noreply.github.com>
|
||||||
|
JiarenWang = JiarenWang <33421508+JiarenWang@users.noreply.github.com>
|
||||||
|
wdw8276 = wdw8276 <3972439+wdw8276@users.noreply.github.com>
|
||||||
|
pengyou200902 = pengyou200902 <35026241+pengyou200902@users.noreply.github.com>
|
||||||
|
linzhiqin2003 = linzhiqin2003 <123250980+linzhiqin2003@users.noreply.github.com>
|
||||||
|
LING71671 = LING71671 <231181387+LING71671@users.noreply.github.com>
|
||||||
|
JasonOA888 = JasonOA888 <101583541+JasonOA888@users.noreply.github.com>
|
||||||
|
Inference1 = Inference1 <68734681+Inference1@users.noreply.github.com>
|
||||||
|
hongqitai = hongqitai <188678175+hongqitai@users.noreply.github.com>
|
||||||
|
gordonlu = gordonlu <3125629+gordonlu@users.noreply.github.com>
|
||||||
|
gaord = gaord <9567937+gaord@users.noreply.github.com>
|
||||||
|
Ben Gao = gaord <9567937+gaord@users.noreply.github.com>
|
||||||
|
bengao168@msn.com = gaord <9567937+gaord@users.noreply.github.com>
|
||||||
|
zhuangbiaowei = zhuangbiaowei <93194+zhuangbiaowei@users.noreply.github.com>
|
||||||
|
yuanchenglu = yuanchenglu <4088730+yuanchenglu@users.noreply.github.com>
|
||||||
|
Vishnu1837 = Vishnu1837 <104626273+Vishnu1837@users.noreply.github.com>
|
||||||
|
sximelon = sximelon <15710511+sximelon@users.noreply.github.com>
|
||||||
|
Sskift = Sskift <163287349+Sskift@users.noreply.github.com>
|
||||||
|
New2Niu = New2Niu <19551155+New2Niu@users.noreply.github.com>
|
||||||
|
mvanhorn = mvanhorn <455140+mvanhorn@users.noreply.github.com>
|
||||||
|
MengZ-super = MengZ-super <121712068+MengZ-super@users.noreply.github.com>
|
||||||
|
membphis = membphis <6814606+membphis@users.noreply.github.com>
|
||||||
|
LeoAlex0 = LeoAlex0 <31839998+LeoAlex0@users.noreply.github.com>
|
||||||
|
Lee-take = Lee-take <210963840+Lee-take@users.noreply.github.com>
|
||||||
|
lbcheng888 = lbcheng888 <6716643+lbcheng888@users.noreply.github.com>
|
||||||
|
kunpeng-ai-lab = kunpeng-ai-lab <16793595+kunpeng-ai-lab@users.noreply.github.com>
|
||||||
|
elowen53 = elowen53 <88364845+elowen53@users.noreply.github.com>
|
||||||
|
Elowen = elowen53 <88364845+elowen53@users.noreply.github.com>
|
||||||
|
xrnc@outlook.com = elowen53 <88364845+elowen53@users.noreply.github.com>
|
||||||
|
CrepuscularIRIS = CrepuscularIRIS <126939795+CrepuscularIRIS@users.noreply.github.com>
|
||||||
|
chnjames = chnjames <44110547+chnjames@users.noreply.github.com>
|
||||||
|
ChaceLyee2101 = ChaceLyee2101 <95995339+ChaceLyee2101@users.noreply.github.com>
|
||||||
|
AresNing = AresNing <49557311+AresNing@users.noreply.github.com>
|
||||||
|
|
||||||
|
shenjackyuanjie = shenjackyuanjie <54507071+shenjackyuanjie@users.noreply.github.com>
|
||||||
|
shenjack = shenjackyuanjie <54507071+shenjackyuanjie@users.noreply.github.com>
|
||||||
|
3695888@qq.com = shenjackyuanjie <54507071+shenjackyuanjie@users.noreply.github.com>
|
||||||
|
xyuai = xyuai <281015099+xyuai@users.noreply.github.com>
|
||||||
|
Implementist = Implementist <24910011+Implementist@users.noreply.github.com>
|
||||||
|
implecao = Implementist <24910011+Implementist@users.noreply.github.com>
|
||||||
|
yuyuyu4993@qq.com = Implementist <24910011+Implementist@users.noreply.github.com>
|
||||||
|
jrcjrcc = jrcjrcc <192965070+jrcjrcc@users.noreply.github.com>
|
||||||
|
jrcjrcc@users.noreply.github.com = jrcjrcc <192965070+jrcjrcc@users.noreply.github.com>
|
||||||
|
RefuseOdd = RefuseOdd <192543033+RefuseOdd@users.noreply.github.com>
|
||||||
|
wywsoor = wywsoor <26341601+wywsoor@users.noreply.github.com>
|
||||||
|
hsdbeebou = hsdbeebou <284843096+hsdbeebou@users.noreply.github.com>
|
||||||
|
tdccccc = tdccccc <79492752+tdccccc@users.noreply.github.com>
|
||||||
|
greyfreedom = greyfreedom <11493871+greyfreedom@users.noreply.github.com>
|
||||||
|
greyfreedom@163.com = greyfreedom <11493871+greyfreedom@users.noreply.github.com>
|
||||||
@@ -11,3 +11,4 @@
|
|||||||
- [ ] Updated docs or comments as needed
|
- [ ] Updated docs or comments as needed
|
||||||
- [ ] Added or updated tests where relevant
|
- [ ] Added or updated tests where relevant
|
||||||
- [ ] Verified TUI behavior manually if UI changes
|
- [ ] Verified TUI behavior manually if UI changes
|
||||||
|
- [ ] Harvested/co-authored credit uses a GitHub numeric noreply address
|
||||||
|
|||||||
@@ -33,6 +33,8 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
- uses: dtolnay/rust-toolchain@stable
|
- uses: dtolnay/rust-toolchain@stable
|
||||||
with:
|
with:
|
||||||
components: rustfmt, clippy
|
components: rustfmt, clippy
|
||||||
@@ -50,6 +52,22 @@ jobs:
|
|||||||
run: cargo clippy --workspace --all-features --locked -- -D warnings
|
run: cargo clippy --workspace --all-features --locked -- -D warnings
|
||||||
- name: Check provider registry drift
|
- name: Check provider registry drift
|
||||||
run: python3 scripts/check-provider-registry.py
|
run: python3 scripts/check-provider-registry.py
|
||||||
|
- name: Check harvested contributor credit
  if: github.event_name != 'schedule'
  shell: bash
  # Event data is passed through the environment instead of being
  # interpolated into the script text, so a crafted ref name can never be
  # parsed as shell code.
  env:
    EVENT_NAME: ${{ github.event_name }}
    BASE_REF: ${{ github.base_ref }}
    BEFORE_SHA: ${{ github.event.before }}
    HEAD_SHA: ${{ github.sha }}
  run: |
    if [[ "$EVENT_NAME" == "pull_request" ]]; then
      # Pull requests: check everything the PR adds on top of its base.
      git fetch --no-tags origin "$BASE_REF"
      RANGE="origin/${BASE_REF}..HEAD"
    elif [[ -n "$BEFORE_SHA" && "$BEFORE_SHA" != "0000000000000000000000000000000000000000" ]]; then
      # Pushes to an existing ref: check only the newly pushed commits.
      RANGE="${BEFORE_SHA}..${HEAD_SHA}"
    else
      # New refs (all-zero "before") and events that carry no "before" SHA:
      # fall back to the tip commit only.
      RANGE="HEAD~1..HEAD"
    fi
    python3 scripts/check-coauthor-trailers.py \
      --author-map .github/AUTHOR_MAP \
      --range "$RANGE" \
      --check-authors
|
||||||
- name: Linux clippy location
|
- name: Linux clippy location
|
||||||
run: echo "Linux clippy/test gates run on CNB for mirrored fix/*, rebrand/*, work/v*, and main branches."
|
run: echo "Linux clippy/test gates run on CNB for mirrored fix/*, rebrand/*, work/v*, and main branches."
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
explicit Rustls ring-provider installation for the no-provider TLS build, and
|
explicit Rustls ring-provider installation for the no-provider TLS build, and
|
||||||
OHOS fallbacks for unsupported keyring, clipboard, sandbox, browser-open, TTY,
|
OHOS fallbacks for unsupported keyring, clipboard, sandbox, browser-open, TTY,
|
||||||
execpolicy Starlark parsing, and self-update surfaces.
|
execpolicy Starlark parsing, and self-update surfaces.
|
||||||
|
- Added `.github/AUTHOR_MAP` and a CI co-author credit check so harvested
|
||||||
|
commits use GitHub-mappable numeric noreply identities instead of `.local`,
|
||||||
|
placeholder, bot/tool, or raw third-party emails.
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
|
|||||||
+5
-1
@@ -98,8 +98,12 @@ When this happens:
|
|||||||
- If the maintainer copies or adapts your code, the harvested commit also
|
- If the maintainer copies or adapts your code, the harvested commit also
|
||||||
keeps attribution with the original author identity when possible: either by
|
keeps attribution with the original author identity when possible: either by
|
||||||
preserving the commit author on a cherry-pick or by adding a
|
preserving the commit author on a cherry-pick or by adding a
|
||||||
`Co-authored-by: Name <email>` trailer from the original PR commit. This is
|
`Co-authored-by: Name <id+login@users.noreply.github.com>` trailer. This is
|
||||||
what lets GitHub's contribution surfaces recognize more than prose credit.
|
what lets GitHub's contribution surfaces recognize more than prose credit.
|
||||||
|
Maintainers should use `.github/AUTHOR_MAP`, or run
|
||||||
|
`gh api users/<login> --jq '"\(.id)+\(.login)@users.noreply.github.com"'`,
|
||||||
|
rather than copying raw, `.local`, or old-style noreply emails from a
|
||||||
|
contributor's machine.
|
||||||
- The `CHANGELOG.md` entry for the next release credits you by handle.
|
- The `CHANGELOG.md` entry for the next release credits you by handle.
|
||||||
- The auto-close workflow closes your PR with a templated thank-you and
|
- The auto-close workflow closes your PR with a templated thank-you and
|
||||||
a link to the commit on `main`.
|
a link to the commit on `main`.
|
||||||
|
|||||||
@@ -23,6 +23,12 @@ could not cover by ourselves.
|
|||||||
issues, keep author/co-author attribution where possible, add
|
issues, keep author/co-author attribution where possible, add
|
||||||
`Harvested from PR #N by @handle`, and credit the contributor in the
|
`Harvested from PR #N by @handle`, and credit the contributor in the
|
||||||
changelog or release notes.
|
changelog or release notes.
|
||||||
|
- Make credit machine-readable. If a harvested commit cannot preserve the
|
||||||
|
contributor as the author, add a `Co-authored-by` trailer with the GitHub
|
||||||
|
numeric noreply address from `.github/AUTHOR_MAP` or
|
||||||
|
`gh api users/<login> --jq '"\(.id)+\(.login)@users.noreply.github.com"'`.
|
||||||
|
Do not use `.local`, placeholder, bot/tool, or raw third-party emails for
|
||||||
|
human contributor credit.
|
||||||
- Deferral is a maintainer action, not a dismissal. If a PR or issue is not
|
- Deferral is a maintainer action, not a dismissal. If a PR or issue is not
|
||||||
ready, say what is blocked, what evidence would change the decision, and
|
ready, say what is blocked, what evidence would change the decision, and
|
||||||
which part of the work remains valuable.
|
which part of the work remains valuable.
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ harvest/stewardship commits:
|
|||||||
| #2708 Windows sub-agent completion halves TUI render width | Cherry-picked as `e933a11d7`; follow-up fix `72653f8ef` invalidates reused fanout-card rows. | `cargo test -p codewhale-tui --locked subagent`; `cargo test -p codewhale-tui --locked terminal_size`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. |
|
| #2708 Windows sub-agent completion halves TUI render width | Cherry-picked as `e933a11d7`; follow-up fix `72653f8ef` invalidates reused fanout-card rows. | `cargo test -p codewhale-tui --locked subagent`; `cargo test -p codewhale-tui --locked terminal_size`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. |
|
||||||
| #2627 Xiaomi MiMo Token Plan mode | Harvested only the auth-header behavior as `5aa68d986`; did not merge the conflicting mode/env changes. | `cargo test -p codewhale-tui --bin codewhale-tui --locked xiaomi_mimo`; `cargo test -p codewhale-secrets --locked xiaomi_mimo`; `cargo test -p codewhale-config --locked xiaomi_mimo`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. |
|
| #2627 Xiaomi MiMo Token Plan mode | Harvested only the auth-header behavior as `5aa68d986`; did not merge the conflicting mode/env changes. | `cargo test -p codewhale-tui --bin codewhale-tui --locked xiaomi_mimo`; `cargo test -p codewhale-secrets --locked xiaomi_mimo`; `cargo test -p codewhale-config --locked xiaomi_mimo`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. |
|
||||||
| #2730 canonical codewhale settings path | Already harvested as `9e15805f6`; follow-up reviewer assertion added on this branch. | Fixes #2664 by reading legacy DeepSeek settings fallbacks, migrating them into `~/.codewhale/settings.toml`, and ensuring `/config` displays the canonical CodeWhale path. `cargo test -p codewhale-tui --bin codewhale-tui --locked settings_ -- --nocapture` passed. |
|
| #2730 canonical codewhale settings path | Already harvested as `9e15805f6`; follow-up reviewer assertion added on this branch. | Fixes #2664 by reading legacy DeepSeek settings fallbacks, migrating them into `~/.codewhale/settings.toml`, and ensuring `/config` displays the canonical CodeWhale path. `cargo test -p codewhale-tui --bin codewhale-tui --locked settings_ -- --nocapture` passed. |
|
||||||
|
| Contributor credit plumbing | Added locally after the co-author audit. | Normalized unpushed harvest author/trailer emails to numeric GitHub noreply identities, added `.github/AUTHOR_MAP`, and wired `scripts/check-coauthor-trailers.py` into CI so future `Harvested from PR #N by @handle` commits require machine-readable credit. |
|
||||||
| #2636 project-context mtime cache | Defer direct merge; harvest only after cache key/signature is widened. | Must include constitution changes, auto-generated context deletion, canonical path equivalence, and overwrite detection before landing. |
|
| #2636 project-context mtime cache | Defer direct merge; harvest only after cache key/signature is widened. | Must include constitution changes, auto-generated context deletion, canonical path equivalence, and overwrite detection before landing. |
|
||||||
| #2634 HarmonyOS port | Locally harvested with additional Nix-chain clearance; keep credited and do not close until the integration branch is public. | User-supplied MatePad Edge demo (`https://bilibili.com/video/av116689597368905`) confirms real-device interest. Added env-driven OpenHarmony SDK setup, OHOS platform guards/fallbacks, self-update disablement, and OHOS target gating for Starlark execpolicy parsing plus PTY support so published OHOS builds do not pull `nix` 0.28 through `rustyline` or `portable-pty`. `cargo check --workspace --all-features --locked`, focused PTY/clipboard tests, and `cargo tree --locked -p codewhale-tui --target aarch64-unknown-linux-ohos -i nix@0.28.0` passed; full OHOS target check is blocked on this host because `OHOS_NATIVE_SDK`/target CC/sysroot are not configured and `ring` cannot find `assert.h`. |
|
| #2634 HarmonyOS port | Locally harvested with additional Nix-chain clearance; keep credited and do not close until the integration branch is public. | User-supplied MatePad Edge demo (`https://bilibili.com/video/av116689597368905`) confirms real-device interest. Added env-driven OpenHarmony SDK setup, OHOS platform guards/fallbacks, self-update disablement, and OHOS target gating for Starlark execpolicy parsing plus PTY support so published OHOS builds do not pull `nix` 0.28 through `rustyline` or `portable-pty`. `cargo check --workspace --all-features --locked`, focused PTY/clipboard tests, and `cargo tree --locked -p codewhale-tui --target aarch64-unknown-linux-ohos -i nix@0.28.0` passed; full OHOS target check is blocked on this host because `OHOS_NATIVE_SDK`/target CC/sysroot are not configured and `ring` cannot find `assert.h`. |
|
||||||
| #2687 append-only mode/approval prompt | Defer direct merge; draft has compile failures and Plan-mode prompt correctness risks. | Any future harvest must keep stable `message[0]` genuinely mode-agnostic, preserve mode/approval suffixes after capacity replans, and distinguish external overrides from persisted generated prompts. |
|
| #2687 append-only mode/approval prompt | Defer direct merge; draft has compile failures and Plan-mode prompt correctness risks. | Any future harvest must keep stable `message[0]` genuinely mode-agnostic, preserve mode/approval suffixes after capacity replans, and distinguish external overrides from persisted generated prompts. |
|
||||||
|
|||||||
@@ -0,0 +1,245 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Validate that harvested contributor credit is GitHub-mappable.
|
||||||
|
|
||||||
|
The check is intentionally scoped to new commits. Historical commits may carry
|
||||||
|
raw or local emails, but new harvested commits should use GitHub's numeric
|
||||||
|
`id+login@users.noreply.github.com` address so co-author credit lands in the
|
||||||
|
contributor graph.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# Repository root, taken as the grandparent of this file (the script is run as
# scripts/check-coauthor-trailers.py).
ROOT = Path(__file__).resolve().parents[1]
DEFAULT_AUTHOR_MAP = ROOT.joinpath(".github", "AUTHOR_MAP")

# "Display Name <email>" with optional surrounding whitespace.
IDENTITY_RE = re.compile(r"^\s*(?P<name>.+?)\s*<(?P<email>[^<>]+)>\s*$")

# GitHub's numeric noreply form: <digits>+<login>@users.noreply.github.com.
CANONICAL_NOREPLY_RE = re.compile(
    r"^[0-9]+\+[^@\s]+@users\.noreply\.github\.com$",
    flags=re.IGNORECASE,
)

# A `Co-authored-by:` trailer occupying one line of a commit message.
COAUTHOR_RE = re.compile(
    r"^Co-authored-by:\s*(?P<name>.*?)\s*<(?P<email>[^<>]+)>\s*$",
    flags=re.IGNORECASE | re.MULTILINE,
)

# Prose credit marker; the single capture group is the GitHub handle.
HARVEST_RE = re.compile(r"Harvested from PR #[0-9]+ by @([A-Za-z0-9-]+)")

# Lower-cased e-mail addresses that always count as bot/tool identities.
BOT_EMAILS = {
    "noreply@anthropic.com",
    "cursoragent@cursor.com",
    "codex@example.com",
    "codex@local",
}

# Lower-cased names that count as bot/tool identities, either on their own or
# as the first space-separated word of a longer name.
BOT_NAMES = ("claude", "codex", "cursor")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Identity:
    """An immutable contributor identity: a display name plus an e-mail address."""

    name: str
    email: str

    def author(self) -> str:
        """Render the identity as ``Name <email>``."""
        return f"{self.name} <{self.email}>"

    def trailer(self) -> str:
        """Render the identity as a ``Co-authored-by:`` commit trailer line."""
        return "Co-authored-by: " + self.author()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Commit:
    """One commit as read from ``git log``.

    Field order matters: instances are built positionally from the split
    ``git log`` record.
    """

    # Full commit hash.
    sha: str
    # Author name as recorded in the commit.
    author_name: str
    # Author e-mail as recorded in the commit.
    author_email: str
    # First line of the commit message.
    subject: str
    # Raw commit message, including any trailers.
    body: str
|
||||||
|
|
||||||
|
|
||||||
|
def norm_key(value: str) -> str:
    """Return *value* trimmed and lower-cased, for case-insensitive alias lookups."""
    trimmed = value.strip()
    return trimmed.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def github_login_from_noreply(email: str) -> str | None:
    """Extract the login from a numeric GitHub noreply address.

    Returns the text between the first ``+`` and the ``@`` when *email*
    matches ``CANONICAL_NOREPLY_RE``, otherwise ``None``.
    """
    if CANONICAL_NOREPLY_RE.match(email) is None:
        return None
    local_part, _, _ = email.partition("@")
    _, _, login = local_part.partition("+")
    return login
|
||||||
|
|
||||||
|
|
||||||
|
def parse_identity(raw: str, context: str) -> Identity:
    """Parse ``Name <email>`` text into an ``Identity``.

    Args:
        raw: The text to parse.
        context: Location prefix (for example ``file:line``) used in error
            messages.

    Raises:
        ValueError: if *raw* is not of the form ``Name <email>`` or the e-mail
            is not a numeric GitHub noreply address.
    """
    parsed = IDENTITY_RE.match(raw)
    if parsed is None:
        raise ValueError(f"{context}: expected 'Name <id+login@users.noreply.github.com>'")

    name = parsed.group("name").strip()
    email = parsed.group("email").strip()
    if CANONICAL_NOREPLY_RE.match(email) is None:
        raise ValueError(
            f"{context}: right-hand email must be numeric GitHub noreply, got {email}"
        )
    return Identity(name, email)
|
||||||
|
|
||||||
|
|
||||||
|
def load_author_map(path: Path) -> dict[str, Identity]:
    """Parse an AUTHOR_MAP file into a case-insensitive alias table.

    Each non-blank line has the form ``alias = Name <email>``; everything from
    a ``#`` to the end of the line is ignored. Besides the explicit alias, each
    identity's own e-mail, display name and GitHub login are registered as
    implicit aliases unless those keys are already taken.

    Args:
        path: Location of the AUTHOR_MAP file (read as UTF-8).

    Returns:
        Mapping from normalized alias (see ``norm_key``) to ``Identity``.

    Raises:
        ValueError: if a line has no ``=``, has an empty alias, has a
            right-hand side that ``parse_identity`` rejects, or reuses an
            alias already bound to a different identity.
    """
    aliases: dict[str, Identity] = {}
    text = path.read_text(encoding="utf-8")
    for lineno, raw_line in enumerate(text.splitlines(), start=1):
        line = raw_line.split("#", 1)[0].strip()
        if not line:
            continue

        where = f"{path}:{lineno}"
        alias, separator, raw_identity = line.partition("=")
        if not separator:
            raise ValueError(f"{where}: expected 'alias = Name <email>'")
        alias = alias.strip()
        if not alias:
            # An empty key would match any commit whose author name or e-mail
            # normalizes to the empty string.
            raise ValueError(f"{where}: empty alias; expected 'alias = Name <email>'")

        identity = parse_identity(raw_identity.strip(), where)
        key = norm_key(alias)
        # The key may already exist from an earlier explicit line or from an
        # implicit alias registered below; either way it must agree.
        if key in aliases and aliases[key] != identity:
            raise ValueError(f"{where}: duplicate alias {alias!r}")
        aliases[key] = identity

        # Implicit aliases never override an existing entry.
        aliases.setdefault(norm_key(identity.email), identity)
        aliases.setdefault(norm_key(identity.name), identity)
        if login := github_login_from_noreply(identity.email):
            aliases.setdefault(norm_key(login), identity)
    return aliases
|
||||||
|
|
||||||
|
|
||||||
|
def git_log(commit_range: str) -> list[Commit]:
    """Return the commits in *commit_range* as ``Commit`` records.

    Runs ``git log`` in ``ROOT`` with NUL-separated fields (hash, author name,
    author e-mail, subject, raw body) and an ASCII record separator (0x1E)
    after each commit.

    Args:
        commit_range: A git revision range such as ``origin/main..HEAD``.

    Raises:
        RuntimeError: if ``git log`` exits non-zero or a record does not split
            into the five expected fields.
    """
    try:
        raw = subprocess.check_output(
            [
                "git",
                "log",
                "--format=%H%x00%an%x00%ae%x00%s%x00%B%x1e",
                commit_range,
                # End the revision list so the range is never read as a path.
                "--",
            ],
            cwd=ROOT,
            # git emits log output as UTF-8 unless configured otherwise;
            # decode explicitly instead of depending on the runner's locale,
            # and do not crash on a malformed byte in an old message.
            encoding="utf-8",
            errors="replace",
        )
    except subprocess.CalledProcessError as exc:
        raise RuntimeError(f"failed to read git range {commit_range!r}: {exc}") from exc

    commits: list[Commit] = []
    for record in raw.split("\x1e"):
        # `--format=` uses terminator semantics: git writes a newline after
        # every record, so each record after the first would otherwise begin
        # with "\n" and that newline would end up inside the SHA field.
        record = record.lstrip("\n")
        if not record.strip():
            continue
        parts = record.split("\x00", 4)
        if len(parts) != 5:
            raise RuntimeError("failed to parse git log output")
        commits.append(Commit(*parts))
    return commits
|
||||||
|
|
||||||
|
|
||||||
|
def is_bot_identity(name: str, email: str) -> bool:
    """Report whether *name* / *email* identify a known bot or tool.

    The e-mail is compared (trimmed, lower-cased) against ``BOT_EMAILS``; the
    name matches when it equals an entry of ``BOT_NAMES`` or starts with that
    entry followed by a space.
    """
    if email.strip().lower() in BOT_EMAILS:
        return True

    candidate = name.strip().lower()
    for bot in BOT_NAMES:
        if candidate == bot or candidate.startswith(f"{bot} "):
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def lookup_identity(aliases: dict[str, Identity], *values: str) -> Identity | None:
    """Return the identity for the first of *values* found in *aliases*.

    Each value is normalized with ``norm_key`` before the lookup; ``None`` is
    returned when none of them is a known alias.
    """
    hits = (aliases.get(norm_key(candidate)) for candidate in values)
    return next((hit for hit in hits if hit is not None), None)
|
||||||
|
|
||||||
|
|
||||||
|
def validate(commits: list[Commit], aliases: dict[str, Identity], check_authors: bool) -> list[str]:
    """Collect credit problems for *commits*; an empty list means all passed.

    Three checks run per commit:

    * with ``check_authors``, the author must not be a bot/tool identity and,
      when the author e-mail or name is a known alias, must already use the
      mapped e-mail;
    * every ``Co-authored-by`` trailer must carry a numeric GitHub noreply
      e-mail;
    * every ``Harvested from PR #N by @login`` marker needs the mapped
      identity as either the commit author or a co-author trailer.

    Args:
        commits: Commits to inspect.
        aliases: Alias table as produced by ``load_author_map``.
        check_authors: Whether to run the commit-author check.

    Returns:
        One human-readable message per problem, each prefixed with the
        abbreviated hash and subject of the offending commit.
    """
    problems: list[str] = []
    report = problems.append

    for commit in commits:
        label = f"{commit.sha[:10]} {commit.subject}"
        trailers = [
            Identity(found.group("name").strip(), found.group("email").strip())
            for found in COAUTHOR_RE.finditer(commit.body)
        ]

        # Commit author.
        if check_authors:
            author_text = f"{commit.author_name} <{commit.author_email}>"
            if is_bot_identity(commit.author_name, commit.author_email):
                report(
                    f"{label}: author {author_text} is a "
                    "bot/tool identity. Human harvested work should preserve the contributor "
                    "as author or use a human co-author trailer."
                )
            else:
                canonical = lookup_identity(aliases, commit.author_email, commit.author_name)
                if canonical and norm_key(commit.author_email) != norm_key(canonical.email):
                    report(
                        f"{label}: author {author_text} "
                        f"matches AUTHOR_MAP but is not canonical. Use author {canonical.author()}."
                    )

        # Co-author trailers: a numeric noreply address is accepted as-is.
        for trailer in trailers:
            if CANONICAL_NOREPLY_RE.match(trailer.email):
                continue
            shown = f"{trailer.name} <{trailer.email}>"
            if is_bot_identity(trailer.name, trailer.email):
                report(
                    f"{label}: remove bot/tool co-author trailer "
                    f"{shown}; contributor trailers are for humans."
                )
                continue
            canonical = lookup_identity(aliases, trailer.email, trailer.name)
            if canonical:
                hint = f"GitHub-mappable. Use `{canonical.trailer()}`."
            else:
                hint = (
                    "numeric GitHub noreply and has no AUTHOR_MAP entry. Add an alias "
                    "or use `gh api users/<login> --jq '\"\\(.id)+\\(.login)@users.noreply.github.com\"'`."
                )
            report(f"{label}: co-author {shown} is not {hint}")

        # Prose "Harvested from PR" markers must be backed by author or trailer.
        credited = {norm_key(trailer.email) for trailer in trailers}
        for login in HARVEST_RE.findall(commit.body):
            canonical = lookup_identity(aliases, login)
            if canonical is None:
                report(
                    f"{label}: harvested contributor @{login} is missing from .github/AUTHOR_MAP."
                )
                continue
            wanted = norm_key(canonical.email)
            if norm_key(commit.author_email) == wanted or wanted in credited:
                continue
            report(
                f"{label}: `Harvested from PR ... by @{login}` needs machine-readable "
                f"credit. Add `{canonical.trailer()}` or preserve the contributor as author."
            )
    return problems
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str]) -> int:
    """Run the co-author credit check and return a process exit status.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        ``0`` when every commit passes, ``1`` when problems were found, and
        ``2`` when the check itself could not run (for example an unreadable
        author map or a failing ``git log``).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--author-map", type=Path, default=DEFAULT_AUTHOR_MAP)
    parser.add_argument("--range", default="origin/main..HEAD", help="git commit range to check")
    parser.add_argument(
        "--check-authors",
        action="store_true",
        help="also reject commit author emails that match known AUTHOR_MAP aliases",
    )
    options = parser.parse_args(argv)

    # Top-level boundary: any failure to load inputs is reported, not raised.
    try:
        aliases = load_author_map(options.author_map)
        commits = git_log(options.range)
        errors = validate(commits, aliases, options.check_authors)
    except Exception as exc:
        print(f"co-author credit check failed to run: {exc}", file=sys.stderr)
        return 2

    if not errors:
        print(f"Co-author credit check passed for {len(commits)} commit(s).")
        return 0

    report_lines = ["Co-author credit check failed:"]
    report_lines.extend(f"- {error}" for error in errors)
    print(*report_lines, sep="\n", file=sys.stderr)
    return 1
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
|
||||||
Reference in New Issue
Block a user