-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
cc_maintainers: add staleness filter and pull request ignore
Introduce the ability to "log to stdout" by returning 3 values from python tests. Log info from cc_maintainers. Check if we heard from people who are blamed in the last 3 years, and ignore them if we didn't. This is implemented as a public inbox query so cache the info for 2 weeks to avoid frequent refreshes. This can be further optimized to recalculate the age based on existing results for active folks. Also don't require known bona fide sub-maintainers to CC people when co-posting patches with PRs. Before pushing this change avg runtime for the test on netdev was 1.4s, we'll see how much this slows us down... Signed-off-by: Jakub Kicinski <[email protected]>
- Loading branch information
Showing
3 changed files
with
149 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,12 +3,14 @@ | |
# Copyright (c) 2020 Facebook | ||
|
||
from typing import Tuple | ||
import datetime | ||
import email | ||
import email.utils | ||
import subprocess | ||
import tempfile | ||
import os | ||
import re | ||
import json | ||
""" Test if relevant maintainers were CCed """ | ||
|
||
emailpat = re.compile(r'([^ <"]*@[^ >"]*)') | ||
|
@@ -29,18 +31,127 @@ | |
] | ||
} | ||
|
||
local_map = ["Vladimir Oltean <vladimir.oltean@nxp.com> <olteanv@gmail.com>"] | ||
corp_suffix = ['@broadcom.com', '@huawei.com', '@intel.com', '@nvidia.com'] | ||
|
||
pull_requesters = {'[email protected]', '[email protected]', | ||
'[email protected]', '[email protected]'} | ||
|
||
def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: | ||
local_map = ["Vladimir Oltean <[email protected]> <[email protected]>", | ||
"Alexander Duyck <[email protected]> <[email protected]>"] | ||
|
||
|
||
class StalenessEntry: | ||
def __init__(self, e, since_months): | ||
self.email = e | ||
|
||
self._load_time = None | ||
self._query_output = None | ||
self._query_depth = None | ||
self._newest_mid = None | ||
|
||
self._month_age = None | ||
|
||
self.reload(since_months) | ||
|
||
def __repr__(self): | ||
return f'Staleness({self.email}, age:{self._month_age} search depth:{self._query_depth}, ' + \ | ||
f'mid:{self._newest_mid})' | ||
|
||
def reload(self, since_months): | ||
res = subprocess.run(['lei', 'q', f"f:{self.email} AND d:{since_months}.months.ago..", | ||
'--no-save', '-q', '-O', 'https://lore.kernel.org/netdev'], | ||
stdout=subprocess.PIPE) | ||
output = res.stdout.decode('utf-8', 'replace') | ||
|
||
self._query_output = json.loads(output) | ||
self._query_depth = since_months | ||
self._load_time = datetime.datetime.now(datetime.UTC) | ||
|
||
newest = None | ||
for e in self._query_output: | ||
# Lei adds a null at the end of the list | ||
if not e: | ||
continue | ||
dt = datetime.datetime.fromisoformat(e["rt"]) | ||
if not newest or dt > newest: | ||
newest = dt | ||
self._newest_mid = e["m"] | ||
|
||
if not newest: | ||
self._month_age = 999 | ||
else: | ||
self._month_age = (self._load_time - newest).seconds / 60 / 60 / 24 / 30 | ||
|
||
def is_stale(self, since_months, dbg=None): | ||
if datetime.datetime.now(datetime.UTC) - self._load_time > datetime.timedelta(weeks=2): | ||
if dbg is not None: | ||
dbg.append("Cache expired for " + self.email) | ||
self.reload(since_months) | ||
|
||
# We know it's not stale, doesn't matter how deep the entry is | ||
if self._month_age <= since_months: | ||
return False | ||
# The query may have not been deep enough, refresh.. | ||
if self._query_depth < since_months: | ||
self.reload(since_months) | ||
return self._month_age > since_months | ||
|
||
|
||
class StalenessDB: | ||
def __init__(self): | ||
self._db = {} | ||
|
||
def is_stale(self, e, since_months, dbg=None): | ||
if e not in self._db: | ||
self._db[e] = StalenessEntry(e, since_months) | ||
|
||
ret = self._db[e].is_stale(since_months, dbg) | ||
|
||
if dbg is not None: | ||
dbg.append(repr(self._db[e])) | ||
return ret | ||
|
||
|
||
stale_db = StalenessDB() | ||
|
||
|
||
def get_stale(sender_from, missing, out): | ||
global stale_db | ||
|
||
sender_corp = None | ||
for corp in corp_suffix: | ||
if sender_from.endswith(corp): | ||
sender_corp = corp | ||
break | ||
|
||
ret = set() | ||
for e in missing: | ||
months = 36 | ||
# People within the same corp know sooner when others leave | ||
if sender_corp and e.endswith(sender_corp): | ||
months = 12 | ||
if stale_db.is_stale(e, months, out): | ||
ret.add(e) | ||
return ret | ||
|
||
|
||
def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str, str]: | ||
out = [] | ||
raw_gm = [] | ||
patch = thing | ||
|
||
if patch.series and patch.series.cover_pull: | ||
return 0, f"Pull request co-post, skipping", "" | ||
|
||
msg = email.message_from_string(patch.raw_patch) | ||
addrs = msg.get_all('to', []) | ||
addrs += msg.get_all('cc', []) | ||
addrs += msg.get_all('from', []) | ||
addrs += msg.get_all('sender', []) | ||
included = set([e for n, e in email.utils.getaddresses(addrs)]) | ||
out += ["=== Email ===", | ||
f"From: {msg.get_all('from')}", | ||
f"Included: {included}", ""] | ||
|
||
ignore_domains = [] | ||
sender_from = msg.get_all('from', ['nobody@nothing'])[0] | ||
|
@@ -52,19 +163,23 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: | |
|
||
expected = set() | ||
blamed = set() | ||
pure_blamed = set() | ||
ignored = set() | ||
with tempfile.NamedTemporaryFile() as fp: | ||
patch.write_out(fp) | ||
command = ['./scripts/get_maintainer.pl', '--git-min-percent', '25', '--', fp.name] | ||
with subprocess.Popen(command, cwd=tree.path, stdout=subprocess.PIPE) as p: | ||
line = p.stdout.readline().decode('utf8', 'replace') | ||
while line: | ||
raw_gm.append(line.strip()) | ||
match = emailpat.search(line) | ||
if match: | ||
addr = match.group(1) | ||
expected.add(addr) | ||
if 'blamed_fixes' in line: | ||
blamed.add(addr) | ||
if 'maintainer' not in line: | ||
pure_blamed.add(addr) | ||
for domain in ignore_domains: | ||
if domain in addr: | ||
ignored.add(addr) | ||
|
@@ -74,13 +189,28 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: | |
expected.difference_update(ignore_emails) | ||
blamed.difference_update(ignore_emails) | ||
|
||
out += ["=== get_maint wants ===", | ||
f"Expected: {expected}", | ||
f"Blamed: {blamed}", | ||
f"Pure blames: {pure_blamed}", | ||
f"Ignored: {ignored}", ""] | ||
|
||
expected.difference_update(ignored) | ||
blamed.difference_update(ignored) | ||
|
||
found = expected.intersection(included) | ||
missing = expected.difference(included) | ||
missing_blamed = blamed.difference(included) | ||
|
||
stale_log = [] | ||
stale = get_stale(sender_from, missing_blamed, stale_log) | ||
out.append(f"Stale: {stale}") | ||
|
||
# Ditch all stale from blames, and from missing only those stales who aren't maintainers. | ||
missing_blamed = missing_blamed.difference(stale) | ||
stale_pure_blames = pure_blamed.intersection(stale) | ||
missing = missing.difference(stale_pure_blames) | ||
|
||
# Last resort, sift thru aliases | ||
if len(missing): | ||
with open(os.path.join(tree.path, '.mailmap'), 'r') as f: | ||
|
@@ -92,7 +222,7 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: | |
for line in mmap_lines: | ||
if m in line: | ||
mmap_emails = emailpat.findall(line) | ||
if m not in mmap_emails: # re-check the match with the real regex | ||
if m not in mmap_emails: # re-check the match with the real regex | ||
continue | ||
for have in included: | ||
if have in mmap_emails: | ||
|
@@ -101,11 +231,18 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: | |
found.update(mapped) | ||
missing.difference_update(mapped) | ||
missing_blamed.difference_update(mapped) | ||
out.append(f"Mapped: {mapped}") | ||
|
||
out += ["=== Final ===", | ||
f"Missing: {missing}", | ||
f"Missing blames: {missing_blamed}"] | ||
out += ["", "=== stale log ==="] + stale_log | ||
out += ["", "=== get_maintainer ==="] + raw_gm | ||
|
||
if len(missing_blamed): | ||
return 1, f"{len(missing_blamed)} blamed authors not CCed: {' '.join(missing_blamed)}; " + \ | ||
f"{len(missing)} maintainers not CCed: {' '.join(missing)}" | ||
f"{len(missing)} maintainers not CCed: {' '.join(missing)}", '\n'.join(out) | ||
if len(missing): | ||
ret = 250 if len(found) > 1 else 1 | ||
return ret, f"{len(missing)} maintainers not CCed: {' '.join(missing)}" | ||
return 0, f"CCed {len(found)} of {len(expected)} maintainers" | ||
return ret, f"{len(missing)} maintainers not CCed: {' '.join(missing)}", '\n'.join(out) | ||
return 0, f"CCed {len(found)} of {len(expected)} maintainers", '\n'.join(out) |