From b3593bc44e9ac3f2fbd71ff00da7e2dfd3e69679 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 7 Nov 2023 17:40:12 -0800 Subject: [PATCH] cc_maintainers: add staleness filter and pull request ignore Introduce the ability to "log to stdout" by returning 3 values from python tests. Log info from cc_maintainers. Check if we heard from people who are blamed in the last 3 years, and ignore them if we didn't. This is implemented as a public inbox query so cache the info for 2 weeks to avoid frequent refreshes. This can be further optimized to recalculate the age based on existing results for active folks. Also don't require known bona fide sub-maintainers to CC people when co-posting patches with PRs. Before pushing this change avg runtime for the test on netdev was 1.4s, we'll see how much this slows us down... Signed-off-by: Jakub Kicinski --- core/patch.py | 3 +- core/test.py | 6 +- tests/patch/cc_maintainers/test.py | 149 +++++++++++++++++++++++++++-- 3 files changed, 149 insertions(+), 9 deletions(-) diff --git a/core/patch.py b/core/patch.py index 18c4d6d..d552e02 100644 --- a/core/patch.py +++ b/core/patch.py @@ -29,10 +29,11 @@ class Patch(object): write_out(fp) Write the raw patch into the given file pointer. """ - def __init__(self, raw_patch, ident=None, title=""): + def __init__(self, raw_patch, ident=None, title="", series=None): self.raw_patch = raw_patch self.title = title self.subject = "" + self.series = series subj = re.search(r'Subject: \[.*\](.*)', raw_patch) if not subj: diff --git a/core/test.py b/core/test.py index 0770868..3a51eb9 100644 --- a/core/test.py +++ b/core/test.py @@ -109,9 +109,11 @@ def _exec(self, tree, thing, result_dir): return self._exec_run(tree, thing, result_dir) elif "pymod" in self.info: core.log("START", datetime.datetime.now().strftime("%H:%M:%S.%f")) - ret, desc = self._exec_pyfunc(tree, thing, result_dir) + ret = self._exec_pyfunc(tree, thing, result_dir) core.log("END", datetime.datetime.now().strftime("%H:%M:%S.%f")) - return ret, "", "", desc + if len(ret) == 2: + return ret[0], "", "", ret[1] + return ret[0], ret[2], "", ret[1] def _exec_run(self, tree, thing, result_dir): rfd, wfd = None, None diff --git a/tests/patch/cc_maintainers/test.py b/tests/patch/cc_maintainers/test.py index d0f0b57..1c9dbb7 100644 --- a/tests/patch/cc_maintainers/test.py +++ b/tests/patch/cc_maintainers/test.py @@ -3,12 +3,14 @@ # Copyright (c) 2020 Facebook from typing import Tuple +import datetime import email import email.utils import subprocess import tempfile import os import re +import json """ Test if relevant maintainers were CCed """ emailpat = re.compile(r'([^ <"]*@[^ >"]*)') @@ -29,18 +31,127 @@ ] } -local_map = ["Vladimir Oltean "] +corp_suffix = ['@broadcom.com', '@huawei.com', '@intel.com', '@nvidia.com'] +pull_requesters = {'mkl@pengutronix.de', 'steffen.klassert@secunet.com', + 'pablo@netfilter.org', 'fw@strlen.de'} -def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: +local_map = ["Vladimir Oltean ", + "Alexander Duyck "] + + +class StalenessEntry: + def __init__(self, e, since_months): + self.email = e + + self._load_time = None + self._query_output = None + self._query_depth = None + self._newest_mid = None + + self._month_age = None + + self.reload(since_months) + + def __repr__(self): + return f'Staleness({self.email}, age:{self._month_age} search depth:{self._query_depth}, ' + \ + f'mid:{self._newest_mid})' + + def reload(self, since_months): + res = subprocess.run(['lei', 'q', f"f:{self.email} AND d:{since_months}.months.ago..", + '--no-save', '-q', '-O', 'https://lore.kernel.org/netdev'], + stdout=subprocess.PIPE) + output = res.stdout.decode('utf-8', 'replace') + + self._query_output = json.loads(output) + self._query_depth = since_months + self._load_time = datetime.datetime.now(datetime.UTC) + + newest = None + for e in self._query_output: + # Lei adds a null at the end of the list + if not e: + continue + dt = datetime.datetime.fromisoformat(e["rt"]) + if not newest or dt > newest: + newest = dt + self._newest_mid = e["m"] + + if not newest: + self._month_age = 999 + else: + self._month_age = (self._load_time - newest).seconds / 60 / 60 / 24 / 30 + + def is_stale(self, since_months, dbg=None): + if datetime.datetime.now(datetime.UTC) - self._load_time > datetime.timedelta(weeks=2): + if dbg is not None: + dbg.append("Cache expired for " + self.email) + self.reload(since_months) + + # We know it's not stale, doesn't matter how deep the entry is + if self._month_age <= since_months: + return False + # The query may have not been deep enough, refresh.. + if self._query_depth < since_months: + self.reload(since_months) + return self._month_age > since_months + + +class StalenessDB: + def __init__(self): + self._db = {} + + def is_stale(self, e, since_months, dbg=None): + if e not in self._db: + self._db[e] = StalenessEntry(e, since_months) + + ret = self._db[e].is_stale(since_months, dbg) + + if dbg is not None: + dbg.append(repr(self._db[e])) + return ret + + +stale_db = StalenessDB() + + +def get_stale(sender_from, missing, out): + global stale_db + + sender_corp = None + for corp in corp_suffix: + if sender_from.endswith(corp): + sender_corp = corp + break + + ret = set() + for e in missing: + months = 36 + # People within the same corp know sooner when others leave + if sender_corp and e.endswith(sender_corp): + months = 12 + if stale_db.is_stale(e, months, out): + ret.add(e) + return ret + + +def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str, str]: + out = [] + raw_gm = [] patch = thing + if patch.series and patch.series.cover_pull: + return 0, f"Pull request co-post, skipping", "" + msg = email.message_from_string(patch.raw_patch) addrs = msg.get_all('to', []) addrs += msg.get_all('cc', []) addrs += msg.get_all('from', []) addrs += msg.get_all('sender', []) included = set([e for n, e in email.utils.getaddresses(addrs)]) + out += ["=== Email ===", + f"From: {msg.get_all('from')}", + f"Included: {included}", ""] ignore_domains = [] sender_from = msg.get_all('from', ['nobody@nothing'])[0] @@ -52,6 +163,7 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: expected = set() blamed = set() + pure_blamed = set() ignored = set() with tempfile.NamedTemporaryFile() as fp: patch.write_out(fp) @@ -59,12 +171,15 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: with subprocess.Popen(command, cwd=tree.path, stdout=subprocess.PIPE) as p: line = p.stdout.readline().decode('utf8', 'replace') while line: + raw_gm.append(line.strip()) match = emailpat.search(line) if match: addr = match.group(1) expected.add(addr) if 'blamed_fixes' in line: blamed.add(addr) + if 'maintainer' not in line: + pure_blamed.add(addr) for domain in ignore_domains: if domain in addr: ignored.add(addr) @@ -74,6 +189,12 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: expected.difference_update(ignore_emails) blamed.difference_update(ignore_emails) + out += ["=== get_maint wants ===", + f"Expected: {expected}", + f"Blamed: {blamed}", + f"Pure blames: {pure_blamed}", + f"Ignored: {ignored}", ""] + expected.difference_update(ignored) blamed.difference_update(ignored) @@ -81,6 +202,15 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: missing = expected.difference(included) missing_blamed = blamed.difference(included) + stale_log = [] + stale = get_stale(sender_from, missing_blamed, stale_log) + out.append(f"Stale: {stale}") + + # Ditch all stale from blames, and from missing only those stales who aren't maintainers. + missing_blamed = missing_blamed.difference(stale) + stale_pure_blames = pure_blamed.intersection(stale) + missing = missing.difference(stale_pure_blames) + # Last resort, sift thru aliases if len(missing): with open(os.path.join(tree.path, '.mailmap'), 'r') as f: @@ -92,7 +222,7 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: for line in mmap_lines: if m in line: mmap_emails = emailpat.findall(line) - if m not in mmap_emails: # re-check the match with the real regex + if m not in mmap_emails: # re-check the match with the real regex continue for have in included: if have in mmap_emails: @@ -101,11 +231,18 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]: found.update(mapped) missing.difference_update(mapped) missing_blamed.difference_update(mapped) + out.append(f"Mapped: {mapped}") + + out += ["=== Final ===", + f"Missing: {missing}", + f"Missing blames: {missing_blamed}"] + out += ["", "=== stale log ==="] + stale_log + out += ["", "=== get_maintainer ==="] + raw_gm if len(missing_blamed): return 1, f"{len(missing_blamed)} blamed authors not CCed: {' '.join(missing_blamed)}; " + \ - f"{len(missing)} maintainers not CCed: {' '.join(missing)}" + f"{len(missing)} maintainers not CCed: {' '.join(missing)}", '\n'.join(out) if len(missing): ret = 250 if len(found) > 1 else 1 - return ret, f"{len(missing)} maintainers not CCed: {' '.join(missing)}" - return 0, f"CCed {len(found)} of {len(expected)} maintainers" + return ret, f"{len(missing)} maintainers not CCed: {' '.join(missing)}", '\n'.join(out) + return 0, f"CCed {len(found)} of {len(expected)} maintainers", '\n'.join(out)