Skip to content

Commit

Permalink
cc_maintainers: add staleness filter and pull request ignore
Browse files Browse the repository at this point in the history
Introduce the ability to "log to stdout" by returning 3
values from python tests. Log info from cc_maintainers.

Check if we heard from people who are blamed in the last
3 years, and ignore them if we didn't. This is implemented
as a public inbox query so cache the info for 2 weeks to
avoid frequent refreshes. This can be further optimized
to recalculate the age based on existing results for
active folks.

Also don't require known bona fide sub-maintainers to CC
people when co-posting patches with PRs.

Before pushing this change avg runtime for the test
on netdev was 1.4s, we'll see how much this slows us down...

Signed-off-by: Jakub Kicinski <[email protected]>
  • Loading branch information
kuba-moo committed Nov 8, 2023
1 parent ac5eef9 commit b3593bc
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 9 deletions.
3 changes: 2 additions & 1 deletion core/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ class Patch(object):
write_out(fp)
Write the raw patch into the given file pointer.
"""
def __init__(self, raw_patch, ident=None, title=""):
def __init__(self, raw_patch, ident=None, title="", series=None):
self.raw_patch = raw_patch
self.title = title
self.subject = ""
self.series = series

subj = re.search(r'Subject: \[.*\](.*)', raw_patch)
if not subj:
Expand Down
6 changes: 4 additions & 2 deletions core/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,11 @@ def _exec(self, tree, thing, result_dir):
return self._exec_run(tree, thing, result_dir)
elif "pymod" in self.info:
core.log("START", datetime.datetime.now().strftime("%H:%M:%S.%f"))
ret, desc = self._exec_pyfunc(tree, thing, result_dir)
ret = self._exec_pyfunc(tree, thing, result_dir)
core.log("END", datetime.datetime.now().strftime("%H:%M:%S.%f"))
return ret, "", "", desc
if len(ret) == 2:
return ret[0], "", "", ret[1]
return ret[0], ret[2], "", ret[1]

def _exec_run(self, tree, thing, result_dir):
rfd, wfd = None, None
Expand Down
149 changes: 143 additions & 6 deletions tests/patch/cc_maintainers/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
# Copyright (c) 2020 Facebook

from typing import Tuple
import datetime
import email
import email.utils
import subprocess
import tempfile
import os
import re
import json
""" Test if relevant maintainers were CCed """

emailpat = re.compile(r'([^ <"]*@[^ >"]*)')
Expand All @@ -29,18 +31,127 @@
]
}

local_map = ["Vladimir Oltean <vladimir.oltean@nxp.com> <olteanv@gmail.com>"]
corp_suffix = ['@broadcom.com', '@huawei.com', '@intel.com', '@nvidia.com']

pull_requesters = {'[email protected]', '[email protected]',
'[email protected]', '[email protected]'}

def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]:
local_map = ["Vladimir Oltean <[email protected]> <[email protected]>",
"Alexander Duyck <[email protected]> <[email protected]>"]


class StalenessEntry:
def __init__(self, e, since_months):
self.email = e

self._load_time = None
self._query_output = None
self._query_depth = None
self._newest_mid = None

self._month_age = None

self.reload(since_months)

def __repr__(self):
return f'Staleness({self.email}, age:{self._month_age} search depth:{self._query_depth}, ' + \
f'mid:{self._newest_mid})'

def reload(self, since_months):
res = subprocess.run(['lei', 'q', f"f:{self.email} AND d:{since_months}.months.ago..",
'--no-save', '-q', '-O', 'https://lore.kernel.org/netdev'],
stdout=subprocess.PIPE)
output = res.stdout.decode('utf-8', 'replace')

self._query_output = json.loads(output)
self._query_depth = since_months
self._load_time = datetime.datetime.now(datetime.UTC)

newest = None
for e in self._query_output:
# Lei adds a null at the end of the list
if not e:
continue
dt = datetime.datetime.fromisoformat(e["rt"])
if not newest or dt > newest:
newest = dt
self._newest_mid = e["m"]

if not newest:
self._month_age = 999
else:
self._month_age = (self._load_time - newest).seconds / 60 / 60 / 24 / 30

def is_stale(self, since_months, dbg=None):
if datetime.datetime.now(datetime.UTC) - self._load_time > datetime.timedelta(weeks=2):
if dbg is not None:
dbg.append("Cache expired for " + self.email)
self.reload(since_months)

# We know it's not stale, doesn't matter how deep the entry is
if self._month_age <= since_months:
return False
# The query may have not been deep enough, refresh..
if self._query_depth < since_months:
self.reload(since_months)
return self._month_age > since_months


class StalenessDB:
def __init__(self):
self._db = {}

def is_stale(self, e, since_months, dbg=None):
if e not in self._db:
self._db[e] = StalenessEntry(e, since_months)

ret = self._db[e].is_stale(since_months, dbg)

if dbg is not None:
dbg.append(repr(self._db[e]))
return ret


stale_db = StalenessDB()


def get_stale(sender_from, missing, out):
global stale_db

sender_corp = None
for corp in corp_suffix:
if sender_from.endswith(corp):
sender_corp = corp
break

ret = set()
for e in missing:
months = 36
# People within the same corp know sooner when others leave
if sender_corp and e.endswith(sender_corp):
months = 12
if stale_db.is_stale(e, months, out):
ret.add(e)
return ret


def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str, str]:
out = []
raw_gm = []
patch = thing

if patch.series and patch.series.cover_pull:
return 0, f"Pull request co-post, skipping", ""

msg = email.message_from_string(patch.raw_patch)
addrs = msg.get_all('to', [])
addrs += msg.get_all('cc', [])
addrs += msg.get_all('from', [])
addrs += msg.get_all('sender', [])
included = set([e for n, e in email.utils.getaddresses(addrs)])
out += ["=== Email ===",
f"From: {msg.get_all('from')}",
f"Included: {included}", ""]

ignore_domains = []
sender_from = msg.get_all('from', ['nobody@nothing'])[0]
Expand All @@ -52,19 +163,23 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]:

expected = set()
blamed = set()
pure_blamed = set()
ignored = set()
with tempfile.NamedTemporaryFile() as fp:
patch.write_out(fp)
command = ['./scripts/get_maintainer.pl', '--git-min-percent', '25', '--', fp.name]
with subprocess.Popen(command, cwd=tree.path, stdout=subprocess.PIPE) as p:
line = p.stdout.readline().decode('utf8', 'replace')
while line:
raw_gm.append(line.strip())
match = emailpat.search(line)
if match:
addr = match.group(1)
expected.add(addr)
if 'blamed_fixes' in line:
blamed.add(addr)
if 'maintainer' not in line:
pure_blamed.add(addr)
for domain in ignore_domains:
if domain in addr:
ignored.add(addr)
Expand All @@ -74,13 +189,28 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]:
expected.difference_update(ignore_emails)
blamed.difference_update(ignore_emails)

out += ["=== get_maint wants ===",
f"Expected: {expected}",
f"Blamed: {blamed}",
f"Pure blames: {pure_blamed}",
f"Ignored: {ignored}", ""]

expected.difference_update(ignored)
blamed.difference_update(ignored)

found = expected.intersection(included)
missing = expected.difference(included)
missing_blamed = blamed.difference(included)

stale_log = []
stale = get_stale(sender_from, missing_blamed, stale_log)
out.append(f"Stale: {stale}")

# Ditch all stale from blames, and from missing only those stales who aren't maintainers.
missing_blamed = missing_blamed.difference(stale)
stale_pure_blames = pure_blamed.intersection(stale)
missing = missing.difference(stale_pure_blames)

# Last resort, sift thru aliases
if len(missing):
with open(os.path.join(tree.path, '.mailmap'), 'r') as f:
Expand All @@ -92,7 +222,7 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]:
for line in mmap_lines:
if m in line:
mmap_emails = emailpat.findall(line)
if m not in mmap_emails: # re-check the match with the real regex
if m not in mmap_emails: # re-check the match with the real regex
continue
for have in included:
if have in mmap_emails:
Expand All @@ -101,11 +231,18 @@ def cc_maintainers(tree, thing, result_dir) -> Tuple[int, str]:
found.update(mapped)
missing.difference_update(mapped)
missing_blamed.difference_update(mapped)
out.append(f"Mapped: {mapped}")

out += ["=== Final ===",
f"Missing: {missing}",
f"Missing blames: {missing_blamed}"]
out += ["", "=== stale log ==="] + stale_log
out += ["", "=== get_maintainer ==="] + raw_gm

if len(missing_blamed):
return 1, f"{len(missing_blamed)} blamed authors not CCed: {' '.join(missing_blamed)}; " + \
f"{len(missing)} maintainers not CCed: {' '.join(missing)}"
f"{len(missing)} maintainers not CCed: {' '.join(missing)}", '\n'.join(out)
if len(missing):
ret = 250 if len(found) > 1 else 1
return ret, f"{len(missing)} maintainers not CCed: {' '.join(missing)}"
return 0, f"CCed {len(found)} of {len(expected)} maintainers"
return ret, f"{len(missing)} maintainers not CCed: {' '.join(missing)}", '\n'.join(out)
return 0, f"CCed {len(found)} of {len(expected)} maintainers", '\n'.join(out)

0 comments on commit b3593bc

Please sign in to comment.