From 769e9a914276b5133f055045c1ee5e6d72fb9308 Mon Sep 17 00:00:00 2001 From: Chris Lovering Date: Sun, 30 Jun 2024 19:01:24 +0100 Subject: [PATCH 1/4] Add dependabot config for docker & CI --- .github/dependabot.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index b38df29..59cb3ac 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,3 +4,21 @@ updates: directory: "/" schedule: interval: "daily" + + - package-ecosystem: "docker" + directory: "/" + schedule: + interval: "daily" + groups: + docker-dependencies: + patterns: + - "*" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + groups: + ci-dependencies: + patterns: + - "*" From 7898288420401566deef6c1cd23bde946bbfc614 Mon Sep 17 00:00:00 2001 From: Chris Lovering Date: Sun, 30 Jun 2024 18:57:23 +0100 Subject: [PATCH 2/4] Improve user in-guild sync process Previously we set all users in_guild to False, and relied on users being set back to in_guild when iterating through guild.members. However, this caused two problems: 1. For a short window a user's in_guild status was incorrect. 2. It required an update for all users in_guild to be sent to postgres to update in_guild back to True. This diff changes that, so instead only users who are not found in the guild have in_guild set to False. The bottleneck for this query is the number of users that are currently in_guild=False. Testing locally, with 360k users off guild, this took 7.4s to query out, and 0.5s to process & 15.1s to commit. 
--- .../exts/event_listeners/startup_sync.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/metricity/exts/event_listeners/startup_sync.py b/metricity/exts/event_listeners/startup_sync.py index 0f6264f..017ea1f 100644 --- a/metricity/exts/event_listeners/startup_sync.py +++ b/metricity/exts/event_listeners/startup_sync.py @@ -5,7 +5,7 @@ import discord from discord.ext import commands from pydis_core.utils import logging, scheduling -from sqlalchemy import column, update +from sqlalchemy import column, select from sqlalchemy.dialects.postgresql import insert from metricity import models @@ -35,10 +35,6 @@ async def sync_guild(self) -> None: await _syncer_utils.sync_thread_archive_state(guild) log.info("Beginning user synchronisation process") - async with async_session() as sess: - await sess.execute(update(models.User).values(in_guild=False)) - await sess.commit() - users = ( { "id": str(user.id), @@ -85,7 +81,6 @@ async def sync_guild(self) -> None: )) objs = list(res) - created += [obj[0] == 0 for obj in objs].count(True) updated += [obj[0] != 0 for obj in objs].count(True) @@ -95,6 +90,20 @@ async def sync_guild(self) -> None: await sess.commit() log.info("User upsert complete") + log.info("Beginning user in_guild sync") + + users_updated = 0 + guild_member_ids = {str(member.id) for member in guild.members} + async with async_session() as sess: + res = await sess.execute(select(models.User).filter_by(in_guild=True)) + in_guild_users = res.scalars() + for user in in_guild_users: + if user.id not in guild_member_ids: + users_updated += 1 + user.in_guild = False + await sess.commit() + log.info("User in_guild sync updated %d users to be off guild", users_updated) + log.info("User sync complete") self.bot.sync_process_complete.set() From f69fb12e122f01a826ca661ab8402fa4f48d2a33 Mon Sep 17 00:00:00 2001 From: Chris Lovering Date: Sun, 30 Jun 2024 19:27:40 +0100 Subject: [PATCH 3/4] Improve user in_guild sync query time by only 
loading the id column This means the other, larger, columns do not need to be deserialised, bringing the query time down from 7.4s to 3.5s. I couldn't simply do select(models.User.id) here, as we need the full User object in order to mutate and update it later in the process. --- metricity/exts/event_listeners/startup_sync.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/metricity/exts/event_listeners/startup_sync.py b/metricity/exts/event_listeners/startup_sync.py index 017ea1f..554f952 100644 --- a/metricity/exts/event_listeners/startup_sync.py +++ b/metricity/exts/event_listeners/startup_sync.py @@ -7,6 +7,7 @@ from pydis_core.utils import logging, scheduling from sqlalchemy import column, select from sqlalchemy.dialects.postgresql import insert +from sqlalchemy.orm import load_only from metricity import models from metricity.bot import Bot @@ -95,7 +96,9 @@ async def sync_guild(self) -> None: users_updated = 0 guild_member_ids = {str(member.id) for member in guild.members} async with async_session() as sess: - res = await sess.execute(select(models.User).filter_by(in_guild=True)) + + stmt = select(models.User).filter_by(in_guild=True).options(load_only(models.User.id)) + res = await sess.execute(stmt) in_guild_users = res.scalars() for user in in_guild_users: if user.id not in guild_member_ids: From db22e38630cb8a4f8df868bcd563080fd92b3d4b Mon Sep 17 00:00:00 2001 From: Chris Lovering Date: Sun, 30 Jun 2024 19:28:15 +0100 Subject: [PATCH 4/4] Add debug performance logs for in_guild sync --- .../exts/event_listeners/startup_sync.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/metricity/exts/event_listeners/startup_sync.py b/metricity/exts/event_listeners/startup_sync.py index 554f952..90b63c6 100644 --- a/metricity/exts/event_listeners/startup_sync.py +++ b/metricity/exts/event_listeners/startup_sync.py @@ -1,6 +1,7 @@ """An ext to sync the guild when the bot starts up.""" import math +import 
time import discord from discord.ext import commands @@ -25,7 +26,7 @@ def __init__(self, bot: Bot) -> None: self.bot = bot scheduling.create_task(self.sync_guild()) - async def sync_guild(self) -> None: + async def sync_guild(self) -> None: # noqa: PLR0914 """Sync all channels and members in the guild.""" await self.bot.wait_until_guild_available() @@ -96,16 +97,30 @@ async def sync_guild(self) -> None: users_updated = 0 guild_member_ids = {str(member.id) for member in guild.members} async with async_session() as sess: + start = time.perf_counter() stmt = select(models.User).filter_by(in_guild=True).options(load_only(models.User.id)) res = await sess.execute(stmt) in_guild_users = res.scalars() + query = time.perf_counter() + for user in in_guild_users: if user.id not in guild_member_ids: users_updated += 1 user.in_guild = False + proc = time.perf_counter() + await sess.commit() - log.info("User in_guild sync updated %d users to be off guild", users_updated) + end = time.perf_counter() + + log.debug( + "in_guild sync: total time %fs, query %fs, processing %fs, commit %fs", + end - start, + query - start, + proc - query, + end - proc, + ) + log.info("User in_guild sync updated %d users to be off guild", users_updated) log.info("User sync complete") self.bot.sync_process_complete.set()