From 3a6eec1cc88c3ee5a202f053ad00e52417175aa9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 15 Jul 2024 15:13:25 -0500 Subject: [PATCH 01/47] Make `filters.is_encrypted` robust to remote invites --- synapse/api/constants.py | 3 + synapse/handlers/sliding_sync.py | 76 +++++++++++++++++++------ synapse/storage/databases/main/state.py | 46 ++++++++++++++- 3 files changed, 105 insertions(+), 20 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 12d18137e07..42206f79b4e 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -237,6 +237,9 @@ class EventContentFields: # an unspecced field added to to-device messages to identify them uniquely-ish TO_DEVICE_MSGID: Final = "org.matrix.msgid" + # `m.room.encryption`` algorithm field + ENCRYPTION_ALGORITHM: Final = "algorithm" + class EventUnsignedContentFields: """Fields found inside the 'unsigned' data on events""" diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 1b5262d6670..4c51e15a564 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -24,7 +24,7 @@ import attr from immutabledict import immutabledict -from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership +from synapse.api.constants import AccountDataTypes, Direction, EventTypes, EventContentFields, Membership from synapse.events import EventBase from synapse.events.utils import strip_event from synapse.handlers.relations import BundledAggregations @@ -1109,23 +1109,63 @@ async def filter_rooms( # Filter for encrypted rooms if filters.is_encrypted is not None: - # Make a copy so we don't run into an error: `Set changed size during - # iteration`, when we filter out and remove items - for room_id in filtered_room_id_set.copy(): - state_at_to_token = await self.storage_controllers.state.get_state_at( - room_id, - to_token, - state_filter=StateFilter.from_types( - [(EventTypes.RoomEncryption, "")] - ), - # Partially-stated rooms should have all state events except for the - # membership events so we don't need to wait because we only care - # about retrieving the `EventTypes.RoomEncryption` state event here. - # Plus we don't want to block the whole sync waiting for this one - # room. - await_full_state=False, - ) - is_encrypted = state_at_to_token.get((EventTypes.RoomEncryption, "")) + room_to_is_encrypted = await self.store.bulk_get_room_is_encrypted( + filtered_room_id_set + ) + room_ids_with_results = set(room_to_is_encrypted.keys()) + + # We might not have room state for remote invite/knocks if we are the first + # person on our server to see the room. The best we can do is look in the + # optional stripped state from the invite/knock event. + room_ids_without_results = filtered_room_id_set.difference( + room_ids_with_results + ) + invite_or_knock_event_ids: List[str] = [] + for room_id in room_ids_without_results: + event_id = sync_room_map[room_id].event_id + # If this is an invite/knock then there should be an event_id + assert event_id is not None + + if sync_room_map[room_id].membership in ( + Membership.INVITE, + Membership.KNOCK, + ): + invite_or_knock_event_ids.append(event_id) + + invite_or_knock_events = await self.store.get_events( + invite_or_knock_event_ids + ) + for invite_or_knock_event in invite_or_knock_events.values(): + room_id = invite_or_knock_event.room_id + membership = sync_room_map[room_id].membership + + stripped_state = [] + if membership == Membership.INVITE: + stripped_state.extend( + invite_or_knock_event.unsigned.get("invite_room_state", []) + ) + elif membership == Membership.KNOCK: + stripped_state.extend( + invite_or_knock_event.unsigned.get("knock_room_state", []) + ) + + for event in stripped_state: + if event["type"] == EventTypes.RoomEncryption: + is_encrypted = ( + event["content"].get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + is not None + ) + room_to_is_encrypted[room_id] = is_encrypted + break + + for room_id in filtered_room_id_set: + is_encrypted = room_to_is_encrypted.get(room_id) + + # Just remove rooms if we can't determine their encryption status + if is_encrypted is None: + filtered_room_id_set.remove(room_id) # If we're looking for encrypted rooms, filter out rooms that are not # encrypted and vice versa diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 5188b2f7a4d..43fdea6906f 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -325,8 +325,9 @@ async def get_room_type(self, room_id: str) -> Optional[str]: async def bulk_get_room_type( self, room_ids: Set[str] ) -> Mapping[str, Optional[str]]: - """Bulk fetch room types for the given rooms, the server must be in all - the rooms given. + """Bulk fetch room types for the given rooms. + + Will only return results if the server is in the room given. """ rows = await self.db_pool.simple_select_many_batch( @@ -348,6 +349,47 @@ async def bulk_get_room_type( return results + @cachedList(cached_method_name="get_room_is_encrypted", list_name="room_ids") + async def bulk_get_room_is_encrypted( + self, room_ids: Set[str] + ) -> Mapping[str, Optional[str]]: + """Bulk fetch whether the given rooms are encrypted. + + Will only return results if the server is in the room given. + """ + + rows = await self.db_pool.simple_select_many_batch( + table="room_stats_state", + column="room_id", + iterable=room_ids, + retcols=("room_id", "room_type"), + desc="bulk_get_room_type", + ) + + # If we haven't updated `room_stats_state` with the room yet, query the state + # directly. This should happen only rarely so we don't mind if we do this in a + # loop. + results = dict(rows) + for room_id in room_ids - results.keys(): + state_map = await self.get_partial_filtered_current_state_ids( + room_id, + state_filter=StateFilter.from_types([(EventTypes.RoomEncryption, "")]), + ) + encryption_event = state_map.get((EventTypes.RoomEncryption, "")) + + is_encrypted = False + if encryption_event is not None: + is_encrypted = ( + encryption_event.content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + is not None + ) + + results[room_id] = is_encrypted + + return results + @cached(max_entries=100000, iterable=True) async def get_partial_current_state_ids(self, room_id: str) -> StateMap[str]: """Get the current state event ids for a room based on the From 7f72d80604c537da78b7ed81d181f6982dd6a4c8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 15 Jul 2024 16:11:34 -0500 Subject: [PATCH 02/47] Try share work --- synapse/handlers/sliding_sync.py | 84 +++++++++++++++------ synapse/storage/databases/main/state.py | 2 +- tests/handlers/test_sliding_sync.py | 99 ++++++++++++++++--------- 3 files changed, 127 insertions(+), 58 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 4c51e15a564..77f3318d749 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -24,7 +24,13 @@ import attr from immutabledict import immutabledict -from synapse.api.constants import AccountDataTypes, Direction, EventTypes, EventContentFields, Membership +from synapse.api.constants import ( + AccountDataTypes, + Direction, + EventTypes, + EventContentFields, + Membership, +) from synapse.events import EventBase from synapse.events.utils import strip_event from synapse.handlers.relations import BundledAggregations @@ -38,6 +44,7 @@ RoomStreamToken, StateMap, StreamKeyType, + StrCollection, StreamToken, UserID, ) @@ -1107,48 +1114,77 @@ async def filter_rooms( if filters.spaces: raise NotImplementedError() - # Filter for encrypted rooms - if filters.is_encrypted is not None: - room_to_is_encrypted = await self.store.bulk_get_room_is_encrypted( - filtered_room_id_set - ) - room_ids_with_results = set(room_to_is_encrypted.keys()) + async def _bulk_fetch_stripped_state_for_rooms( + room_ids_to_fetch: StrCollection[str], + ) -> Mapping[str, Optional[List[Any]]]: + """ + Fetch stripped state for a list of rooms that we are invited to/knocked on. + """ + room_to_stripped_state_map: Dict[str, Optional[List[Any]]] = {} - # We might not have room state for remote invite/knocks if we are the first - # person on our server to see the room. The best we can do is look in the - # optional stripped state from the invite/knock event. - room_ids_without_results = filtered_room_id_set.difference( - room_ids_with_results - ) + # Gather a list of event IDs we can grab stripped state from invite_or_knock_event_ids: List[str] = [] - for room_id in room_ids_without_results: - event_id = sync_room_map[room_id].event_id - # If this is an invite/knock then there should be an event_id - assert event_id is not None - + for room_id in room_ids_to_fetch: if sync_room_map[room_id].membership in ( Membership.INVITE, Membership.KNOCK, ): + event_id = sync_room_map[room_id].event_id + # If this is an invite/knock then there should be an event_id + assert event_id is not None invite_or_knock_event_ids.append(event_id) + else: + room_to_stripped_state_map[room_id] = None invite_or_knock_events = await self.store.get_events( invite_or_knock_event_ids ) for invite_or_knock_event in invite_or_knock_events.values(): room_id = invite_or_knock_event.room_id - membership = sync_room_map[room_id].membership + membership = invite_or_knock_event.membership - stripped_state = [] if membership == Membership.INVITE: - stripped_state.extend( - invite_or_knock_event.unsigned.get("invite_room_state", []) + # Scrutinize unsigned things + invite_room_state = invite_or_knock_event.unsigned.get( + "invite_room_state", None ) + if isinstance(invite_room_state, list): + room_to_stripped_state_map[room_id] = invite_room_state elif membership == Membership.KNOCK: - stripped_state.extend( - invite_or_knock_event.unsigned.get("knock_room_state", []) + # Scrutinize unsigned things + knock_room_state = invite_or_knock_event.unsigned.get( + "knock_room_state", None + ) + if isinstance(knock_room_state, list): + room_to_stripped_state_map[room_id] = knock_room_state + else: + raise AssertionError( + f"Unexpected membership {membership} (fix Synapse)" ) + return room_to_stripped_state_map + + # Filter for encrypted rooms + if filters.is_encrypted is not None: + room_to_is_encrypted = await self.store.bulk_get_room_is_encrypted( + filtered_room_id_set + ) + room_ids_with_results = [ + room_id + for room_id, is_encrypted in room_to_is_encrypted.items() + if is_encrypted is not None + ] + + # We might not have room state for remote invite/knocks if we are the first + # person on our server to see the room. The best we can do is look in the + # optional stripped state from the invite/knock event. + room_ids_without_results = filtered_room_id_set.difference( + room_ids_with_results + ) + asdf = await _bulk_fetch_stripped_state_for_rooms(room_ids_without_results) + + for room_id in room_ids_without_results: + stripped_state = asdf[room_id] for event in stripped_state: if event["type"] == EventTypes.RoomEncryption: is_encrypted = ( diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 43fdea6906f..9c38793ea74 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -352,7 +352,7 @@ async def bulk_get_room_type( @cachedList(cached_method_name="get_room_is_encrypted", list_name="room_ids") async def bulk_get_room_is_encrypted( self, room_ids: Set[str] - ) -> Mapping[str, Optional[str]]: + ) -> Mapping[str, Optional[bool]]: """Bulk fetch whether the given rooms are encrypted. Will only return results if the server is in the room given. diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index a7aa9bb8afe..0c92a1d6943 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -19,7 +19,7 @@ # import logging from copy import deepcopy -from typing import Dict, Optional +from typing import Dict, List, Optional from unittest.mock import patch from parameterized import parameterized @@ -3093,6 +3093,65 @@ def _create_dm_room( return room_id + _remote_invite_count: int = 0 + + def _create_remote_invite_room_for_user( + self, invitee_user_id: str, unsigned_invite_room_state: Optional[List[JsonDict]] + ) -> None: + """ + Create a fake remote invite and persist it. + + Args: + invitee_user_id: The person being invited + unsigned_invite_room_state: List of stripped state events to assist the + receiver in identifying the room. Use the `strip_event(...)` helper. + """ + invite_room_id = f"!test_room{self._remote_invite_count}:remote_server" + + invite_event_dict = { + "room_id": invite_room_id, + "sender": "@inviter:remote_server", + "state_key": invitee_user_id, + "depth": 1, + "origin_server_ts": 1, + "type": EventTypes.Member, + "content": {"membership": Membership.INVITE}, + "auth_events": [], + "prev_events": [], + } + if unsigned_invite_room_state is not None: + for stripped_event in unsigned_invite_room_state: + # Required stripped event fields + assert "type" in stripped_event + assert "state_key" in stripped_event + assert "sender" in stripped_event + assert "content" in stripped_event + + invite_event_dict["unsigned"] = { + "invite_room_state": unsigned_invite_room_state + } + + invite_event = make_event_from_dict( + invite_event_dict, + room_version=RoomVersions.V10, + ) + invite_event.internal_metadata.outlier = True + invite_event.internal_metadata.out_of_band_membership = True + + self.get_success( + self.store.maybe_store_room_on_outlier_membership( + room_id=invite_room_id, room_version=invite_event.room_version + ) + ) + context = EventContext.for_outlier(self.hs.get_storage_controllers()) + persist_controller = self.hs.get_storage_controllers().persistence + assert persist_controller is not None + self.get_success(persist_controller.persist_event(invite_event, context)) + + self._remote_invite_count += 1 + + return invite_room_id + def test_filter_dm_rooms(self) -> None: """ Test `filter.is_dm` for DM rooms @@ -3461,9 +3520,10 @@ def test_filter_not_room_types(self) -> None: self.assertEqual(filtered_room_map.keys(), {space_room_id}) - def test_filter_room_types_with_invite_remote_room(self) -> None: - """Test that we can apply a room type filter, even if we have an invite - for a remote room. + def test_filter_room_types_with_remote_invite_room(self) -> None: + """ + Test that we can apply a `filter.not_room_types` filter, even if we only have an + invite for a remote room. This is a regression test. """ @@ -3471,34 +3531,7 @@ def test_filter_room_types_with_invite_remote_room(self) -> None: user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - # Create a fake remote invite and persist it. - invite_room_id = "!some:room" - invite_event = make_event_from_dict( - { - "room_id": invite_room_id, - "sender": "@user:test.serv", - "state_key": user1_id, - "depth": 1, - "origin_server_ts": 1, - "type": EventTypes.Member, - "content": {"membership": Membership.INVITE}, - "auth_events": [], - "prev_events": [], - }, - room_version=RoomVersions.V10, - ) - invite_event.internal_metadata.outlier = True - invite_event.internal_metadata.out_of_band_membership = True - - self.get_success( - self.store.maybe_store_room_on_outlier_membership( - room_id=invite_room_id, room_version=invite_event.room_version - ) - ) - context = EventContext.for_outlier(self.hs.get_storage_controllers()) - persist_controller = self.hs.get_storage_controllers().persistence - assert persist_controller is not None - self.get_success(persist_controller.persist_event(invite_event, context)) + remote_invite_room_id = self._create_remote_invite_room_for_user(user1_id, None) # Create a normal room (no room type) room_id = self.helper.create_room_as(user1_id, tok=user1_tok) @@ -3523,7 +3556,7 @@ def test_filter_room_types_with_invite_remote_room(self) -> None: ) ) - self.assertEqual(filtered_room_map.keys(), {room_id, invite_room_id}) + self.assertEqual(filtered_room_map.keys(), {room_id, remote_invite_room_id}) class SortRoomsTestCase(HomeserverTestCase): From 18ec4379f7a315a021c7569303b621bd299ec7e3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 15 Jul 2024 17:22:04 -0500 Subject: [PATCH 03/47] More robust --- synapse/handlers/sliding_sync.py | 138 +++++++++++++++--------- synapse/storage/databases/main/state.py | 38 ++++--- tests/handlers/test_sliding_sync.py | 38 +++++++ 3 files changed, 148 insertions(+), 66 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 77f3318d749..f0050e2b1aa 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -17,6 +17,7 @@ # [This file includes modifications made by New Vector Limited] # # +import enum import logging from itertools import chain from typing import TYPE_CHECKING, Any, Dict, Final, List, Mapping, Optional, Set, Tuple @@ -27,8 +28,8 @@ from synapse.api.constants import ( AccountDataTypes, Direction, - EventTypes, EventContentFields, + EventTypes, Membership, ) from synapse.events import EventBase @@ -43,8 +44,8 @@ Requester, RoomStreamToken, StateMap, - StreamKeyType, StrCollection, + StreamKeyType, StreamToken, UserID, ) @@ -58,6 +59,12 @@ logger = logging.getLogger(__name__) +class _Sentinel(enum.Enum): + # defining a sentinel in this way allows mypy to correctly handle the + # type of a dictionary lookup and subsequent type narrowing. + sentinel = object() + + # The event types that clients should consider as new activity. DEFAULT_BUMP_EVENT_TYPES = { EventTypes.Message, @@ -1092,35 +1099,20 @@ async def filter_rooms( A filtered dictionary of room IDs along with membership information in the room at the time of `to_token`. """ - filtered_room_id_set = set(sync_room_map.keys()) - - # Filter for Direct-Message (DM) rooms - if filters.is_dm is not None: - if filters.is_dm: - # Only DM rooms please - filtered_room_id_set = { - room_id - for room_id in filtered_room_id_set - if sync_room_map[room_id].is_dm - } - else: - # Only non-DM rooms please - filtered_room_id_set = { - room_id - for room_id in filtered_room_id_set - if not sync_room_map[room_id].is_dm - } - - if filters.spaces: - raise NotImplementedError() + room_id_to_stripped_state_map: Dict[str, Optional[List[Any]]] = {} async def _bulk_fetch_stripped_state_for_rooms( - room_ids_to_fetch: StrCollection[str], - ) -> Mapping[str, Optional[List[Any]]]: + room_ids: StrCollection, + ) -> None: """ - Fetch stripped state for a list of rooms that we are invited to/knocked on. + Fetch stripped state for a list of rooms (stripped state is only applicable to invite/knock rooms). """ - room_to_stripped_state_map: Dict[str, Optional[List[Any]]] = {} + # Fetch what we haven't before + room_ids_to_fetch = [ + room_id + for room_id in room_ids + if room_id not in room_id_to_stripped_state_map + ] # Gather a list of event IDs we can grab stripped state from invite_or_knock_event_ids: List[str] = [] @@ -1134,7 +1126,7 @@ async def _bulk_fetch_stripped_state_for_rooms( assert event_id is not None invite_or_knock_event_ids.append(event_id) else: - room_to_stripped_state_map[room_id] = None + room_id_to_stripped_state_map[room_id] = None invite_or_knock_events = await self.store.get_events( invite_or_knock_event_ids @@ -1148,30 +1140,56 @@ async def _bulk_fetch_stripped_state_for_rooms( invite_room_state = invite_or_knock_event.unsigned.get( "invite_room_state", None ) + # `invite_room_state` should be a list of stripped events if isinstance(invite_room_state, list): - room_to_stripped_state_map[room_id] = invite_room_state + room_id_to_stripped_state_map[room_id] = invite_room_state + else: + room_id_to_stripped_state_map[room_id] = None elif membership == Membership.KNOCK: # Scrutinize unsigned things knock_room_state = invite_or_knock_event.unsigned.get( "knock_room_state", None ) + # `knock_room_state` should be a list of stripped events if isinstance(knock_room_state, list): - room_to_stripped_state_map[room_id] = knock_room_state + room_id_to_stripped_state_map[room_id] = knock_room_state + else: + room_id_to_stripped_state_map[room_id] = None else: raise AssertionError( - f"Unexpected membership {membership} (fix Synapse)" + f"Unexpected membership {membership} (this is a problem with Synapse itself)" ) - return room_to_stripped_state_map + filtered_room_id_set = set(sync_room_map.keys()) + + # Filter for Direct-Message (DM) rooms + if filters.is_dm is not None: + if filters.is_dm: + # Only DM rooms please + filtered_room_id_set = { + room_id + for room_id in filtered_room_id_set + if sync_room_map[room_id].is_dm + } + else: + # Only non-DM rooms please + filtered_room_id_set = { + room_id + for room_id in filtered_room_id_set + if not sync_room_map[room_id].is_dm + } + + if filters.spaces is not None: + raise NotImplementedError() # Filter for encrypted rooms if filters.is_encrypted is not None: - room_to_is_encrypted = await self.store.bulk_get_room_is_encrypted( + room_id_to_is_encrypted = await self.store.bulk_get_room_is_encrypted( filtered_room_id_set ) room_ids_with_results = [ room_id - for room_id, is_encrypted in room_to_is_encrypted.items() + for room_id, is_encrypted in room_id_to_is_encrypted.items() if is_encrypted is not None ] @@ -1181,24 +1199,42 @@ async def _bulk_fetch_stripped_state_for_rooms( room_ids_without_results = filtered_room_id_set.difference( room_ids_with_results ) - asdf = await _bulk_fetch_stripped_state_for_rooms(room_ids_without_results) + await _bulk_fetch_stripped_state_for_rooms(room_ids_without_results) + # Update our room_id_to_is_encrypted map based on the stripped state for room_id in room_ids_without_results: - stripped_state = asdf[room_id] - for event in stripped_state: - if event["type"] == EventTypes.RoomEncryption: - is_encrypted = ( - event["content"].get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - is not None - ) - room_to_is_encrypted[room_id] = is_encrypted - break + stripped_state = room_id_to_stripped_state_map.get( + room_id, _Sentinel.sentinel + ) + assert stripped_state is not _Sentinel.sentinel, ( + f"Stripped state left unset for room {room_id}. " + + "Make sure you're calling `_bulk_fetch_stripped_state_for_rooms(...)` " + + "with that room_id. (this is a problem with Synapse itself)" + ) - for room_id in filtered_room_id_set: - is_encrypted = room_to_is_encrypted.get(room_id) + if stripped_state is not None: + for event in stripped_state: + event_type = event.get("type") + event_content = event.get("content") + # Scrutinize unsigned stripped state + if isinstance(event_type, str) and isinstance( + event_content, dict + ): + if event_type == EventTypes.RoomEncryption: + # Optional because Python is not block-scoped and we re-use the name below + is_encrypted: Optional[bool] = ( + event_content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + is not None + ) + room_id_to_is_encrypted[room_id] = is_encrypted + break + else: + # Didn't see any encryption events in the stripped state + room_id_to_is_encrypted[room_id] = False + for room_id, is_encrypted in room_id_to_is_encrypted.items(): # Just remove rooms if we can't determine their encryption status if is_encrypted is None: filtered_room_id_set.remove(room_id) @@ -1251,13 +1287,13 @@ async def _bulk_fetch_stripped_state_for_rooms( ): filtered_room_id_set.remove(room_id) - if filters.room_name_like: + if filters.room_name_like is not None: raise NotImplementedError() - if filters.tags: + if filters.tags is not None: raise NotImplementedError() - if filters.not_tags: + if filters.not_tags is not None: raise NotImplementedError() # Assemble a new sync room map but only with the `filtered_room_id_set` diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 9c38793ea74..e75d89ce1a5 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -327,7 +327,7 @@ async def bulk_get_room_type( ) -> Mapping[str, Optional[str]]: """Bulk fetch room types for the given rooms. - Will only return results if the server is in the room given. + Rooms unknown to this server will be omitted from the response. """ rows = await self.db_pool.simple_select_many_batch( @@ -343,9 +343,13 @@ async def bulk_get_room_type( # mind if we do this in a loop. results = dict(rows) for room_id in room_ids - results.keys(): - create_event = await self.get_create_event_for_room(room_id) - room_type = create_event.content.get(EventContentFields.ROOM_TYPE) - results[room_id] = room_type + try: + create_event = await self.get_create_event_for_room(room_id) + room_type = create_event.content.get(EventContentFields.ROOM_TYPE) + results[room_id] = room_type + except NotFoundError: + # Ignore unknown rooms + pass return results @@ -355,7 +359,7 @@ async def bulk_get_room_is_encrypted( ) -> Mapping[str, Optional[bool]]: """Bulk fetch whether the given rooms are encrypted. - Will only return results if the server is in the room given. + Rooms unknown to this server will be omitted from the response. """ rows = await self.db_pool.simple_select_many_batch( @@ -370,21 +374,25 @@ async def bulk_get_room_is_encrypted( # directly. This should happen only rarely so we don't mind if we do this in a # loop. results = dict(rows) + encryption_event_ids: List[str] = [] for room_id in room_ids - results.keys(): state_map = await self.get_partial_filtered_current_state_ids( room_id, state_filter=StateFilter.from_types([(EventTypes.RoomEncryption, "")]), ) - encryption_event = state_map.get((EventTypes.RoomEncryption, "")) - - is_encrypted = False - if encryption_event is not None: - is_encrypted = ( - encryption_event.content.get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - is not None - ) + encryption_event_id = state_map.get((EventTypes.RoomEncryption, "")) + if encryption_event_id: + encryption_event_ids.append(encryption_event_id) + else: + results[room_id] = False + + encryption_event_map = await self.get_events(encryption_event_ids) + + for encryption_event in encryption_event_map.values(): + is_encrypted = ( + encryption_event.content.get(EventContentFields.ENCRYPTION_ALGORITHM) + is not None + ) results[room_id] = is_encrypted diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 0c92a1d6943..d3bb190b24b 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3265,6 +3265,44 @@ def test_filter_encrypted_rooms(self) -> None: self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + def test_filter_encrypted_with_remote_invite_room(self) -> None: + """ + Test that we can apply a `filter.is_encrypted` filter, even if we only have an + invite for a remote room. + + This is a regression test. + """ + + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + remote_invite_room_id = self._create_remote_invite_room_for_user(user1_id, None) + + # Create a normal room (no room type) + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self._get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=None, + to_token=after_rooms_token, + ) + + filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=True, + ), + after_rooms_token, + ) + ) + + self.assertEqual(filtered_room_map.keys(), {room_id, remote_invite_room_id}) + def test_filter_invite_rooms(self) -> None: """ Test `filter.is_invite` for rooms that the user has been invited to From a67157ecd483ed2c04256ee26fdd98f1f616f62c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 15 Jul 2024 17:49:58 -0500 Subject: [PATCH 04/47] Fix lints --- synapse/handlers/sliding_sync.py | 7 ++- synapse/storage/databases/main/state.py | 65 +++++++++++++++++++++++-- tests/handlers/test_sliding_sync.py | 26 ++++++---- 3 files changed, 81 insertions(+), 17 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index f0050e2b1aa..b629a4561c4 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1184,8 +1184,10 @@ async def _bulk_fetch_stripped_state_for_rooms( # Filter for encrypted rooms if filters.is_encrypted is not None: - room_id_to_is_encrypted = await self.store.bulk_get_room_is_encrypted( - filtered_room_id_set + # Lookup the encryption state from the database. Since this function is + # cached, need to make a mutable copy via `dict(...)`. + room_id_to_is_encrypted = dict( + await self.store.bulk_get_room_is_encrypted(filtered_room_id_set) ) room_ids_with_results = [ room_id @@ -1238,6 +1240,7 @@ async def _bulk_fetch_stripped_state_for_rooms( # Just remove rooms if we can't determine their encryption status if is_encrypted is None: filtered_room_id_set.remove(room_id) + continue # If we're looking for encrypted rooms, filter out rooms that are not # encrypted and vice versa diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index e75d89ce1a5..7114dd77ecc 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -353,6 +353,53 @@ async def bulk_get_room_type( return results + @cached(max_entries=10000) + async def get_room_is_encrypted(self, room_id: str) -> Optional[bool]: + """ + Get whether the given room is encrypted. + + Returns None if the room is not known to the server. + """ + + row = await self.db_pool.simple_select_one( + table="room_stats_state", + keyvalues={"room_id": room_id}, + retcols=("encryption",), + allow_none=True, + desc="get_room_is_encrypted", + ) + + if row is not None: + return row[0] + + # If we haven't updated `room_stats_state` with the room yet, query the state + # directly. + state_map = await self.get_partial_filtered_current_state_ids( + room_id, + state_filter=StateFilter.from_types( + [ + (EventTypes.Create, ""), + (EventTypes.RoomEncryption, ""), + ] + ), + ) + # We can use the create event as a canary to tell whether the server has seen + # the room before + create_event_id = state_map.get((EventTypes.Create, "")) + encryption_event_id = state_map.get((EventTypes.RoomEncryption, "")) + if create_event_id is None: + return None + elif create_event_id is not None and encryption_event_id is None: + return False + + encryption_event = await self.get_event(encryption_event_id) + is_encrypted = ( + encryption_event.content.get(EventContentFields.ENCRYPTION_ALGORITHM) + is not None + ) + + return is_encrypted + @cachedList(cached_method_name="get_room_is_encrypted", list_name="room_ids") async def bulk_get_room_is_encrypted( self, room_ids: Set[str] @@ -366,8 +413,8 @@ async def bulk_get_room_is_encrypted( table="room_stats_state", column="room_id", iterable=room_ids, - retcols=("room_id", "room_type"), - desc="bulk_get_room_type", + retcols=("room_id", "encryption"), + desc="bulk_get_encryption", ) # If we haven't updated `room_stats_state` with the room yet, query the state @@ -378,12 +425,20 @@ async def bulk_get_room_is_encrypted( for room_id in room_ids - results.keys(): state_map = await self.get_partial_filtered_current_state_ids( room_id, - state_filter=StateFilter.from_types([(EventTypes.RoomEncryption, "")]), + state_filter=StateFilter.from_types( + [ + (EventTypes.Create, ""), + (EventTypes.RoomEncryption, ""), + ] + ), ) + # We can use the create event as a canary to tell whether the server has + # seen the room before + create_event_id = state_map.get((EventTypes.Create, "")) encryption_event_id = state_map.get((EventTypes.RoomEncryption, "")) - if encryption_event_id: + if create_event_id is not None and encryption_event_id is not None: encryption_event_ids.append(encryption_event_id) - else: + elif create_event_id is not None: results[room_id] = False encryption_event_map = await self.get_events(encryption_event_ids) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index d3bb190b24b..53a85caaa04 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3097,7 +3097,7 @@ def _create_dm_room( def _create_remote_invite_room_for_user( self, invitee_user_id: str, unsigned_invite_room_state: Optional[List[JsonDict]] - ) -> None: + ) -> str: """ Create a fake remote invite and persist it. @@ -3105,6 +3105,9 @@ def _create_remote_invite_room_for_user( invitee_user_id: The person being invited unsigned_invite_room_state: List of stripped state events to assist the receiver in identifying the room. Use the `strip_event(...)` helper. + + Returns: + The room ID of the remote invite room """ invite_room_id = f"!test_room{self._remote_invite_count}:remote_server" @@ -3265,20 +3268,21 @@ def test_filter_encrypted_rooms(self) -> None: self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) - def test_filter_encrypted_with_remote_invite_room(self) -> None: + def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> None: """ - Test that we can apply a `filter.is_encrypted` filter, even if we only have an - invite for a remote room. - - This is a regression test. + Test that we can apply a `filter.is_encrypted` filter against a remote invite + room without any `unsigned.invite_room_state` (stripped state). """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - remote_invite_room_id = self._create_remote_invite_room_for_user(user1_id, None) + # Create a remote invite room without any `unsigned.invite_room_state` + _remote_invite_room_id = self._create_remote_invite_room_for_user( + user1_id, None + ) - # Create a normal room (no room type) + # Create a unencrypted room room_id = self.helper.create_room_as(user1_id, tok=user1_tok) after_rooms_token = self.event_sources.get_current_token() @@ -3295,13 +3299,15 @@ def test_filter_encrypted_with_remote_invite_room(self) -> None: UserID.from_string(user1_id), sync_room_map, SlidingSyncConfig.SlidingSyncList.Filters( - is_encrypted=True, + is_encrypted=False, ), after_rooms_token, ) ) - self.assertEqual(filtered_room_map.keys(), {room_id, remote_invite_room_id}) + # `remote_invite_room_id` should not appear because we can't figure out whether + # it is encrypted or not. + self.assertEqual(filtered_room_map.keys(), {room_id}) def test_filter_invite_rooms(self) -> None: """ From ffb61cf84d9775041efa6155d2349275e34a8460 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 15 Jul 2024 18:11:07 -0500 Subject: [PATCH 05/47] Return bool from db --- synapse/handlers/sliding_sync.py | 1 + synapse/storage/databases/main/state.py | 30 ++++++++++++++++------- tests/handlers/test_sliding_sync.py | 32 ++++++++++++++++++++++--- 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index b629a4561c4..c27a3832759 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1189,6 +1189,7 @@ async def _bulk_fetch_stripped_state_for_rooms( room_id_to_is_encrypted = dict( await self.store.bulk_get_room_is_encrypted(filtered_room_id_set) ) + logger.info("room_id_to_is_encrypted: %s", room_id_to_is_encrypted) room_ids_with_results = [ room_id for room_id, is_encrypted in room_id_to_is_encrypted.items() diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 7114dd77ecc..5a684d3880b 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -300,8 +300,9 @@ async def get_create_event_for_room(self, room_id: str) -> EventBase: @cached(max_entries=10000) async def get_room_type(self, room_id: str) -> Optional[str]: - """Get the room type for a given room. The server must be joined to the - given room. + """Get the room type for a given room. + + Returns None if the room is not known to the server. """ row = await self.db_pool.simple_select_one( @@ -317,9 +318,13 @@ async def get_room_type(self, room_id: str) -> Optional[str]: # If we haven't updated `room_stats_state` with the room yet, query the # create event directly. - create_event = await self.get_create_event_for_room(room_id) - room_type = create_event.content.get(EventContentFields.ROOM_TYPE) - return room_type + try: + create_event = await self.get_create_event_for_room(room_id) + room_type = create_event.content.get(EventContentFields.ROOM_TYPE) + return room_type + except NotFoundError: + # Ignore unknown rooms + pass @cachedList(cached_method_name="get_room_type", list_name="room_ids") async def bulk_get_room_type( @@ -358,7 +363,10 @@ async def get_room_is_encrypted(self, room_id: str) -> Optional[bool]: """ Get whether the given room is encrypted. - Returns None if the room is not known to the server. + Returns: + True if the room is encrypted, + False if it is not encrypted, and + None if the room is not known to the server. """ row = await self.db_pool.simple_select_one( @@ -370,7 +378,7 @@ async def get_room_is_encrypted(self, room_id: str) -> Optional[bool]: ) if row is not None: - return row[0] + return row[0] is not None # If we haven't updated `room_stats_state` with the room yet, query the state # directly. @@ -407,6 +415,12 @@ async def bulk_get_room_is_encrypted( """Bulk fetch whether the given rooms are encrypted. Rooms unknown to this server will be omitted from the response. + + Returns: + A mapping from room ID to whether the room is encrypted: + True if the room is encrypted, + False if it is not encrypted, and + None if the room is not known to the server. """ rows = await self.db_pool.simple_select_many_batch( @@ -420,7 +434,7 @@ async def bulk_get_room_is_encrypted( # If we haven't updated `room_stats_state` with the room yet, query the state # directly. This should happen only rarely so we don't mind if we do this in a # loop. - results = dict(rows) + results = {room_id: encryption is not None for room_id, encryption in rows} encryption_event_ids: List[str] = [] for room_id in room_ids - results.keys(): state_map = await self.get_partial_filtered_current_state_ids( diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 53a85caaa04..85e4e41b94f 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3219,7 +3219,7 @@ def test_filter_encrypted_rooms(self) -> None: user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - # Create a normal room + # Create a unencrypted room room_id = self.helper.create_room_as(user1_id, tok=user1_tok) # Create an encrypted room @@ -3285,6 +3285,15 @@ def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> Non # Create a unencrypted room room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # Create an encrypted room + encrypted_room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + self.helper.send_state( + encrypted_room_id, + EventTypes.RoomEncryption, + {"algorithm": "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + after_rooms_token = self.event_sources.get_current_token() # Get the rooms the user should be syncing with @@ -3294,7 +3303,24 @@ def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> Non to_token=after_rooms_token, ) - filtered_room_map = self.get_success( + # Try with `is_encrypted=True` + truthy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=True, + ), + after_rooms_token, + ) + ) + + # `remote_invite_room_id` should not appear because we can't figure out whether + # it is encrypted or not. + self.assertEqual(truthy_filtered_room_map.keys(), {encrypted_room_id}) + + # Try with `is_encrypted=False` + falsy_filtered_room_map = self.get_success( self.sliding_sync_handler.filter_rooms( UserID.from_string(user1_id), sync_room_map, @@ -3307,7 +3333,7 @@ def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> Non # `remote_invite_room_id` should not appear because we can't figure out whether # it is encrypted or not. - self.assertEqual(filtered_room_map.keys(), {room_id}) + self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) def test_filter_invite_rooms(self) -> None: """ From 9b77c9776618cf137951f58975faa63a88d8176c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 15 Jul 2024 22:17:42 -0500 Subject: [PATCH 06/47] Add tests --- synapse/handlers/sliding_sync.py | 1 - tests/handlers/test_sliding_sync.py | 176 +++++++++++++++++++++++++++- 2 files changed, 171 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index c27a3832759..b629a4561c4 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1189,7 +1189,6 @@ async def _bulk_fetch_stripped_state_for_rooms( room_id_to_is_encrypted = dict( await self.store.bulk_get_room_is_encrypted(filtered_room_id_set) ) - logger.info("room_id_to_is_encrypted: %s", room_id_to_is_encrypted) room_ids_with_results = [ room_id for room_id, is_encrypted in room_id_to_is_encrypted.items() diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 85e4e41b94f..4a4abf1660b 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3219,7 +3219,7 @@ def test_filter_encrypted_rooms(self) -> None: user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - # Create a unencrypted room + # Create an unencrypted room room_id = self.helper.create_room_as(user1_id, tok=user1_tok) # Create an encrypted room @@ -3227,7 +3227,7 @@ def test_filter_encrypted_rooms(self) -> None: self.helper.send_state( encrypted_room_id, EventTypes.RoomEncryption, - {"algorithm": "m.megolm.v1.aes-sha2"}, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, tok=user1_tok, ) @@ -3273,7 +3273,6 @@ def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> Non Test that we can apply a `filter.is_encrypted` filter against a remote invite room without any `unsigned.invite_room_state` (stripped state). """ - user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -3282,7 +3281,7 @@ def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> Non user1_id, None ) - # Create a unencrypted room + # Create an unencrypted room room_id = self.helper.create_room_as(user1_id, tok=user1_tok) # Create an encrypted room @@ -3290,7 +3289,7 @@ def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> Non self.helper.send_state( encrypted_room_id, EventTypes.RoomEncryption, - {"algorithm": "m.megolm.v1.aes-sha2"}, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, tok=user1_tok, ) @@ -3335,6 +3334,173 @@ def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> Non # it is encrypted or not. self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + def test_filter_encrypted_with_remote_invite_encrypted_room(self) -> None: + """ + Test that we can apply a `filter.is_encrypted` filter against a remote invite + encrypted room with some `unsigned.invite_room_state` (stripped state). + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room without any `unsigned.invite_room_state` + remote_invite_room_id = self._create_remote_invite_room_for_user( + user1_id, + [ + { + "type": EventTypes.Create, + "state_key": "", + "sender": "@inviter:remote_server", + "content": { + "creator": "@inviter:remote_server", + "room_version": RoomVersions.V10.identifier, + }, + }, + { + "type": EventTypes.RoomEncryption, + "state_key": "", + "sender": "@inviter:remote_server", + "content": { + EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", + }, + }, + ], + ) + + # Create an unencrypted room + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + + # Create an encrypted room + encrypted_room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + self.helper.send_state( + encrypted_room_id, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self._get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=None, + to_token=after_rooms_token, + ) + + # Try with `is_encrypted=True` + truthy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=True, + ), + after_rooms_token, + ) + ) + + # `remote_invite_room_id` should appear here because it is encrypted + # according to the stripped state + self.assertEqual( + truthy_filtered_room_map.keys(), {encrypted_room_id, remote_invite_room_id} + ) + + # Try with `is_encrypted=False` + falsy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=False, + ), + after_rooms_token, + ) + ) + + # `remote_invite_room_id` should not appear here because it is encrypted + # according to the stripped state + self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + + def test_filter_encrypted_with_remote_invite_unencrypted_room(self) -> None: + """ + Test that we can apply a `filter.is_encrypted` filter against a remote invite + unencrypted room with some `unsigned.invite_room_state` (stripped state). + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room without any `unsigned.invite_room_state` + remote_invite_room_id = self._create_remote_invite_room_for_user( + user1_id, + [ + { + "type": EventTypes.Create, + "state_key": "", + "sender": "@inviter:remote_server", + "content": { + "creator": "@inviter:remote_server", + "room_version": RoomVersions.V10.identifier, + }, + }, + # No room encryption event + ], + ) + + # Create an unencrypted room + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + + # Create an encrypted room + encrypted_room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + self.helper.send_state( + encrypted_room_id, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self._get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=None, + to_token=after_rooms_token, + ) + + # Try with `is_encrypted=True` + truthy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=True, + ), + after_rooms_token, + ) + ) + + # `remote_invite_room_id` should not appear here because it is unencrypted + # according to the stripped state + self.assertEqual(truthy_filtered_room_map.keys(), {encrypted_room_id}) + + # Try with `is_encrypted=False` + falsy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=False, + ), + after_rooms_token, + ) + ) + + # `remote_invite_room_id` should appear because it is unencrypted according to + # the stripped state + self.assertEqual( + falsy_filtered_room_map.keys(), {room_id, remote_invite_room_id} + ) + def test_filter_invite_rooms(self) -> None: """ Test `filter.is_invite` for rooms that the user has been invited to From d609205fd99f7a436a0f3ee351cd25c04ea13fd1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 00:27:18 -0500 Subject: [PATCH 07/47] Standardize --- synapse/handlers/sliding_sync.py | 89 +++++++++++++++++-------- synapse/handlers/stats.py | 4 +- synapse/storage/databases/main/state.py | 85 +++++++++++------------ 3 files changed, 109 insertions(+), 69 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index b629a4561c4..3ea04b2a96a 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1186,14 +1186,10 @@ async def _bulk_fetch_stripped_state_for_rooms( if filters.is_encrypted is not None: # Lookup the encryption state from the database. Since this function is # cached, need to make a mutable copy via `dict(...)`. - room_id_to_is_encrypted = dict( - await self.store.bulk_get_room_is_encrypted(filtered_room_id_set) + room_id_to_encryption = dict( + await self.store.bulk_get_room_encryption(filtered_room_id_set) ) - room_ids_with_results = [ - room_id - for room_id, is_encrypted in room_id_to_is_encrypted.items() - if is_encrypted is not None - ] + room_ids_with_results = room_id_to_encryption.keys() # We might not have room state for remote invite/knocks if we are the first # person on our server to see the room. The best we can do is look in the @@ -1203,7 +1199,7 @@ async def _bulk_fetch_stripped_state_for_rooms( ) await _bulk_fetch_stripped_state_for_rooms(room_ids_without_results) - # Update our room_id_to_is_encrypted map based on the stripped state + # Update our `room_id_to_encryption` map based on the stripped state for room_id in room_ids_without_results: stripped_state = room_id_to_stripped_state_map.get( room_id, _Sentinel.sentinel @@ -1223,27 +1219,27 @@ async def _bulk_fetch_stripped_state_for_rooms( event_content, dict ): if event_type == EventTypes.RoomEncryption: - # Optional because Python is not block-scoped and we re-use the name below - is_encrypted: Optional[bool] = ( - event_content.get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - is not None + room_id_to_encryption[room_id] = event_content.get( + EventContentFields.ENCRYPTION_ALGORITHM ) - room_id_to_is_encrypted[room_id] = is_encrypted break else: # Didn't see any encryption events in the stripped state - room_id_to_is_encrypted[room_id] = False + room_id_to_encryption[room_id] = None + + # Make a copy so we don't run into an error: `Set changed size during + # iteration`, when we filter out and remove items + for room_id in filtered_room_id_set.copy(): + encryption = room_id_to_encryption.get(room_id, _Sentinel.sentinel) - for room_id, is_encrypted in room_id_to_is_encrypted.items(): # Just remove rooms if we can't determine their encryption status - if is_encrypted is None: + if encryption is _Sentinel.sentinel: filtered_room_id_set.remove(room_id) continue # If we're looking for encrypted rooms, filter out rooms that are not # encrypted and vice versa + is_encrypted = encryption is not None if (filters.is_encrypted and not is_encrypted) or ( not filters.is_encrypted and is_encrypted ): @@ -1269,15 +1265,56 @@ async def _bulk_fetch_stripped_state_for_rooms( # provided in the list. `None` is a valid type for rooms which do not have a # room type. if filters.room_types is not None or filters.not_room_types is not None: - room_to_type = await self.store.bulk_get_room_type( - { - room_id - for room_id in filtered_room_id_set - # We only know the room types for joined rooms - if sync_room_map[room_id].membership == Membership.JOIN - } + # Lookup the room type from the database. Since this function is + # cached, need to make a mutable copy via `dict(...)`. + room_id_to_type = dict( + await self.store.bulk_get_room_type(filtered_room_id_set) + ) + room_ids_with_results = room_id_to_type.keys() + + # We might not have room state for remote invite/knocks if we are the first + # person on our server to see the room. The best we can do is look in the + # optional stripped state from the invite/knock event. + room_ids_without_results = filtered_room_id_set.difference( + room_ids_with_results ) - for room_id, room_type in room_to_type.items(): + await _bulk_fetch_stripped_state_for_rooms(room_ids_without_results) + + # Update our `room_id_to_type` map based on the stripped state + for room_id in room_ids_without_results: + stripped_state = room_id_to_stripped_state_map.get( + room_id, _Sentinel.sentinel + ) + assert stripped_state is not _Sentinel.sentinel, ( + f"Stripped state left unset for room {room_id}. " + + "Make sure you're calling `_bulk_fetch_stripped_state_for_rooms(...)` " + + "with that room_id. (this is a problem with Synapse itself)" + ) + + if stripped_state is not None: + for event in stripped_state: + event_type = event.get("type") + event_content = event.get("content") + # Scrutinize unsigned stripped state + if isinstance(event_type, str) and isinstance( + event_content, dict + ): + if event_type == EventTypes.Create: + room_id_to_type[room_id] = event_content.get( + EventContentFields.ROOM_TYPE + ) + break + + # Make a copy so we don't run into an error: `Set changed size during + # iteration`, when we filter out and remove items + for room_id in filtered_room_id_set.copy(): + room_type = room_id_to_type.get(room_id, _Sentinel.sentinel) + + # Just remove rooms if we can't determine their type + if room_type is _Sentinel.sentinel: + filtered_room_id_set.remove(room_id) + continue + if ( filters.room_types is not None and room_type not in filters.room_types diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 1c94f3ca461..8f90c170602 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -293,7 +293,9 @@ async def _handle_deltas( "history_visibility" ) elif delta.event_type == EventTypes.RoomEncryption: - room_state["encryption"] = event_content.get("algorithm") + room_state["encryption"] = event_content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) elif delta.event_type == EventTypes.Name: room_state["name"] = event_content.get("name") elif delta.event_type == EventTypes.Topic: diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 5a684d3880b..413f78d8095 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -72,7 +72,6 @@ _T = TypeVar("_T") - MAX_STATE_DELTA_HOPS = 100 @@ -300,9 +299,14 @@ async def get_create_event_for_room(self, room_id: str) -> EventBase: @cached(max_entries=10000) async def get_room_type(self, room_id: str) -> Optional[str]: - """Get the room type for a given room. + """ + Get the room type for a given room. + + Returns: + The room type if known (`None` is a valid room type) - Returns None if the room is not known to the server. + Raises: + NotFoundError if the room is unknown """ row = await self.db_pool.simple_select_one( @@ -318,19 +322,16 @@ async def get_room_type(self, room_id: str) -> Optional[str]: # If we haven't updated `room_stats_state` with the room yet, query the # create event directly. - try: - create_event = await self.get_create_event_for_room(room_id) - room_type = create_event.content.get(EventContentFields.ROOM_TYPE) - return room_type - except NotFoundError: - # Ignore unknown rooms - pass + create_event = await self.get_create_event_for_room(room_id) + room_type = create_event.content.get(EventContentFields.ROOM_TYPE) + return room_type @cachedList(cached_method_name="get_room_type", list_name="room_ids") async def bulk_get_room_type( self, room_ids: Set[str] ) -> Mapping[str, Optional[str]]: - """Bulk fetch room types for the given rooms. + """ + Bulk fetch room types for the given rooms. Rooms unknown to this server will be omitted from the response. """ @@ -359,14 +360,15 @@ async def bulk_get_room_type( return results @cached(max_entries=10000) - async def get_room_is_encrypted(self, room_id: str) -> Optional[bool]: + async def get_room_encryption(self, room_id: str) -> Optional[str]: """ - Get whether the given room is encrypted. + Get the encryption algorithm for a given room. Returns: - True if the room is encrypted, - False if it is not encrypted, and - None if the room is not known to the server. + The encryption algorithm if the room is encrypted, otherwise `None`. + + Raises: + NotFoundError if the room is unknown """ row = await self.db_pool.simple_select_one( @@ -378,7 +380,7 @@ async def get_room_is_encrypted(self, room_id: str) -> Optional[bool]: ) if row is not None: - return row[0] is not None + return row[0] # If we haven't updated `room_stats_state` with the room yet, query the state # directly. @@ -396,31 +398,28 @@ async def get_room_is_encrypted(self, room_id: str) -> Optional[bool]: create_event_id = state_map.get((EventTypes.Create, "")) encryption_event_id = state_map.get((EventTypes.RoomEncryption, "")) if create_event_id is None: + raise NotFoundError( + f"Unknown room {room_id} does not have any state to determine room encryption" + ) + + if encryption_event_id is None: return None - elif create_event_id is not None and encryption_event_id is None: - return False encryption_event = await self.get_event(encryption_event_id) - is_encrypted = ( - encryption_event.content.get(EventContentFields.ENCRYPTION_ALGORITHM) - is not None - ) - - return is_encrypted + return encryption_event.content.get(EventContentFields.ENCRYPTION_ALGORITHM) - @cachedList(cached_method_name="get_room_is_encrypted", list_name="room_ids") - async def bulk_get_room_is_encrypted( + @cachedList(cached_method_name="get_room_encryption", list_name="room_ids") + async def bulk_get_room_encryption( self, room_ids: Set[str] - ) -> Mapping[str, Optional[bool]]: - """Bulk fetch whether the given rooms are encrypted. + ) -> Mapping[str, Optional[str]]: + """ + Bulk fetch room encryption for the given rooms. Rooms unknown to this server will be omitted from the response. Returns: - A mapping from room ID to whether the room is encrypted: - True if the room is encrypted, - False if it is not encrypted, and - None if the room is not known to the server. + A mapping from room ID to the room's encryption algorithm if the room is + encrypted, otherwise `None`. """ rows = await self.db_pool.simple_select_many_batch( @@ -434,7 +433,7 @@ async def bulk_get_room_is_encrypted( # If we haven't updated `room_stats_state` with the room yet, query the state # directly. This should happen only rarely so we don't mind if we do this in a # loop. - results = {room_id: encryption is not None for room_id, encryption in rows} + results = dict(rows) encryption_event_ids: List[str] = [] for room_id in room_ids - results.keys(): state_map = await self.get_partial_filtered_current_state_ids( @@ -450,21 +449,23 @@ async def bulk_get_room_is_encrypted( # seen the room before create_event_id = state_map.get((EventTypes.Create, "")) encryption_event_id = state_map.get((EventTypes.RoomEncryption, "")) - if create_event_id is not None and encryption_event_id is not None: + + if create_event_id is None: + continue + + if encryption_event_id is None: + results[room_id] = None + else: encryption_event_ids.append(encryption_event_id) - elif create_event_id is not None: - results[room_id] = False encryption_event_map = await self.get_events(encryption_event_ids) for encryption_event in encryption_event_map.values(): - is_encrypted = ( - encryption_event.content.get(EventContentFields.ENCRYPTION_ALGORITHM) - is not None + results[encryption_event.room_id] = encryption_event.content.get( + EventContentFields.ENCRYPTION_ALGORITHM ) - results[room_id] = is_encrypted - + logger.info("results %s", results) return results @cached(max_entries=100000, iterable=True) From 8ca8ae5bd34148f3ed990d50bbe1b127ead68239 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 00:30:24 -0500 Subject: [PATCH 08/47] Add TODO for more tests --- tests/handlers/test_sliding_sync.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 4a4abf1660b..8a1b448ed9e 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3756,6 +3756,7 @@ def test_filter_not_room_types(self) -> None: self.assertEqual(filtered_room_map.keys(), {space_room_id}) + # TODO: Add more tests like we have for `filters.is_encrypted` def test_filter_room_types_with_remote_invite_room(self) -> None: """ Test that we can apply a `filter.not_room_types` filter, even if we only have an From 4f5005490e5899bae1275248ffe065cb1355f38d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 00:37:52 -0500 Subject: [PATCH 09/47] Add changelog --- changelog.d/17450.bugfix | 1 + synapse/storage/databases/main/state.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 changelog.d/17450.bugfix diff --git a/changelog.d/17450.bugfix b/changelog.d/17450.bugfix new file mode 100644 index 00000000000..01a521da386 --- /dev/null +++ b/changelog.d/17450.bugfix @@ -0,0 +1 @@ +Update experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint to handle invite/knock rooms when filtering. diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 413f78d8095..a090cf23e83 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -465,7 +465,6 @@ async def bulk_get_room_encryption( EventContentFields.ENCRYPTION_ALGORITHM ) - logger.info("results %s", results) return results @cached(max_entries=100000, iterable=True) From 7ae24258b4815dac1761b84d2c7bf442e01572bd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 14:56:21 -0500 Subject: [PATCH 10/47] Invalidate caches --- synapse/storage/_base.py | 4 ++++ synapse/storage/databases/main/cache.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 881888fa93f..060a8aa45e3 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -124,6 +124,8 @@ def _invalidate_state_caches( # Purge other caches based on room state. self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) self._attempt_to_invalidate_cache("get_partial_current_state_ids", (room_id,)) + self._attempt_to_invalidate_cache("get_room_type", (room_id,)) + self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) def _invalidate_state_caches_all(self, room_id: str) -> None: """Invalidates caches that are based on the current state, but does @@ -147,6 +149,8 @@ def _invalidate_state_caches_all(self, room_id: str) -> None: self._attempt_to_invalidate_cache("get_user_in_room_with_profile", None) self._attempt_to_invalidate_cache("get_rooms_for_user", None) self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) + self._attempt_to_invalidate_cache("get_room_type", (room_id,)) + self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) def _attempt_to_invalidate_cache( self, cache_name: str, key: Optional[Collection[Any]] diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 2d6b75e47ed..264b7c9bf4a 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -269,12 +269,18 @@ def _process_event_stream_row(self, token: int, row: EventsStreamRow) -> None: if data.type == EventTypes.Member: self.get_rooms_for_user.invalidate((data.state_key,)) # type: ignore[attr-defined] + elif data.type == EventTypes.RoomEncryption: + self.get_room_encryption.invalidate((data.room_id,)) # type: ignore[attr-defined] + elif data.type == EventTypes.Create: + self.get_room_type.invalidate((data.room_id,)) # type: ignore[attr-defined] elif row.type == EventsStreamAllStateRow.TypeId: assert isinstance(data, EventsStreamAllStateRow) # Similar to the above, but the entire caches are invalidated. This is # unfortunate for the membership caches, but should recover quickly. self._curr_state_delta_stream_cache.entity_has_changed(data.room_id, token) # type: ignore[attr-defined] self.get_rooms_for_user.invalidate_all() # type: ignore[attr-defined] + self.get_room_type.invalidate((data.room_id,)) # type: ignore[attr-defined] + self.get_room_encryption.invalidate((data.room_id,)) # type: ignore[attr-defined] else: raise Exception("Unknown events stream row type %s" % (row.type,)) @@ -342,6 +348,10 @@ def _invalidate_caches_for_event( self._attempt_to_invalidate_cache( "get_forgotten_rooms_for_user", (state_key,) ) + elif etype == EventTypes.Create: + self._attempt_to_invalidate_cache("get_room_type", (room_id,)) + elif etype == EventTypes.RoomEncryption: + self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) if relates_to: self._attempt_to_invalidate_cache( @@ -399,6 +409,8 @@ def _invalidate_caches_for_room_events(self, room_id: str) -> None: self._attempt_to_invalidate_cache("get_thread_summary", None) self._attempt_to_invalidate_cache("get_thread_participated", None) self._attempt_to_invalidate_cache("get_threads", (room_id,)) + self._attempt_to_invalidate_cache("get_room_type", (room_id,)) + self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) self._attempt_to_invalidate_cache("_get_state_group_for_event", None) @@ -451,6 +463,8 @@ def _invalidate_caches_for_room(self, room_id: str) -> None: self._attempt_to_invalidate_cache("get_forgotten_rooms_for_user", None) self._attempt_to_invalidate_cache("_get_membership_from_event_id", None) self._attempt_to_invalidate_cache("get_room_version_id", (room_id,)) + self._attempt_to_invalidate_cache("get_room_type", (room_id,)) + self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) # And delete state caches. From b35a2daf7670863157d807f0afd65bbf638b208c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 16:06:56 -0500 Subject: [PATCH 11/47] Use `ROOM_UNKNOWN_SENTINEL` --- synapse/api/constants.py | 3 + synapse/handlers/sliding_sync.py | 21 ++- synapse/storage/databases/main/state.py | 49 +++++- tests/handlers/test_sliding_sync.py | 204 ++++++++++++++++++++++-- 4 files changed, 250 insertions(+), 27 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 42206f79b4e..07be493c5cc 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -225,6 +225,9 @@ class EventContentFields: # This is deprecated in MSC2175. ROOM_CREATOR: Final = "creator" + # The version of the room for `m.room.create` events. + ROOM_VERSION: Final = "room_version" + # Used in m.room.guest_access events. GUEST_ACCESS: Final = "guest_access" diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3ea04b2a96a..1a16f28c3c9 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -36,6 +36,7 @@ from synapse.events.utils import strip_event from synapse.handlers.relations import BundledAggregations from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary +from synapse.storage.databases.main.state import ROOM_UNKNOWN_SENTINEL from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.storage.roommember import MemberSummary from synapse.types import ( @@ -1189,7 +1190,11 @@ async def _bulk_fetch_stripped_state_for_rooms( room_id_to_encryption = dict( await self.store.bulk_get_room_encryption(filtered_room_id_set) ) - room_ids_with_results = room_id_to_encryption.keys() + room_ids_with_results = [ + room_id + for room_id, encryption in room_id_to_encryption.items() + if encryption is not ROOM_UNKNOWN_SENTINEL + ] # We might not have room state for remote invite/knocks if we are the first # person on our server to see the room. The best we can do is look in the @@ -1230,10 +1235,10 @@ async def _bulk_fetch_stripped_state_for_rooms( # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items for room_id in filtered_room_id_set.copy(): - encryption = room_id_to_encryption.get(room_id, _Sentinel.sentinel) + encryption = room_id_to_encryption.get(room_id, ROOM_UNKNOWN_SENTINEL) # Just remove rooms if we can't determine their encryption status - if encryption is _Sentinel.sentinel: + if encryption is ROOM_UNKNOWN_SENTINEL: filtered_room_id_set.remove(room_id) continue @@ -1270,7 +1275,11 @@ async def _bulk_fetch_stripped_state_for_rooms( room_id_to_type = dict( await self.store.bulk_get_room_type(filtered_room_id_set) ) - room_ids_with_results = room_id_to_type.keys() + room_ids_with_results = [ + room_id + for room_id, room_type in room_id_to_type.items() + if room_type is not ROOM_UNKNOWN_SENTINEL + ] # We might not have room state for remote invite/knocks if we are the first # person on our server to see the room. The best we can do is look in the @@ -1308,10 +1317,10 @@ async def _bulk_fetch_stripped_state_for_rooms( # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items for room_id in filtered_room_id_set.copy(): - room_type = room_id_to_type.get(room_id, _Sentinel.sentinel) + room_type = room_id_to_type.get(room_id, ROOM_UNKNOWN_SENTINEL) # Just remove rooms if we can't determine their type - if room_type is _Sentinel.sentinel: + if room_type is ROOM_UNKNOWN_SENTINEL: filtered_room_id_set.remove(room_id) continue diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index a090cf23e83..d75ac2ad4f2 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -75,6 +75,15 @@ MAX_STATE_DELTA_HOPS = 100 +# Freeze so it's immutable and we can use it as a cache value +@attr.s(slots=True, frozen=True, auto_attribs=True) +class Sentinel: + pass + + +ROOM_UNKNOWN_SENTINEL = Sentinel() + + @attr.s(slots=True, frozen=True, auto_attribs=True) class EventMetadata: """Returned by `get_metadata_for_events`""" @@ -329,11 +338,19 @@ async def get_room_type(self, room_id: str) -> Optional[str]: @cachedList(cached_method_name="get_room_type", list_name="room_ids") async def bulk_get_room_type( self, room_ids: Set[str] - ) -> Mapping[str, Optional[str]]: + ) -> Mapping[str, Union[Optional[str], Sentinel]]: """ Bulk fetch room types for the given rooms. - Rooms unknown to this server will be omitted from the response. + Since this function is cached, any missing values would be cached as `None`. In + order to distinguish between an unencrypted room that has `None` encryption and + a room that is unknown to the server where we might want to omit the value + (which would make it cached as `None`), instead we use the sentinel value + `ROOM_UNKNOWN_SENTINEL`. + + Returns: + A mapping from room ID to the room's type (`None` is a valid room type). + Rooms unknown to this server will return `ROOM_UNKNOWN_SENTINEL`. """ rows = await self.db_pool.simple_select_many_batch( @@ -354,8 +371,10 @@ async def bulk_get_room_type( room_type = create_event.content.get(EventContentFields.ROOM_TYPE) results[room_id] = room_type except NotFoundError: - # Ignore unknown rooms - pass + # We use the sentinel value to distinguish between `None` which is a + # valid room type and a room that is unknown to the server so the value + # is just unset. + results[room_id] = ROOM_UNKNOWN_SENTINEL return results @@ -411,15 +430,20 @@ async def get_room_encryption(self, room_id: str) -> Optional[str]: @cachedList(cached_method_name="get_room_encryption", list_name="room_ids") async def bulk_get_room_encryption( self, room_ids: Set[str] - ) -> Mapping[str, Optional[str]]: + ) -> Mapping[str, Union[Optional[str], Sentinel]]: """ Bulk fetch room encryption for the given rooms. - Rooms unknown to this server will be omitted from the response. + Since this function is cached, any missing values would be cached as `None`. In + order to distinguish between an unencrypted room that has `None` encryption and + a room that is unknown to the server where we might want to omit the value + (which would make it cached as `None`), instead we use the sentinel value + `ROOM_UNKNOWN_SENTINEL`. Returns: A mapping from room ID to the room's encryption algorithm if the room is - encrypted, otherwise `None`. + encrypted, otherwise `None`. Rooms unknown to this server will return + `ROOM_UNKNOWN_SENTINEL`. """ rows = await self.db_pool.simple_select_many_batch( @@ -451,6 +475,10 @@ async def bulk_get_room_encryption( encryption_event_id = state_map.get((EventTypes.RoomEncryption, "")) if create_event_id is None: + # We use the sentinel value to distinguish between `None` which is a + # valid room type and a room that is unknown to the server so the value + # is just unset. + results[room_id] = ROOM_UNKNOWN_SENTINEL continue if encryption_event_id is None: @@ -460,7 +488,12 @@ async def bulk_get_room_encryption( encryption_event_map = await self.get_events(encryption_event_ids) - for encryption_event in encryption_event_map.values(): + for encryption_event_id in encryption_event_ids: + encryption_event = encryption_event_map.get(encryption_event_id) + # If the state curent state says there is an encryption event, we should + # have it in the database. + assert encryption_event is not None + results[encryption_event.room_id] = encryption_event.content.get( EventContentFields.ENCRYPTION_ALGORITHM ) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 8a1b448ed9e..b6d93acb451 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3315,7 +3315,7 @@ def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> Non ) # `remote_invite_room_id` should not appear because we can't figure out whether - # it is encrypted or not. + # it is encrypted or not (no stripped state, `unsigned.invite_room_state`). self.assertEqual(truthy_filtered_room_map.keys(), {encrypted_room_id}) # Try with `is_encrypted=False` @@ -3331,7 +3331,7 @@ def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> Non ) # `remote_invite_room_id` should not appear because we can't figure out whether - # it is encrypted or not. + # it is encrypted or not (no stripped state, `unsigned.invite_room_state`). self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) def test_filter_encrypted_with_remote_invite_encrypted_room(self) -> None: @@ -3342,7 +3342,8 @@ def test_filter_encrypted_with_remote_invite_encrypted_room(self) -> None: user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - # Create a remote invite room without any `unsigned.invite_room_state` + # Create a remote invite room with some `unsigned.invite_room_state` + # indicating that the room is encrypted. remote_invite_room_id = self._create_remote_invite_room_for_user( user1_id, [ @@ -3429,7 +3430,8 @@ def test_filter_encrypted_with_remote_invite_unencrypted_room(self) -> None: user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - # Create a remote invite room without any `unsigned.invite_room_state` + # Create a remote invite room with some `unsigned.invite_room_state` + # but don't set any room encryption event. remote_invite_room_id = self._create_remote_invite_room_for_user( user1_id, [ @@ -3756,23 +3758,184 @@ def test_filter_not_room_types(self) -> None: self.assertEqual(filtered_room_map.keys(), {space_room_id}) - # TODO: Add more tests like we have for `filters.is_encrypted` - def test_filter_room_types_with_remote_invite_room(self) -> None: + def test_filter_room_types_with_remote_invite_room_no_stripped_state(self) -> None: """ - Test that we can apply a `filter.not_room_types` filter, even if we only have an - invite for a remote room. + Test that we can apply a `filter.room_types` filter against a remote invite + room without any `unsigned.invite_room_state` (stripped state). + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room without any `unsigned.invite_room_state` + _remote_invite_room_id = self._create_remote_invite_room_for_user( + user1_id, None + ) + + # Create a normal room (no room type) + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + + # Create a space room + space_room_id = self.helper.create_room_as( + user1_id, + tok=user1_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self._get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=None, + to_token=after_rooms_token, + ) + + # Try finding only normal rooms + filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters(room_types=[None]), + after_rooms_token, + ) + ) + + # `remote_invite_room_id` should not appear because we can't figure out what + # room type it is (no stripped state, `unsigned.invite_room_state`) + self.assertEqual(filtered_room_map.keys(), {room_id}) + + # Try finding only spaces + filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters(room_types=[RoomTypes.SPACE]), + after_rooms_token, + ) + ) + + # `remote_invite_room_id` should not appear because we can't figure out what + # room type it is (no stripped state, `unsigned.invite_room_state`) + self.assertEqual(filtered_room_map.keys(), {space_room_id}) - This is a regression test. + def test_filter_room_types_with_remote_invite_space(self) -> None: + """ + Test that we can apply a `filter.room_types` filter against a remote invite + to a space room with some `unsigned.invite_room_state` (stripped state). """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create a remote invite room with some `unsigned.invite_room_state` indicating + # that it is a space room + remote_invite_room_id = self._create_remote_invite_room_for_user( + user1_id, + [ + { + "type": EventTypes.Create, + "state_key": "", + "sender": "@inviter:remote_server", + "content": { + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + # Specify that it is a space room + EventContentFields.ROOM_TYPE: RoomTypes.SPACE, + }, + }, + ], + ) + + # Create a normal room (no room type) + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # Create a space room + space_room_id = self.helper.create_room_as( + user1_id, + tok=user1_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self._get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=None, + to_token=after_rooms_token, + ) + + # Try finding only normal rooms + filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters(room_types=[None]), + after_rooms_token, + ) + ) + + # `remote_invite_room_id` should not appear here because it is a space room + # according to the stripped state + self.assertEqual(filtered_room_map.keys(), {room_id}) + + # Try finding only spaces + filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters(room_types=[RoomTypes.SPACE]), + after_rooms_token, + ) + ) + + # `remote_invite_room_id` should appear here because it is a space room + # according to the stripped state + self.assertEqual( + filtered_room_map.keys(), {space_room_id, remote_invite_room_id} + ) + + def test_filter_encrypted_with_remote_invite_normal_room(self) -> None: + """ + Test that we can apply a `filter.room_types` filter against a remote invite + to a normal room with some `unsigned.invite_room_state` (stripped state). + """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") - remote_invite_room_id = self._create_remote_invite_room_for_user(user1_id, None) + # Create a remote invite room with some `unsigned.invite_room_state` + # but the create event does not specify a room type (normal room) + remote_invite_room_id = self._create_remote_invite_room_for_user( + user1_id, + [ + { + "type": EventTypes.Create, + "state_key": "", + "sender": "@inviter:remote_server", + "content": { + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, + # No room type means this is a normal room + }, + }, + ], + ) # Create a normal room (no room type) room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # Create a space room + space_room_id = self.helper.create_room_as( + user1_id, + tok=user1_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + after_rooms_token = self.event_sources.get_current_token() # Get the rooms the user should be syncing with @@ -3782,19 +3945,34 @@ def test_filter_room_types_with_remote_invite_room(self) -> None: to_token=after_rooms_token, ) + # Try finding only normal rooms filtered_room_map = self.get_success( self.sliding_sync_handler.filter_rooms( UserID.from_string(user1_id), sync_room_map, - SlidingSyncConfig.SlidingSyncList.Filters( - room_types=[None, RoomTypes.SPACE], - ), + SlidingSyncConfig.SlidingSyncList.Filters(room_types=[None]), after_rooms_token, ) ) + # `remote_invite_room_id` should appear here because it is a normal room + # according to the stripped state (no room type) self.assertEqual(filtered_room_map.keys(), {room_id, remote_invite_room_id}) + # Try finding only spaces + filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters(room_types=[RoomTypes.SPACE]), + after_rooms_token, + ) + ) + + # `remote_invite_room_id` should not appear here because it is a normal room + # according to the stripped state (no room type) + self.assertEqual(filtered_room_map.keys(), {space_room_id}) + class SortRoomsTestCase(HomeserverTestCase): """ From 4bb34d33ab02a5fdee71c8b387d0658d999f9e49 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 16:18:37 -0500 Subject: [PATCH 12/47] Iterate on sentinels --- synapse/handlers/sliding_sync.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 1a16f28c3c9..1b0dab324a8 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -60,10 +60,16 @@ logger = logging.getLogger(__name__) -class _Sentinel(enum.Enum): - # defining a sentinel in this way allows mypy to correctly handle the - # type of a dictionary lookup and subsequent type narrowing. - sentinel = object() +# Freeze so it's immutable and we can use it as a cache value +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _Sentinel: + pass + + +# Use an enum so mypy handles the type of a dictionary lookup and subsequent type +# narrowing. +class Sentinel(enum.Enum): + UNSET_SENTINEL = _Sentinel() # The event types that clients should consider as new activity. @@ -1207,9 +1213,9 @@ async def _bulk_fetch_stripped_state_for_rooms( # Update our `room_id_to_encryption` map based on the stripped state for room_id in room_ids_without_results: stripped_state = room_id_to_stripped_state_map.get( - room_id, _Sentinel.sentinel + room_id, Sentinel.UNSET_SENTINEL ) - assert stripped_state is not _Sentinel.sentinel, ( + assert stripped_state is not Sentinel.UNSET_SENTINEL, ( f"Stripped state left unset for room {room_id}. " + "Make sure you're calling `_bulk_fetch_stripped_state_for_rooms(...)` " + "with that room_id. (this is a problem with Synapse itself)" @@ -1292,9 +1298,9 @@ async def _bulk_fetch_stripped_state_for_rooms( # Update our `room_id_to_type` map based on the stripped state for room_id in room_ids_without_results: stripped_state = room_id_to_stripped_state_map.get( - room_id, _Sentinel.sentinel + room_id, Sentinel.UNSET_SENTINEL ) - assert stripped_state is not _Sentinel.sentinel, ( + assert stripped_state is not Sentinel.UNSET_SENTINEL, ( f"Stripped state left unset for room {room_id}. " + "Make sure you're calling `_bulk_fetch_stripped_state_for_rooms(...)` " + "with that room_id. (this is a problem with Synapse itself)" From a07ee22e8697806104ce9a5ac018c9eab2888ebe Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 16:54:11 -0500 Subject: [PATCH 13/47] Simply sentinel for now --- synapse/handlers/sliding_sync.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 1b0dab324a8..3a20cf6aa91 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -60,16 +60,10 @@ logger = logging.getLogger(__name__) -# Freeze so it's immutable and we can use it as a cache value -@attr.s(slots=True, frozen=True, auto_attribs=True) -class _Sentinel: - pass - - -# Use an enum so mypy handles the type of a dictionary lookup and subsequent type -# narrowing. class Sentinel(enum.Enum): - UNSET_SENTINEL = _Sentinel() + # defining a sentinel in this way allows mypy to correctly handle the + # type of a dictionary lookup and subsequent type narrowing. + UNSET_SENTINEL = object() # The event types that clients should consider as new activity. From 27a97a068d78f01b0cc13c904789ba639fc7c820 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 17:05:40 -0500 Subject: [PATCH 14/47] fetch -> get --- synapse/handlers/sliding_sync.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3a20cf6aa91..e31556aed24 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1102,7 +1102,7 @@ async def filter_rooms( """ room_id_to_stripped_state_map: Dict[str, Optional[List[Any]]] = {} - async def _bulk_fetch_stripped_state_for_rooms( + async def _bulk_get_stripped_state_for_rooms( room_ids: StrCollection, ) -> None: """ @@ -1202,7 +1202,7 @@ async def _bulk_fetch_stripped_state_for_rooms( room_ids_without_results = filtered_room_id_set.difference( room_ids_with_results ) - await _bulk_fetch_stripped_state_for_rooms(room_ids_without_results) + await _bulk_get_stripped_state_for_rooms(room_ids_without_results) # Update our `room_id_to_encryption` map based on the stripped state for room_id in room_ids_without_results: @@ -1211,7 +1211,7 @@ async def _bulk_fetch_stripped_state_for_rooms( ) assert stripped_state is not Sentinel.UNSET_SENTINEL, ( f"Stripped state left unset for room {room_id}. " - + "Make sure you're calling `_bulk_fetch_stripped_state_for_rooms(...)` " + + "Make sure you're calling `_bulk_get_stripped_state_for_rooms(...)` " + "with that room_id. (this is a problem with Synapse itself)" ) @@ -1287,7 +1287,7 @@ async def _bulk_fetch_stripped_state_for_rooms( room_ids_without_results = filtered_room_id_set.difference( room_ids_with_results ) - await _bulk_fetch_stripped_state_for_rooms(room_ids_without_results) + await _bulk_get_stripped_state_for_rooms(room_ids_without_results) # Update our `room_id_to_type` map based on the stripped state for room_id in room_ids_without_results: @@ -1296,7 +1296,7 @@ async def _bulk_fetch_stripped_state_for_rooms( ) assert stripped_state is not Sentinel.UNSET_SENTINEL, ( f"Stripped state left unset for room {room_id}. " - + "Make sure you're calling `_bulk_fetch_stripped_state_for_rooms(...)` " + + "Make sure you're calling `_bulk_get_stripped_state_for_rooms(...)` " + "with that room_id. (this is a problem with Synapse itself)" ) From 517bfc4fb1d6300d12ea95d69fcbad488ca80bd6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 17:11:34 -0500 Subject: [PATCH 15/47] Update comments --- synapse/handlers/sliding_sync.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index e31556aed24..4189a4cea68 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1106,7 +1106,12 @@ async def _bulk_get_stripped_state_for_rooms( room_ids: StrCollection, ) -> None: """ - Fetch stripped state for a list of rooms (stripped state is only applicable to invite/knock rooms). + Fetch stripped state for a list of room IDs. Stripped state is only + applicable to invite/knock rooms. Other rooms will have `None` as their + stripped state. + + For invite rooms, we pull from `unsigned.invite_room_state`. + For knock rooms, we pull from `unsigned.knock_room_state`. """ # Fetch what we haven't before room_ids_to_fetch = [ @@ -1186,7 +1191,7 @@ async def _bulk_get_stripped_state_for_rooms( # Filter for encrypted rooms if filters.is_encrypted is not None: # Lookup the encryption state from the database. Since this function is - # cached, need to make a mutable copy via `dict(...)`. + # cached, we need to make a mutable copy via `dict(...)`. room_id_to_encryption = dict( await self.store.bulk_get_room_encryption(filtered_room_id_set) ) @@ -1229,7 +1234,8 @@ async def _bulk_get_stripped_state_for_rooms( ) break else: - # Didn't see any encryption events in the stripped state + # Didn't see any encryption events in the stripped state so we + # can assume the room is unencrypted. room_id_to_encryption[room_id] = None # Make a copy so we don't run into an error: `Set changed size during @@ -1271,7 +1277,7 @@ async def _bulk_get_stripped_state_for_rooms( # room type. if filters.room_types is not None or filters.not_room_types is not None: # Lookup the room type from the database. Since this function is - # cached, need to make a mutable copy via `dict(...)`. + # cached, we need to make a mutable copy via `dict(...)`. room_id_to_type = dict( await self.store.bulk_get_room_type(filtered_room_id_set) ) From d22bae4b232e8172ce62be9f4f23e15c9a211898 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 17:15:01 -0500 Subject: [PATCH 16/47] Fix typo --- synapse/storage/databases/main/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index d75ac2ad4f2..9d7810c44c5 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -490,8 +490,8 @@ async def bulk_get_room_encryption( for encryption_event_id in encryption_event_ids: encryption_event = encryption_event_map.get(encryption_event_id) - # If the state curent state says there is an encryption event, we should - # have it in the database. + # If the curent state says there is an encryption event, we should have it + # in the database. assert encryption_event is not None results[encryption_event.room_id] = encryption_event.content.get( From 8624fb0df66951e0226ffe9c76abdbdab2f76a8d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 17:18:20 -0500 Subject: [PATCH 17/47] Better docstring --- tests/handlers/test_sliding_sync.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index b6d93acb451..fe2b89d7a50 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3099,12 +3099,17 @@ def _create_remote_invite_room_for_user( self, invitee_user_id: str, unsigned_invite_room_state: Optional[List[JsonDict]] ) -> str: """ - Create a fake remote invite and persist it. + Create a fake invite for a remote room and persist it. + + We don't have any state for these kind of rooms and can only rely on the + stripped state included in the unsigned portion of the invite event to identify + the room. Args: invitee_user_id: The person being invited unsigned_invite_room_state: List of stripped state events to assist the - receiver in identifying the room. Use the `strip_event(...)` helper. + receiver in identifying the room. Stripped state events have `type`, + `state_key`, `sender`, `content`. Returns: The room ID of the remote invite room From a77b70e55ee921a0c04c3f6fc31491369affad60 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Jul 2024 17:19:40 -0500 Subject: [PATCH 18/47] Use constants --- tests/handlers/test_sliding_sync.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index fe2b89d7a50..809d524cb65 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3357,8 +3357,8 @@ def test_filter_encrypted_with_remote_invite_encrypted_room(self) -> None: "state_key": "", "sender": "@inviter:remote_server", "content": { - "creator": "@inviter:remote_server", - "room_version": RoomVersions.V10.identifier, + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, }, }, { @@ -3445,8 +3445,8 @@ def test_filter_encrypted_with_remote_invite_unencrypted_room(self) -> None: "state_key": "", "sender": "@inviter:remote_server", "content": { - "creator": "@inviter:remote_server", - "room_version": RoomVersions.V10.identifier, + EventContentFields.ROOM_CREATOR: "@inviter:remote_server", + EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, }, }, # No room encryption event From df5093bff0fb64f6af3ae5eeff18d6198f5263d0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 18 Jul 2024 11:47:05 -0500 Subject: [PATCH 19/47] Prefer not implementing if not used See https://github.com/element-hq/synapse/pull/17450#discussion_r1682608566 --- synapse/storage/databases/main/state.py | 74 +------------------------ 1 file changed, 2 insertions(+), 72 deletions(-) diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 9d7810c44c5..603d4af5633 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -308,32 +308,7 @@ async def get_create_event_for_room(self, room_id: str) -> EventBase: @cached(max_entries=10000) async def get_room_type(self, room_id: str) -> Optional[str]: - """ - Get the room type for a given room. - - Returns: - The room type if known (`None` is a valid room type) - - Raises: - NotFoundError if the room is unknown - """ - - row = await self.db_pool.simple_select_one( - table="room_stats_state", - keyvalues={"room_id": room_id}, - retcols=("room_type",), - allow_none=True, - desc="get_room_type", - ) - - if row is not None: - return row[0] - - # If we haven't updated `room_stats_state` with the room yet, query the - # create event directly. - create_event = await self.get_create_event_for_room(room_id) - room_type = create_event.content.get(EventContentFields.ROOM_TYPE) - return room_type + raise NotImplementedError() @cachedList(cached_method_name="get_room_type", list_name="room_ids") async def bulk_get_room_type( @@ -380,52 +355,7 @@ async def bulk_get_room_type( @cached(max_entries=10000) async def get_room_encryption(self, room_id: str) -> Optional[str]: - """ - Get the encryption algorithm for a given room. - - Returns: - The encryption algorithm if the room is encrypted, otherwise `None`. - - Raises: - NotFoundError if the room is unknown - """ - - row = await self.db_pool.simple_select_one( - table="room_stats_state", - keyvalues={"room_id": room_id}, - retcols=("encryption",), - allow_none=True, - desc="get_room_is_encrypted", - ) - - if row is not None: - return row[0] - - # If we haven't updated `room_stats_state` with the room yet, query the state - # directly. - state_map = await self.get_partial_filtered_current_state_ids( - room_id, - state_filter=StateFilter.from_types( - [ - (EventTypes.Create, ""), - (EventTypes.RoomEncryption, ""), - ] - ), - ) - # We can use the create event as a canary to tell whether the server has seen - # the room before - create_event_id = state_map.get((EventTypes.Create, "")) - encryption_event_id = state_map.get((EventTypes.RoomEncryption, "")) - if create_event_id is None: - raise NotFoundError( - f"Unknown room {room_id} does not have any state to determine room encryption" - ) - - if encryption_event_id is None: - return None - - encryption_event = await self.get_event(encryption_event_id) - return encryption_event.content.get(EventContentFields.ENCRYPTION_ALGORITHM) + raise NotImplementedError() @cachedList(cached_method_name="get_room_encryption", list_name="room_ids") async def bulk_get_room_encryption( From 7c33c8319d44bc1d3849698cc87995f9880aeba8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 18 Jul 2024 12:33:14 -0500 Subject: [PATCH 20/47] Separate to its own function (`_bulk_get_stripped_state_for_rooms` -> `_bulk_get_stripped_state_for_rooms_from_sync_room_map`) See https://github.com/element-hq/synapse/pull/17450#discussion_r1682611077 --- synapse/handlers/sliding_sync.py | 158 ++++++++++++++++++------------- 1 file changed, 90 insertions(+), 68 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 4189a4cea68..ea9888ce871 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1079,6 +1079,82 @@ async def check_room_subscription_allowed_for_user( # return None + async def _bulk_get_stripped_state_for_rooms_from_sync_room_map( + self, + room_ids: StrCollection, + sync_room_map: Dict[str, _RoomMembershipForUser], + ) -> Dict[str, Optional[List[Any]]]: + """ + Fetch stripped state for a list of room IDs. Stripped state is only + applicable to invite/knock rooms. Other rooms will have `None` as their + stripped state. + + For invite rooms, we pull from `unsigned.invite_room_state`. + For knock rooms, we pull from `unsigned.knock_room_state`. + + Args: + room_ids: Room IDs to fetch stripped state for + sync_room_map: Dictionary of room IDs to sort along with membership + information in the room at the time of `to_token`. + + Returns: + A dictionary of room IDs to stripped state. + """ + room_id_to_stripped_state_map: Dict[str, Optional[List[Any]]] = {} + + # Fetch what we haven't before + room_ids_to_fetch = [ + room_id + for room_id in room_ids + if room_id not in room_id_to_stripped_state_map + ] + + # Gather a list of event IDs we can grab stripped state from + invite_or_knock_event_ids: List[str] = [] + for room_id in room_ids_to_fetch: + if sync_room_map[room_id].membership in ( + Membership.INVITE, + Membership.KNOCK, + ): + event_id = sync_room_map[room_id].event_id + # If this is an invite/knock then there should be an event_id + assert event_id is not None + invite_or_knock_event_ids.append(event_id) + else: + room_id_to_stripped_state_map[room_id] = None + + invite_or_knock_events = await self.store.get_events(invite_or_knock_event_ids) + for invite_or_knock_event in invite_or_knock_events.values(): + room_id = invite_or_knock_event.room_id + membership = invite_or_knock_event.membership + + if membership == Membership.INVITE: + # Scrutinize unsigned things + invite_room_state = invite_or_knock_event.unsigned.get( + "invite_room_state", None + ) + # `invite_room_state` should be a list of stripped events + if isinstance(invite_room_state, list): + room_id_to_stripped_state_map[room_id] = invite_room_state + else: + room_id_to_stripped_state_map[room_id] = None + elif membership == Membership.KNOCK: + # Scrutinize unsigned things + knock_room_state = invite_or_knock_event.unsigned.get( + "knock_room_state", None + ) + # `knock_room_state` should be a list of stripped events + if isinstance(knock_room_state, list): + room_id_to_stripped_state_map[room_id] = knock_room_state + else: + room_id_to_stripped_state_map[room_id] = None + else: + raise AssertionError( + f"Unexpected membership {membership} (this is a problem with Synapse itself)" + ) + + return room_id_to_stripped_state_map + async def filter_rooms( self, user: UserID, @@ -1102,70 +1178,6 @@ async def filter_rooms( """ room_id_to_stripped_state_map: Dict[str, Optional[List[Any]]] = {} - async def _bulk_get_stripped_state_for_rooms( - room_ids: StrCollection, - ) -> None: - """ - Fetch stripped state for a list of room IDs. Stripped state is only - applicable to invite/knock rooms. Other rooms will have `None` as their - stripped state. - - For invite rooms, we pull from `unsigned.invite_room_state`. - For knock rooms, we pull from `unsigned.knock_room_state`. - """ - # Fetch what we haven't before - room_ids_to_fetch = [ - room_id - for room_id in room_ids - if room_id not in room_id_to_stripped_state_map - ] - - # Gather a list of event IDs we can grab stripped state from - invite_or_knock_event_ids: List[str] = [] - for room_id in room_ids_to_fetch: - if sync_room_map[room_id].membership in ( - Membership.INVITE, - Membership.KNOCK, - ): - event_id = sync_room_map[room_id].event_id - # If this is an invite/knock then there should be an event_id - assert event_id is not None - invite_or_knock_event_ids.append(event_id) - else: - room_id_to_stripped_state_map[room_id] = None - - invite_or_knock_events = await self.store.get_events( - invite_or_knock_event_ids - ) - for invite_or_knock_event in invite_or_knock_events.values(): - room_id = invite_or_knock_event.room_id - membership = invite_or_knock_event.membership - - if membership == Membership.INVITE: - # Scrutinize unsigned things - invite_room_state = invite_or_knock_event.unsigned.get( - "invite_room_state", None - ) - # `invite_room_state` should be a list of stripped events - if isinstance(invite_room_state, list): - room_id_to_stripped_state_map[room_id] = invite_room_state - else: - room_id_to_stripped_state_map[room_id] = None - elif membership == Membership.KNOCK: - # Scrutinize unsigned things - knock_room_state = invite_or_knock_event.unsigned.get( - "knock_room_state", None - ) - # `knock_room_state` should be a list of stripped events - if isinstance(knock_room_state, list): - room_id_to_stripped_state_map[room_id] = knock_room_state - else: - room_id_to_stripped_state_map[room_id] = None - else: - raise AssertionError( - f"Unexpected membership {membership} (this is a problem with Synapse itself)" - ) - filtered_room_id_set = set(sync_room_map.keys()) # Filter for Direct-Message (DM) rooms @@ -1205,9 +1217,14 @@ async def _bulk_get_stripped_state_for_rooms( # person on our server to see the room. The best we can do is look in the # optional stripped state from the invite/knock event. room_ids_without_results = filtered_room_id_set.difference( - room_ids_with_results + chain(room_ids_with_results, room_id_to_stripped_state_map.keys()) ) - await _bulk_get_stripped_state_for_rooms(room_ids_without_results) + room_id_to_stripped_state_map = { + **room_id_to_stripped_state_map, + **await self._bulk_get_stripped_state_for_rooms_from_sync_room_map( + room_ids_without_results, sync_room_map + ), + } # Update our `room_id_to_encryption` map based on the stripped state for room_id in room_ids_without_results: @@ -1291,9 +1308,14 @@ async def _bulk_get_stripped_state_for_rooms( # person on our server to see the room. The best we can do is look in the # optional stripped state from the invite/knock event. room_ids_without_results = filtered_room_id_set.difference( - room_ids_with_results + chain(room_ids_with_results, room_id_to_stripped_state_map.keys()) ) - await _bulk_get_stripped_state_for_rooms(room_ids_without_results) + room_id_to_stripped_state_map = { + **room_id_to_stripped_state_map, + **await self._bulk_get_stripped_state_for_rooms_from_sync_room_map( + room_ids_without_results, sync_room_map + ), + } # Update our `room_id_to_type` map based on the stripped state for room_id in room_ids_without_results: From 474b480daf2c7f62462a769cf6f3a861c029ebf5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 18 Jul 2024 13:19:22 -0500 Subject: [PATCH 21/47] Use `StateMap` See https://github.com/element-hq/synapse/pull/17450#discussion_r1682617588 --- synapse/events/__init__.py | 19 ++++++ synapse/events/utils.py | 29 ++++++++- synapse/handlers/sliding_sync.py | 105 ++++++++++++++++--------------- 3 files changed, 102 insertions(+), 51 deletions(-) diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 36e0f47e514..2e56b671f06 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -554,3 +554,22 @@ def relation_from_event(event: EventBase) -> Optional[_EventRelation]: aggregation_key = None return _EventRelation(parent_id, rel_type, aggregation_key) + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class StrippedStateEvent: + """ + A stripped down state event. Usually used for remote invite/knocks so the user can + make an informed decision on whether they want to join. + + Attributes: + type: Event `type` + state_key: Event `state_key` + sender: Event `sender` + content: Event `content` + """ + + type: str + state_key: str + sender: str + content: Dict[str, Any] diff --git a/synapse/events/utils.py b/synapse/events/utils.py index f937fd46980..54f94add4dc 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -49,7 +49,7 @@ from synapse.api.room_versions import RoomVersion from synapse.types import JsonDict, Requester -from . import EventBase, make_event_from_dict +from . import EventBase, StrippedStateEvent, make_event_from_dict if TYPE_CHECKING: from synapse.handlers.relations import BundledAggregations @@ -854,3 +854,30 @@ def strip_event(event: EventBase) -> JsonDict: "content": event.content, "sender": event.sender, } + + +def parse_stripped_state_event(raw_stripped_event: Any) -> Optional[StrippedStateEvent]: + """ + Given a raw value from an event's `unsigned` field, attempt to parse it into a + `StrippedStateEvent`. + """ + if isinstance(raw_stripped_event, dict): + # All of these fields are required + type = raw_stripped_event.get("type") + state_key = raw_stripped_event.get("state_key") + sender = raw_stripped_event.get("sender") + content = raw_stripped_event.get("content") + if ( + isinstance(type, str) + and isinstance(state_key, str) + and isinstance(sender, str) + and isinstance(content, dict) + ): + return StrippedStateEvent( + type=type, + state_key=state_key, + sender=sender, + content=content, + ) + + return None diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index ea9888ce871..5488de3ce68 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -32,8 +32,8 @@ EventTypes, Membership, ) -from synapse.events import EventBase -from synapse.events.utils import strip_event +from synapse.events import EventBase, StrippedStateEvent +from synapse.events.utils import parse_stripped_state_event, strip_event from synapse.handlers.relations import BundledAggregations from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.databases.main.state import ROOM_UNKNOWN_SENTINEL @@ -41,6 +41,7 @@ from synapse.storage.roommember import MemberSummary from synapse.types import ( JsonDict, + MutableStateMap, PersistedEventPosition, Requester, RoomStreamToken, @@ -1083,7 +1084,7 @@ async def _bulk_get_stripped_state_for_rooms_from_sync_room_map( self, room_ids: StrCollection, sync_room_map: Dict[str, _RoomMembershipForUser], - ) -> Dict[str, Optional[List[Any]]]: + ) -> Dict[str, Optional[StateMap[StrippedStateEvent]]]: """ Fetch stripped state for a list of room IDs. Stripped state is only applicable to invite/knock rooms. Other rooms will have `None` as their @@ -1098,9 +1099,12 @@ async def _bulk_get_stripped_state_for_rooms_from_sync_room_map( information in the room at the time of `to_token`. Returns: - A dictionary of room IDs to stripped state. + Mapping from room_id to mapping of (type, state_key) to stripped state + event. """ - room_id_to_stripped_state_map: Dict[str, Optional[List[Any]]] = {} + room_id_to_stripped_state_map: Dict[ + str, Optional[StateMap[StrippedStateEvent]] + ] = {} # Fetch what we haven't before room_ids_to_fetch = [ @@ -1128,31 +1132,42 @@ async def _bulk_get_stripped_state_for_rooms_from_sync_room_map( room_id = invite_or_knock_event.room_id membership = invite_or_knock_event.membership + raw_stripped_state_events = None if membership == Membership.INVITE: - # Scrutinize unsigned things invite_room_state = invite_or_knock_event.unsigned.get( - "invite_room_state", None + "invite_room_state" ) - # `invite_room_state` should be a list of stripped events - if isinstance(invite_room_state, list): - room_id_to_stripped_state_map[room_id] = invite_room_state - else: - room_id_to_stripped_state_map[room_id] = None + raw_stripped_state_events = invite_room_state elif membership == Membership.KNOCK: - # Scrutinize unsigned things knock_room_state = invite_or_knock_event.unsigned.get( - "knock_room_state", None + "knock_room_state" ) - # `knock_room_state` should be a list of stripped events - if isinstance(knock_room_state, list): - room_id_to_stripped_state_map[room_id] = knock_room_state - else: - room_id_to_stripped_state_map[room_id] = None + raw_stripped_state_events = knock_room_state else: raise AssertionError( f"Unexpected membership {membership} (this is a problem with Synapse itself)" ) + stripped_state_map: Optional[MutableStateMap[StrippedStateEvent]] = None + # Scrutinize unsigned things. `raw_stripped_state_events` should be a list + # of stripped events + if raw_stripped_state_events is not None: + stripped_state_map = {} + if isinstance(raw_stripped_state_events, list): + for raw_stripped_event in raw_stripped_state_events: + stripped_state_event = parse_stripped_state_event( + raw_stripped_event + ) + if stripped_state_event is not None: + stripped_state_map[ + ( + stripped_state_event.type, + stripped_state_event.state_key, + ) + ] = stripped_state_event + + room_id_to_stripped_state_map[room_id] = stripped_state_map + return room_id_to_stripped_state_map async def filter_rooms( @@ -1176,7 +1191,9 @@ async def filter_rooms( A filtered dictionary of room IDs along with membership information in the room at the time of `to_token`. """ - room_id_to_stripped_state_map: Dict[str, Optional[List[Any]]] = {} + room_id_to_stripped_state_map: Dict[ + str, Optional[StateMap[StrippedStateEvent]] + ] = {} filtered_room_id_set = set(sync_room_map.keys()) @@ -1228,28 +1245,23 @@ async def filter_rooms( # Update our `room_id_to_encryption` map based on the stripped state for room_id in room_ids_without_results: - stripped_state = room_id_to_stripped_state_map.get( + stripped_state_map = room_id_to_stripped_state_map.get( room_id, Sentinel.UNSET_SENTINEL ) - assert stripped_state is not Sentinel.UNSET_SENTINEL, ( + assert stripped_state_map is not Sentinel.UNSET_SENTINEL, ( f"Stripped state left unset for room {room_id}. " + "Make sure you're calling `_bulk_get_stripped_state_for_rooms(...)` " + "with that room_id. (this is a problem with Synapse itself)" ) - if stripped_state is not None: - for event in stripped_state: - event_type = event.get("type") - event_content = event.get("content") - # Scrutinize unsigned stripped state - if isinstance(event_type, str) and isinstance( - event_content, dict - ): - if event_type == EventTypes.RoomEncryption: - room_id_to_encryption[room_id] = event_content.get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - break + if stripped_state_map is not None: + encryption_event = stripped_state_map.get( + (EventTypes.RoomEncryption, "") + ) + if encryption_event is not None: + room_id_to_encryption[room_id] = encryption_event.content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) else: # Didn't see any encryption events in the stripped state so we # can assume the room is unencrypted. @@ -1319,28 +1331,21 @@ async def filter_rooms( # Update our `room_id_to_type` map based on the stripped state for room_id in room_ids_without_results: - stripped_state = room_id_to_stripped_state_map.get( + stripped_state_map = room_id_to_stripped_state_map.get( room_id, Sentinel.UNSET_SENTINEL ) - assert stripped_state is not Sentinel.UNSET_SENTINEL, ( + assert stripped_state_map is not Sentinel.UNSET_SENTINEL, ( f"Stripped state left unset for room {room_id}. " + "Make sure you're calling `_bulk_get_stripped_state_for_rooms(...)` " + "with that room_id. (this is a problem with Synapse itself)" ) - if stripped_state is not None: - for event in stripped_state: - event_type = event.get("type") - event_content = event.get("content") - # Scrutinize unsigned stripped state - if isinstance(event_type, str) and isinstance( - event_content, dict - ): - if event_type == EventTypes.Create: - room_id_to_type[room_id] = event_content.get( - EventContentFields.ROOM_TYPE - ) - break + if stripped_state_map is not None: + create_event = stripped_state_map.get((EventTypes.Create, "")) + if create_event is not None: + room_id_to_type[room_id] = create_event.content.get( + EventContentFields.ROOM_TYPE + ) # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items From 82fa0375422f4f5680e6908b62af86d359197f6e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 18 Jul 2024 13:21:30 -0500 Subject: [PATCH 22/47] Fix function name in assertion message --- synapse/handlers/sliding_sync.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5488de3ce68..798250837c0 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1250,7 +1250,7 @@ async def filter_rooms( ) assert stripped_state_map is not Sentinel.UNSET_SENTINEL, ( f"Stripped state left unset for room {room_id}. " - + "Make sure you're calling `_bulk_get_stripped_state_for_rooms(...)` " + + "Make sure you're calling `_bulk_get_stripped_state_for_rooms_from_sync_room_map(...)` " + "with that room_id. (this is a problem with Synapse itself)" ) @@ -1336,7 +1336,7 @@ async def filter_rooms( ) assert stripped_state_map is not Sentinel.UNSET_SENTINEL, ( f"Stripped state left unset for room {room_id}. " - + "Make sure you're calling `_bulk_get_stripped_state_for_rooms(...)` " + + "Make sure you're calling `_bulk_get_stripped_state_for_rooms_from_sync_room_map(...)` " + "with that room_id. (this is a problem with Synapse itself)" ) From 6f05bb3e03fbc27a8566bd1a0d3a0cd16ce9cfa7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 18 Jul 2024 13:46:28 -0500 Subject: [PATCH 23/47] Add tests for when the server leaves the room See https://github.com/element-hq/synapse/pull/17450#discussion_r1682621262 --- tests/handlers/test_sliding_sync.py | 116 ++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 809d524cb65..d8506c3e6fb 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3273,6 +3273,66 @@ def test_filter_encrypted_rooms(self) -> None: self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + def test_filter_encrypted_server_left_room(self) -> None: + """ + Test that we can apply a `filter.is_encrypted` against a room that everyone has left. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create an unencrypted room + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # Leave the room + self.helper.leave(room_id, user1_id, tok=user1_tok) + + # Create an encrypted room + encrypted_room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + self.helper.send_state( + encrypted_room_id, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + # Leave the room + self.helper.leave(encrypted_room_id, user1_id, tok=user1_tok) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self._get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=None, + to_token=after_rooms_token, + ) + + # Try with `is_encrypted=True` + truthy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=True, + ), + after_rooms_token, + ) + ) + + self.assertEqual(truthy_filtered_room_map.keys(), {encrypted_room_id}) + + # Try with `is_encrypted=False` + falsy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=False, + ), + after_rooms_token, + ) + ) + + self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> None: """ Test that we can apply a `filter.is_encrypted` filter against a remote invite @@ -3763,6 +3823,62 @@ def test_filter_not_room_types(self) -> None: self.assertEqual(filtered_room_map.keys(), {space_room_id}) + def test_filter_room_types_server_left_room(self) -> None: + """ + Test that we can apply a `filter.room_types` against a room that everyone has left. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + # Create a normal room (no room type) + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # Leave the room + self.helper.leave(room_id, user1_id, tok=user1_tok) + + # Create a space room + space_room_id = self.helper.create_room_as( + user1_id, + tok=user1_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Leave the room + self.helper.leave(space_room_id, user1_id, tok=user1_tok) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self._get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=None, + to_token=after_rooms_token, + ) + + # Try finding only normal rooms + filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters(room_types=[None]), + after_rooms_token, + ) + ) + + self.assertEqual(filtered_room_map.keys(), {room_id}) + + # Try finding only spaces + filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters(room_types=[RoomTypes.SPACE]), + after_rooms_token, + ) + ) + + self.assertEqual(filtered_room_map.keys(), {space_room_id}) + def test_filter_room_types_with_remote_invite_room_no_stripped_state(self) -> None: """ Test that we can apply a `filter.room_types` filter against a remote invite From d4c4de12cc161d6c05ab3e9d30881bfa96abdd31 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 18 Jul 2024 13:58:01 -0500 Subject: [PATCH 24/47] Ideally, not just current state See https://github.com/element-hq/synapse/pull/17450#discussion_r1682621262 --- synapse/storage/databases/main/state.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 603d4af5633..83907653346 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -342,6 +342,9 @@ async def bulk_get_room_type( results = dict(rows) for room_id in room_ids - results.keys(): try: + # FIXME: Currently, this grabs the create event from the current state + # which will be cleared if the server is no longer in the room. We should + # make this work even if the server is no longer in the room. create_event = await self.get_create_event_for_room(room_id) room_type = create_event.content.get(EventContentFields.ROOM_TYPE) results[room_id] = room_type @@ -390,6 +393,9 @@ async def bulk_get_room_encryption( results = dict(rows) encryption_event_ids: List[str] = [] for room_id in room_ids - results.keys(): + # FIXME: Currently, this grabs the create event from the current state + # which will be cleared if the server is no longer in the room. We should + # make this work even if the server is no longer in the room. state_map = await self.get_partial_filtered_current_state_ids( room_id, state_filter=StateFilter.from_types( From 0ace82de247ae92001f04558984140a65fe24786 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 18 Jul 2024 18:27:23 -0500 Subject: [PATCH 25/47] Add `test_filter_encrypted_after_we_left` --- synapse/storage/databases/main/state.py | 10 +--- tests/handlers/test_sliding_sync.py | 72 +++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 83907653346..af99a264066 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -315,7 +315,7 @@ async def bulk_get_room_type( self, room_ids: Set[str] ) -> Mapping[str, Union[Optional[str], Sentinel]]: """ - Bulk fetch room types for the given rooms. + Bulk fetch room types for the given rooms (via current state). Since this function is cached, any missing values would be cached as `None`. In order to distinguish between an unencrypted room that has `None` encryption and @@ -342,9 +342,6 @@ async def bulk_get_room_type( results = dict(rows) for room_id in room_ids - results.keys(): try: - # FIXME: Currently, this grabs the create event from the current state - # which will be cleared if the server is no longer in the room. We should - # make this work even if the server is no longer in the room. create_event = await self.get_create_event_for_room(room_id) room_type = create_event.content.get(EventContentFields.ROOM_TYPE) results[room_id] = room_type @@ -365,7 +362,7 @@ async def bulk_get_room_encryption( self, room_ids: Set[str] ) -> Mapping[str, Union[Optional[str], Sentinel]]: """ - Bulk fetch room encryption for the given rooms. + Bulk fetch room encryption for the given rooms (via current state). Since this function is cached, any missing values would be cached as `None`. In order to distinguish between an unencrypted room that has `None` encryption and @@ -393,9 +390,6 @@ async def bulk_get_room_encryption( results = dict(rows) encryption_event_ids: List[str] = [] for room_id in room_ids - results.keys(): - # FIXME: Currently, this grabs the create event from the current state - # which will be cleared if the server is no longer in the room. We should - # make this work even if the server is no longer in the room. state_map = await self.get_partial_filtered_current_state_ids( room_id, state_filter=StateFilter.from_types( diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index d8506c3e6fb..ae7117338c8 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3333,6 +3333,78 @@ def test_filter_encrypted_server_left_room(self) -> None: self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + def test_filter_encrypted_after_we_left(self) -> None: + """ + Test that we can apply a `filter.is_encrypted` against a room that was encrypted + after we left the room (make sure we don't just use the current state) + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Create an unencrypted room + room_id = self.helper.create_room_as(user2_id, tok=user2_tok) + # Leave the room + self.helper.join(room_id, user1_id, tok=user1_tok) + self.helper.leave(room_id, user1_id, tok=user1_tok) + + # Create a room that will be encrypted + encrypted_after_we_left_room_id = self.helper.create_room_as( + user2_id, tok=user2_tok + ) + # Leave the room + self.helper.join(encrypted_after_we_left_room_id, user1_id, tok=user1_tok) + self.helper.leave(encrypted_after_we_left_room_id, user1_id, tok=user1_tok) + + # Encrypt the room after we've left + self.helper.send_state( + encrypted_after_we_left_room_id, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self._get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=None, + to_token=after_rooms_token, + ) + + # Try with `is_encrypted=True` + truthy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=True, + ), + after_rooms_token, + ) + ) + + self.assertEqual(truthy_filtered_room_map.keys(), {}) + + # Try with `is_encrypted=False` + falsy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=False, + ), + after_rooms_token, + ) + ) + + # At the time of us leaving, both rooms were unencrypted + self.assertEqual( + falsy_filtered_room_map.keys(), {room_id, encrypted_after_we_left_room_id} + ) + def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> None: """ Test that we can apply a `filter.is_encrypted` filter against a remote invite From 8c97eaaa29cd06650ca935f7e5038f9f54e874db Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 18 Jul 2024 20:00:45 -0500 Subject: [PATCH 26/47] Try get server left rooms (not working) --- synapse/handlers/sliding_sync.py | 56 +++++++++++++++++++++---- synapse/storage/databases/main/state.py | 1 - tests/handlers/test_sliding_sync.py | 4 +- 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 798250837c0..e5c17195f96 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1219,6 +1219,13 @@ async def filter_rooms( # Filter for encrypted rooms if filters.is_encrypted is not None: + # As a bulk shortcut, lookup the encryption state for the room based on the + # current state. Ideally, for leave/ban rooms, we would at the state at the + # time of the membership instead of current state to not leak anything but + # we consider the room encryption status to not be a secret given it's often + # set at the start of the room and it's one of the stripped state events + # that is normally handed out. + # # Lookup the encryption state from the database. Since this function is # cached, we need to make a mutable copy via `dict(...)`. room_id_to_encryption = dict( @@ -1230,9 +1237,9 @@ async def filter_rooms( if encryption is not ROOM_UNKNOWN_SENTINEL ] - # We might not have room state for remote invite/knocks if we are the first - # person on our server to see the room. The best we can do is look in the - # optional stripped state from the invite/knock event. + # We might not have current room state for remote invite/knocks if we are + # the first person on our server to see the room. The best we can do is look + # in the optional stripped state from the invite/knock event. room_ids_without_results = filtered_room_id_set.difference( chain(room_ids_with_results, room_id_to_stripped_state_map.keys()) ) @@ -1255,18 +1262,53 @@ async def filter_rooms( ) if stripped_state_map is not None: - encryption_event = stripped_state_map.get( + encryption_stripped_event = stripped_state_map.get( (EventTypes.RoomEncryption, "") ) - if encryption_event is not None: - room_id_to_encryption[room_id] = encryption_event.content.get( - EventContentFields.ENCRYPTION_ALGORITHM + if encryption_stripped_event is not None: + room_id_to_encryption[room_id] = ( + encryption_stripped_event.content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) ) else: # Didn't see any encryption events in the stripped state so we # can assume the room is unencrypted. room_id_to_encryption[room_id] = None + # Last resort, we might not have current room state for rooms that the + # server has left (everyone local has left the room). + left_room_ids_without_results = [ + room_id + for room_id in room_ids_without_results + if sync_room_map[room_id].membership + in (Membership.LEAVE, Membership.BAN) + ] + logger.info( + "left_room_ids_without_results %s", left_room_ids_without_results + ) + + # Update our `room_id_to_encryption` map based on the state at the time of + # the leave/ban membership event. + for room_id in left_room_ids_without_results: + room_state = await self.get_current_state_at( + room_id=room_id, + room_membership_for_user_at_to_token=sync_room_map[room_id], + state_filter=StateFilter.from_types( + [(EventTypes.RoomEncryption, "")] + ), + to_token=to_token, + ) + encryption_event = room_state.get((EventTypes.RoomEncryption, "")) + if encryption_event is not None: + room_id_to_encryption[room_id] = encryption_event.content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + else: + # Didn't see any encryption events in the state so we can assume the + # room is unencrypted. + room_id_to_encryption[room_id] = None + # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items for room_id in filtered_room_id_set.copy(): diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index af99a264066..7256f3fd7b5 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -375,7 +375,6 @@ async def bulk_get_room_encryption( encrypted, otherwise `None`. Rooms unknown to this server will return `ROOM_UNKNOWN_SENTINEL`. """ - rows = await self.db_pool.simple_select_many_batch( table="room_stats_state", column="room_id", diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index ae7117338c8..e6f8e700eb0 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3280,6 +3280,8 @@ def test_filter_encrypted_server_left_room(self) -> None: user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + before_rooms_token = self.event_sources.get_current_token() + # Create an unencrypted room room_id = self.helper.create_room_as(user1_id, tok=user1_tok) # Leave the room @@ -3301,7 +3303,7 @@ def test_filter_encrypted_server_left_room(self) -> None: # Get the rooms the user should be syncing with sync_room_map = self._get_sync_room_ids_for_user( UserID.from_string(user1_id), - from_token=None, + from_token=before_rooms_token, to_token=after_rooms_token, ) From 8dd5a473c2f68e2c68256a073f3077010a915fe1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 18 Jul 2024 23:40:38 -0500 Subject: [PATCH 27/47] Implement `is_local_host_in_rooms(...)` --- synapse/handlers/sliding_sync.py | 66 +++++++++----- synapse/storage/databases/main/roommember.py | 50 ++++++++++- tests/handlers/test_sliding_sync.py | 91 ++++++++++++++++++-- 3 files changed, 178 insertions(+), 29 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index e5c17195f96..3c6226a694f 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1219,39 +1219,64 @@ async def filter_rooms( # Filter for encrypted rooms if filters.is_encrypted is not None: + localhost_participating_room_ids: set[str] = { + room_id + for room_id in filtered_room_id_set + # We already know the server is participating because this user is + # joined + if sync_room_map[room_id].membership == Membership.JOIN + } + + # Check if the server is participating in the room which we can use + # to derive whether we can rely on current state for this room. + room_id_to_server_participating = await self.store.is_local_host_in_rooms( + # We only need to check the rooms that we don't know are participating + filtered_room_id_set.difference(localhost_participating_room_ids), + ) + + localhost_not_participating_room_ids = set() + for room_id, is_participating in room_id_to_server_participating.items(): + if is_participating: + localhost_participating_room_ids.add(room_id) + else: + localhost_not_participating_room_ids.add(room_id) + # As a bulk shortcut, lookup the encryption state for the room based on the - # current state. Ideally, for leave/ban rooms, we would at the state at the + # current state if the server is particpating in the room (meaning we have + # current state). Ideally, for leave/ban rooms, we would at the state at the # time of the membership instead of current state to not leak anything but # we consider the room encryption status to not be a secret given it's often # set at the start of the room and it's one of the stripped state events # that is normally handed out. # - # Lookup the encryption state from the database. Since this function is - # cached, we need to make a mutable copy via `dict(...)`. + # Since this function is cached, we need to make a mutable copy via + # `dict(...)`. + # + # TODO: It's possible we race from the time we checked if the server is + # participating in the room and the time we fetch the encryption state here + # which may be unset now because the server left in the time in between. We + # can't really easily tell if this happened. We could do both queries in the + # same transaction. room_id_to_encryption = dict( - await self.store.bulk_get_room_encryption(filtered_room_id_set) + await self.store.bulk_get_room_encryption( + localhost_participating_room_ids + ) ) - room_ids_with_results = [ - room_id - for room_id, encryption in room_id_to_encryption.items() - if encryption is not ROOM_UNKNOWN_SENTINEL - ] # We might not have current room state for remote invite/knocks if we are # the first person on our server to see the room. The best we can do is look # in the optional stripped state from the invite/knock event. - room_ids_without_results = filtered_room_id_set.difference( - chain(room_ids_with_results, room_id_to_stripped_state_map.keys()) - ) room_id_to_stripped_state_map = { **room_id_to_stripped_state_map, **await self._bulk_get_stripped_state_for_rooms_from_sync_room_map( - room_ids_without_results, sync_room_map + localhost_not_participating_room_ids, sync_room_map ), } # Update our `room_id_to_encryption` map based on the stripped state - for room_id in room_ids_without_results: + # (applies to invite/knock rooms) + rooms_ids_without_stripped_state: set[str] = [] + for room_id in localhost_not_participating_room_ids: stripped_state_map = room_id_to_stripped_state_map.get( room_id, Sentinel.UNSET_SENTINEL ) @@ -1275,18 +1300,15 @@ async def filter_rooms( # Didn't see any encryption events in the stripped state so we # can assume the room is unencrypted. room_id_to_encryption[room_id] = None + else: + rooms_ids_without_stripped_state.add(room_id) # Last resort, we might not have current room state for rooms that the - # server has left (everyone local has left the room). + # server has left (no one local is in the room) but we can look at the + # historical state. left_room_ids_without_results = [ - room_id - for room_id in room_ids_without_results - if sync_room_map[room_id].membership - in (Membership.LEAVE, Membership.BAN) + room_id for room_id in rooms_ids_without_stripped_state ] - logger.info( - "left_room_ids_without_results %s", left_room_ids_without_results - ) # Update our `room_id_to_encryption` map based on the state at the time of # the leave/ban membership event. diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 5d2fd08495c..c0627198dc9 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -49,7 +49,7 @@ ) from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.databases.main.events_worker import EventsWorkerStore -from synapse.storage.engines import Sqlite3Engine +from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.roommember import MemberSummary, ProfileInfo, RoomsForUser from synapse.types import ( JsonDict, @@ -1281,6 +1281,54 @@ def _is_local_host_in_room_ignoring_users_txn( _is_local_host_in_room_ignoring_users_txn, ) + async def is_local_host_in_rooms( + self, room_ids: Sequence[str] + ) -> Mapping[str, bool]: + """ + Check if there are any local users joined to the given rooms + + Returns: + A mapping from room_id to whether there are any local users in the room. + """ + + def txn( + txn: LoggingTransaction, + ) -> Mapping[str, bool]: + if isinstance(self.database_engine, PostgresEngine): + sql = """ + WITH room_ids AS ( + SELECT unnest(ARRAY[%s]::text[]) AS room_id + ) + SELECT x.* + FROM room_ids, + LATERAL ( + SELECT room_id FROM local_current_membership + WHERE room_id = room_ids.room_id AND membership = ? + LIMIT 1 + ) AS x + """ % ( + ",".join("?" for _ in room_ids) + ) + elif isinstance(self.database_engine, Sqlite3Engine): + # TODO: Handle SQLite + raise NotImplementedError() + else: + # This should be unreachable. + raise AssertionError("Unrecognized database engine") + + txn.execute(sql, (*room_ids, Membership.JOIN)) + + room_id_to_server_participating = {room_id: False for room_id in room_ids} + for row in txn: + room_id_to_server_participating[row[0]] = True + + return room_id_to_server_participating + + return await self.db_pool.runInteraction( + "is_local_host_in_rooms", + txn, + ) + async def forget(self, user_id: str, room_id: str) -> None: """Indicate that user_id wishes to discard history for room_id.""" diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index e6f8e700eb0..cd5befe1732 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3335,6 +3335,78 @@ def test_filter_encrypted_server_left_room(self) -> None: self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + def test_filter_encrypted_server_left_room2(self) -> None: + """ + Test that we can apply a `filter.is_encrypted` against a room that everyone has + left. + + There is still someone local who is invited to the rooms but that doesn't affect + whether the server is participating in the room. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + _user2_tok = self.login(user2_id, "pass") + + before_rooms_token = self.event_sources.get_current_token() + + # Create an unencrypted room + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # Invite user2 + self.helper.invite(room_id, targ=user2_id, tok=user1_tok) + # User1 leaves the room + self.helper.leave(room_id, user1_id, tok=user1_tok) + + # Create an encrypted room + encrypted_room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + self.helper.send_state( + encrypted_room_id, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user1_tok, + ) + # Invite user2 + self.helper.invite(encrypted_room_id, targ=user2_id, tok=user1_tok) + # User1 leaves the room + self.helper.leave(encrypted_room_id, user1_id, tok=user1_tok) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self._get_sync_room_ids_for_user( + UserID.from_string(user1_id), + from_token=before_rooms_token, + to_token=after_rooms_token, + ) + + # Try with `is_encrypted=True` + truthy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=True, + ), + after_rooms_token, + ) + ) + + self.assertEqual(truthy_filtered_room_map.keys(), {encrypted_room_id}) + + # Try with `is_encrypted=False` + falsy_filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters( + is_encrypted=False, + ), + after_rooms_token, + ) + ) + + self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) + def test_filter_encrypted_after_we_left(self) -> None: """ Test that we can apply a `filter.is_encrypted` against a room that was encrypted @@ -3345,6 +3417,8 @@ def test_filter_encrypted_after_we_left(self) -> None: user2_id = self.register_user("user2", "pass") user2_tok = self.login(user2_id, "pass") + before_rooms_token = self.event_sources.get_current_token() + # Create an unencrypted room room_id = self.helper.create_room_as(user2_id, tok=user2_tok) # Leave the room @@ -3372,7 +3446,7 @@ def test_filter_encrypted_after_we_left(self) -> None: # Get the rooms the user should be syncing with sync_room_map = self._get_sync_room_ids_for_user( UserID.from_string(user1_id), - from_token=None, + from_token=before_rooms_token, to_token=after_rooms_token, ) @@ -3388,7 +3462,14 @@ def test_filter_encrypted_after_we_left(self) -> None: ) ) - self.assertEqual(truthy_filtered_room_map.keys(), {}) + # Even though we left the room before it was encrypted, we still see it because + # someone else on our server is still participating in the room and we "leak" + # the current state to the left user. But we consider the room encryption status + # to not be a secret given it's often set at the start of the room and it's one + # of the stripped state events that is normally handed out. + self.assertEqual( + truthy_filtered_room_map.keys(), {encrypted_after_we_left_room_id} + ) # Try with `is_encrypted=False` falsy_filtered_room_map = self.get_success( @@ -3402,10 +3483,8 @@ def test_filter_encrypted_after_we_left(self) -> None: ) ) - # At the time of us leaving, both rooms were unencrypted - self.assertEqual( - falsy_filtered_room_map.keys(), {room_id, encrypted_after_we_left_room_id} - ) + # Even though we left the room before it was encrypted... (see comment above) + self.assertEqual(falsy_filtered_room_map.keys(), {room_id}) def test_filter_encrypted_with_remote_invite_room_no_stripped_state(self) -> None: """ From 5267b03424f37eaeb6738ad8b9a0faf42076d7bb Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 19 Jul 2024 13:36:44 -0500 Subject: [PATCH 28/47] Start of join room_stats_current --- synapse/handlers/sliding_sync.py | 2 +- synapse/storage/databases/main/roommember.py | 2 +- synapse/storage/databases/main/state.py | 40 ++++++++++++++++---- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3c6226a694f..98f3de3e545 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1275,7 +1275,7 @@ async def filter_rooms( # Update our `room_id_to_encryption` map based on the stripped state # (applies to invite/knock rooms) - rooms_ids_without_stripped_state: set[str] = [] + rooms_ids_without_stripped_state: Set[str] = set() for room_id in localhost_not_participating_room_ids: stripped_state_map = room_id_to_stripped_state_map.get( room_id, Sentinel.UNSET_SENTINEL diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index c0627198dc9..fca866cef31 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1285,7 +1285,7 @@ async def is_local_host_in_rooms( self, room_ids: Sequence[str] ) -> Mapping[str, bool]: """ - Check if there are any local users joined to the given rooms + Check if there are any local users joined to the given rooms. Returns: A mapping from room_id to whether there are any local users in the room. diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 7256f3fd7b5..59a1c20e016 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -375,18 +375,44 @@ async def bulk_get_room_encryption( encrypted, otherwise `None`. Rooms unknown to this server will return `ROOM_UNKNOWN_SENTINEL`. """ - rows = await self.db_pool.simple_select_many_batch( - table="room_stats_state", - column="room_id", - iterable=room_ids, - retcols=("room_id", "encryption"), - desc="bulk_get_encryption", + + def txn( + txn: LoggingTransaction, + ) -> Mapping[str, bool]: + clause, args = make_in_list_sql_clause( + txn.database_engine, "room_id", room_ids + ) + + # FIXME: Use `room_stats_current.current_state_events` instead of + # `room_stats_current.joined_members` once + # https://github.com/element-hq/synapse/issues/17457 is fixed. + sql = """ + SELECT room_id, encryption + FROM room_stats_state + INNER JOIN room_stats_current USING (room_id) + WHERE + %s + AND joined_members > 0 + """ % ( + clause + ) + + txn.execute(sql, args) + + room_id_to_encryption_map = {} + for row in txn: + room_id_to_encryption_map[row[0]] = row[1] + + return room_id_to_encryption_map + + results = await self.db_pool.runInteraction( + "bulk_get_room_encryption", + txn, ) # If we haven't updated `room_stats_state` with the room yet, query the state # directly. This should happen only rarely so we don't mind if we do this in a # loop. - results = dict(rows) encryption_event_ids: List[str] = [] for room_id in room_ids - results.keys(): state_map = await self.get_partial_filtered_current_state_ids( From 329bf09a899f5cb3fe34cc25fb59ae4272ae596f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 19 Jul 2024 15:13:20 -0500 Subject: [PATCH 29/47] Working `room_stats_state` joined by `room_stats_current.local_users_in_room > 0` --- synapse/handlers/sliding_sync.py | 173 ++++++++++++------- synapse/storage/databases/main/roommember.py | 48 ----- synapse/storage/databases/main/state.py | 43 ++++- tests/handlers/test_sliding_sync.py | 12 +- 4 files changed, 152 insertions(+), 124 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 98f3de3e545..84d74b6f65a 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1219,64 +1219,41 @@ async def filter_rooms( # Filter for encrypted rooms if filters.is_encrypted is not None: - localhost_participating_room_ids: set[str] = { - room_id - for room_id in filtered_room_id_set - # We already know the server is participating because this user is - # joined - if sync_room_map[room_id].membership == Membership.JOIN - } - - # Check if the server is participating in the room which we can use - # to derive whether we can rely on current state for this room. - room_id_to_server_participating = await self.store.is_local_host_in_rooms( - # We only need to check the rooms that we don't know are participating - filtered_room_id_set.difference(localhost_participating_room_ids), - ) - - localhost_not_participating_room_ids = set() - for room_id, is_participating in room_id_to_server_participating.items(): - if is_participating: - localhost_participating_room_ids.add(room_id) - else: - localhost_not_participating_room_ids.add(room_id) - # As a bulk shortcut, lookup the encryption state for the room based on the # current state if the server is particpating in the room (meaning we have - # current state). Ideally, for leave/ban rooms, we would at the state at the - # time of the membership instead of current state to not leak anything but - # we consider the room encryption status to not be a secret given it's often - # set at the start of the room and it's one of the stripped state events - # that is normally handed out. + # current state). Ideally, for leave/ban rooms, we would want the state at + # the time of the membership instead of current state to not leak anything + # but we consider the room encryption status to not be a secret given it's + # often set at the start of the room and it's one of the stripped state + # events that is normally handed out. # # Since this function is cached, we need to make a mutable copy via # `dict(...)`. - # - # TODO: It's possible we race from the time we checked if the server is - # participating in the room and the time we fetch the encryption state here - # which may be unset now because the server left in the time in between. We - # can't really easily tell if this happened. We could do both queries in the - # same transaction. room_id_to_encryption = dict( - await self.store.bulk_get_room_encryption( - localhost_participating_room_ids - ) + await self.store.bulk_get_room_encryption(filtered_room_id_set) ) + room_ids_with_results = [ + room_id + for room_id, encryption in room_id_to_encryption.items() + if encryption is not ROOM_UNKNOWN_SENTINEL + ] # We might not have current room state for remote invite/knocks if we are # the first person on our server to see the room. The best we can do is look # in the optional stripped state from the invite/knock event. - room_id_to_stripped_state_map = { - **room_id_to_stripped_state_map, - **await self._bulk_get_stripped_state_for_rooms_from_sync_room_map( - localhost_not_participating_room_ids, sync_room_map - ), - } + room_ids_without_results = filtered_room_id_set.difference( + chain(room_ids_with_results, room_id_to_stripped_state_map.keys()) + ) + room_id_to_stripped_state_map.update( + await self._bulk_get_stripped_state_for_rooms_from_sync_room_map( + room_ids_without_results, sync_room_map + ) + ) # Update our `room_id_to_encryption` map based on the stripped state # (applies to invite/knock rooms) rooms_ids_without_stripped_state: Set[str] = set() - for room_id in localhost_not_participating_room_ids: + for room_id in room_ids_without_results: stripped_state_map = room_id_to_stripped_state_map.get( room_id, Sentinel.UNSET_SENTINEL ) @@ -1306,22 +1283,37 @@ async def filter_rooms( # Last resort, we might not have current room state for rooms that the # server has left (no one local is in the room) but we can look at the # historical state. - left_room_ids_without_results = [ + server_left_room_ids_without_results = [ room_id for room_id in rooms_ids_without_stripped_state ] - # Update our `room_id_to_encryption` map based on the state at the time of - # the leave/ban membership event. - for room_id in left_room_ids_without_results: - room_state = await self.get_current_state_at( + # the membership event. + for room_id in server_left_room_ids_without_results: + room_state = await self.storage_controllers.state.get_state_at( room_id=room_id, - room_membership_for_user_at_to_token=sync_room_map[room_id], + stream_position=to_token.copy_and_replace( + StreamKeyType.ROOM, + sync_room_map[room_id].event_pos.to_room_stream_token(), + ), state_filter=StateFilter.from_types( - [(EventTypes.RoomEncryption, "")] + [ + (EventTypes.Create, ""), + (EventTypes.RoomEncryption, ""), + ] ), - to_token=to_token, + # Partially-stated rooms should have all state events except for + # remote membership events so we don't need to wait at all. + await_full_state=False, ) + # We can use the create event as a canary to tell whether the server has + # seen the room before + create_event = room_state.get((EventTypes.Create, "")) encryption_event = room_state.get((EventTypes.RoomEncryption, "")) + + if create_event is None: + # Skip for unknown rooms + continue + if encryption_event is not None: room_id_to_encryption[room_id] = encryption_event.content.get( EventContentFields.ENCRYPTION_ALGORITHM @@ -1369,8 +1361,16 @@ async def filter_rooms( # provided in the list. `None` is a valid type for rooms which do not have a # room type. if filters.room_types is not None or filters.not_room_types is not None: - # Lookup the room type from the database. Since this function is - # cached, we need to make a mutable copy via `dict(...)`. + # As a bulk shortcut, lookup the room_type based on the current state if the + # server is particpating in the room (meaning we have current state). + # Ideally, for leave/ban rooms, we wouldn't be looking at the current state + # but the create event isn't secret given it's created at the beginning of + # the room meaning if you've had any membership before, you already knew + # about it and for invite/knock it is handed out as one of the stripped + # events anyway. + # + # Since this function is cached, we need to make a mutable copy via + # `dict(...)`. room_id_to_type = dict( await self.store.bulk_get_room_type(filtered_room_id_set) ) @@ -1386,14 +1386,14 @@ async def filter_rooms( room_ids_without_results = filtered_room_id_set.difference( chain(room_ids_with_results, room_id_to_stripped_state_map.keys()) ) - room_id_to_stripped_state_map = { - **room_id_to_stripped_state_map, - **await self._bulk_get_stripped_state_for_rooms_from_sync_room_map( + room_id_to_stripped_state_map.update( + await self._bulk_get_stripped_state_for_rooms_from_sync_room_map( room_ids_without_results, sync_room_map - ), - } + ) + ) # Update our `room_id_to_type` map based on the stripped state + rooms_ids_without_stripped_state: Set[str] = set() for room_id in room_ids_without_results: stripped_state_map = room_id_to_stripped_state_map.get( room_id, Sentinel.UNSET_SENTINEL @@ -1410,6 +1410,44 @@ async def filter_rooms( room_id_to_type[room_id] = create_event.content.get( EventContentFields.ROOM_TYPE ) + else: + rooms_ids_without_stripped_state.add(room_id) + + # Last resort, we might not have current room state for rooms that the + # server has left (no one local is in the room) but we can look at the + # historical state. + server_left_room_ids_without_results = [ + room_id for room_id in rooms_ids_without_stripped_state + ] + # Update our `room_id_to_type` map based on the state at the time of + # the membership event. + for room_id in server_left_room_ids_without_results: + room_state = await self.storage_controllers.state.get_state_at( + room_id=room_id, + stream_position=to_token.copy_and_replace( + StreamKeyType.ROOM, + sync_room_map[room_id].event_pos.to_room_stream_token(), + ), + state_filter=StateFilter.from_types( + [ + (EventTypes.Create, ""), + ] + ), + # Partially-stated rooms should have all state events except for + # remote membership events so we don't need to wait at all. + await_full_state=False, + ) + # We can use the create event as a canary to tell whether the server has + # seen the room before + create_event = room_state.get((EventTypes.Create, "")) + + if create_event is None: + # Skip for unknown rooms + continue + + room_id_to_type[room_id] = create_event.content.get( + EventContentFields.ROOM_TYPE + ) # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items @@ -1520,14 +1558,17 @@ async def get_current_state_ids_at( in the room at the time of `to_token`. to_token: The point in the stream to sync up to. """ - room_state_ids: StateMap[str] + state_ids: StateMap[str] # People shouldn't see past their leave/ban event if room_membership_for_user_at_to_token.membership in ( Membership.LEAVE, Membership.BAN, ): - # TODO: `get_state_ids_at(...)` doesn't take into account the "current state" - room_state_ids = await self.storage_controllers.state.get_state_ids_at( + # TODO: `get_state_ids_at(...)` doesn't take into account the "current + # state". Maybe we need to use + # `get_forward_extremities_for_room_at_stream_ordering(...)` to "Fetch the + # current state at the time." + state_ids = await self.storage_controllers.state.get_state_ids_at( room_id, stream_position=to_token.copy_and_replace( StreamKeyType.ROOM, @@ -1546,7 +1587,7 @@ async def get_current_state_ids_at( ) # Otherwise, we can get the latest current state in the room else: - room_state_ids = await self.storage_controllers.state.get_current_state_ids( + state_ids = await self.storage_controllers.state.get_current_state_ids( room_id, state_filter, # Partially-stated rooms should have all state events except for @@ -1561,7 +1602,7 @@ async def get_current_state_ids_at( ) # TODO: Query `current_state_delta_stream` and reverse/rewind back to the `to_token` - return room_state_ids + return state_ids async def get_current_state_at( self, @@ -1581,17 +1622,17 @@ async def get_current_state_at( in the room at the time of `to_token`. to_token: The point in the stream to sync up to. """ - room_state_ids = await self.get_current_state_ids_at( + state_ids = await self.get_current_state_ids_at( room_id=room_id, room_membership_for_user_at_to_token=room_membership_for_user_at_to_token, state_filter=state_filter, to_token=to_token, ) - event_map = await self.store.get_events(list(room_state_ids.values())) + event_map = await self.store.get_events(list(state_ids.values())) state_map = {} - for key, event_id in room_state_ids.items(): + for key, event_id in state_ids.items(): event = event_map.get(event_id) if event: state_map[key] = event diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index fca866cef31..72b52772fa0 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1281,54 +1281,6 @@ def _is_local_host_in_room_ignoring_users_txn( _is_local_host_in_room_ignoring_users_txn, ) - async def is_local_host_in_rooms( - self, room_ids: Sequence[str] - ) -> Mapping[str, bool]: - """ - Check if there are any local users joined to the given rooms. - - Returns: - A mapping from room_id to whether there are any local users in the room. - """ - - def txn( - txn: LoggingTransaction, - ) -> Mapping[str, bool]: - if isinstance(self.database_engine, PostgresEngine): - sql = """ - WITH room_ids AS ( - SELECT unnest(ARRAY[%s]::text[]) AS room_id - ) - SELECT x.* - FROM room_ids, - LATERAL ( - SELECT room_id FROM local_current_membership - WHERE room_id = room_ids.room_id AND membership = ? - LIMIT 1 - ) AS x - """ % ( - ",".join("?" for _ in room_ids) - ) - elif isinstance(self.database_engine, Sqlite3Engine): - # TODO: Handle SQLite - raise NotImplementedError() - else: - # This should be unreachable. - raise AssertionError("Unrecognized database engine") - - txn.execute(sql, (*room_ids, Membership.JOIN)) - - room_id_to_server_participating = {room_id: False for room_id in room_ids} - for row in txn: - room_id_to_server_participating[row[0]] = True - - return room_id_to_server_participating - - return await self.db_pool.runInteraction( - "is_local_host_in_rooms", - txn, - ) - async def forget(self, user_id: str, room_id: str) -> None: """Indicate that user_id wishes to discard history for room_id.""" diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 59a1c20e016..e9b5acc663e 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -328,18 +328,43 @@ async def bulk_get_room_type( Rooms unknown to this server will return `ROOM_UNKNOWN_SENTINEL`. """ - rows = await self.db_pool.simple_select_many_batch( - table="room_stats_state", - column="room_id", - iterable=room_ids, - retcols=("room_id", "room_type"), - desc="bulk_get_room_type", + def txn( + txn: LoggingTransaction, + ) -> Mapping[str, bool]: + clause, args = make_in_list_sql_clause( + txn.database_engine, "room_id", room_ids + ) + + # FIXME: Use `room_stats_current.current_state_events` instead of + # `room_stats_current.local_users_in_room` once + # https://github.com/element-hq/synapse/issues/17457 is fixed. + sql = """ + SELECT room_id, room_type + FROM room_stats_state + INNER JOIN room_stats_current USING (room_id) + WHERE + %s + AND local_users_in_room > 0 + """ % ( + clause + ) + + txn.execute(sql, args) + + room_id_to_type_map = {} + for row in txn: + room_id_to_type_map[row[0]] = row[1] + + return room_id_to_type_map + + results = await self.db_pool.runInteraction( + "bulk_get_room_type", + txn, ) # If we haven't updated `room_stats_state` with the room yet, query the # create events directly. This should happen only rarely so we don't # mind if we do this in a loop. - results = dict(rows) for room_id in room_ids - results.keys(): try: create_event = await self.get_create_event_for_room(room_id) @@ -384,7 +409,7 @@ def txn( ) # FIXME: Use `room_stats_current.current_state_events` instead of - # `room_stats_current.joined_members` once + # `room_stats_current.local_users_in_room` once # https://github.com/element-hq/synapse/issues/17457 is fixed. sql = """ SELECT room_id, encryption @@ -392,7 +417,7 @@ def txn( INNER JOIN room_stats_current USING (room_id) WHERE %s - AND joined_members > 0 + AND local_users_in_room > 0 """ % ( clause ) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index cd5befe1732..225d5359d99 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3303,6 +3303,8 @@ def test_filter_encrypted_server_left_room(self) -> None: # Get the rooms the user should be syncing with sync_room_map = self._get_sync_room_ids_for_user( UserID.from_string(user1_id), + # We're using a `from_token` so that the room is considered `newly_left` and + # appears in our list of relevant sync rooms from_token=before_rooms_token, to_token=after_rooms_token, ) @@ -3375,6 +3377,8 @@ def test_filter_encrypted_server_left_room2(self) -> None: # Get the rooms the user should be syncing with sync_room_map = self._get_sync_room_ids_for_user( UserID.from_string(user1_id), + # We're using a `from_token` so that the room is considered `newly_left` and + # appears in our list of relevant sync rooms from_token=before_rooms_token, to_token=after_rooms_token, ) @@ -3446,6 +3450,8 @@ def test_filter_encrypted_after_we_left(self) -> None: # Get the rooms the user should be syncing with sync_room_map = self._get_sync_room_ids_for_user( UserID.from_string(user1_id), + # We're using a `from_token` so that the room is considered `newly_left` and + # appears in our list of relevant sync rooms from_token=before_rooms_token, to_token=after_rooms_token, ) @@ -3983,6 +3989,8 @@ def test_filter_room_types_server_left_room(self) -> None: user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") + before_rooms_token = self.event_sources.get_current_token() + # Create a normal room (no room type) room_id = self.helper.create_room_as(user1_id, tok=user1_tok) # Leave the room @@ -4004,7 +4012,9 @@ def test_filter_room_types_server_left_room(self) -> None: # Get the rooms the user should be syncing with sync_room_map = self._get_sync_room_ids_for_user( UserID.from_string(user1_id), - from_token=None, + # We're using a `from_token` so that the room is considered `newly_left` and + # appears in our list of relevant sync rooms + from_token=before_rooms_token, to_token=after_rooms_token, ) From d7dcbfddaed5c7ca924e5829641651aaaf61ef51 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 19 Jul 2024 15:22:09 -0500 Subject: [PATCH 30/47] Add corresponding tests for `filter.room_types` --- tests/handlers/test_sliding_sync.py | 73 ++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index 225d5359d99..a161641115b 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -3343,7 +3343,7 @@ def test_filter_encrypted_server_left_room2(self) -> None: left. There is still someone local who is invited to the rooms but that doesn't affect - whether the server is participating in the room. + whether the server is participating in the room (users need to be joined). """ user1_id = self.register_user("user1", "pass") user1_tok = self.login(user1_id, "pass") @@ -4042,6 +4042,75 @@ def test_filter_room_types_server_left_room(self) -> None: self.assertEqual(filtered_room_map.keys(), {space_room_id}) + def test_filter_room_types_server_left_room2(self) -> None: + """ + Test that we can apply a `filter.room_types` against a room that everyone has left. + + There is still someone local who is invited to the rooms but that doesn't affect + whether the server is participating in the room (users need to be joined). + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + _user2_tok = self.login(user2_id, "pass") + + before_rooms_token = self.event_sources.get_current_token() + + # Create a normal room (no room type) + room_id = self.helper.create_room_as(user1_id, tok=user1_tok) + # Invite user2 + self.helper.invite(room_id, targ=user2_id, tok=user1_tok) + # User1 leaves the room + self.helper.leave(room_id, user1_id, tok=user1_tok) + + # Create a space room + space_room_id = self.helper.create_room_as( + user1_id, + tok=user1_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + # Invite user2 + self.helper.invite(space_room_id, targ=user2_id, tok=user1_tok) + # User1 leaves the room + self.helper.leave(space_room_id, user1_id, tok=user1_tok) + + after_rooms_token = self.event_sources.get_current_token() + + # Get the rooms the user should be syncing with + sync_room_map = self._get_sync_room_ids_for_user( + UserID.from_string(user1_id), + # We're using a `from_token` so that the room is considered `newly_left` and + # appears in our list of relevant sync rooms + from_token=before_rooms_token, + to_token=after_rooms_token, + ) + + # Try finding only normal rooms + filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters(room_types=[None]), + after_rooms_token, + ) + ) + + self.assertEqual(filtered_room_map.keys(), {room_id}) + + # Try finding only spaces + filtered_room_map = self.get_success( + self.sliding_sync_handler.filter_rooms( + UserID.from_string(user1_id), + sync_room_map, + SlidingSyncConfig.SlidingSyncList.Filters(room_types=[RoomTypes.SPACE]), + after_rooms_token, + ) + ) + + self.assertEqual(filtered_room_map.keys(), {space_room_id}) + def test_filter_room_types_with_remote_invite_room_no_stripped_state(self) -> None: """ Test that we can apply a `filter.room_types` filter against a remote invite @@ -4182,7 +4251,7 @@ def test_filter_room_types_with_remote_invite_space(self) -> None: filtered_room_map.keys(), {space_room_id, remote_invite_room_id} ) - def test_filter_encrypted_with_remote_invite_normal_room(self) -> None: + def test_filter_room_types_with_remote_invite_normal_room(self) -> None: """ Test that we can apply a `filter.room_types` filter against a remote invite to a normal room with some `unsigned.invite_room_state` (stripped state). From 60ec4e352723ddbbc34a365da9dd2fe3b3096940 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 19 Jul 2024 15:28:05 -0500 Subject: [PATCH 31/47] Fix some lints --- synapse/handlers/sliding_sync.py | 18 ++++++++---------- synapse/storage/databases/main/roommember.py | 2 +- synapse/storage/databases/main/state.py | 5 +++-- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 84d74b6f65a..0c9dce2daa9 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1283,12 +1283,10 @@ async def filter_rooms( # Last resort, we might not have current room state for rooms that the # server has left (no one local is in the room) but we can look at the # historical state. - server_left_room_ids_without_results = [ - room_id for room_id in rooms_ids_without_stripped_state - ] + # # Update our `room_id_to_encryption` map based on the state at the time of # the membership event. - for room_id in server_left_room_ids_without_results: + for room_id in rooms_ids_without_stripped_state: room_state = await self.storage_controllers.state.get_state_at( room_id=room_id, stream_position=to_token.copy_and_replace( @@ -1302,7 +1300,8 @@ async def filter_rooms( ] ), # Partially-stated rooms should have all state events except for - # remote membership events so we don't need to wait at all. + # remote membership events so we don't need to wait at all because + # we only want the create/room-encryptione events. await_full_state=False, ) # We can use the create event as a canary to tell whether the server has @@ -1416,12 +1415,10 @@ async def filter_rooms( # Last resort, we might not have current room state for rooms that the # server has left (no one local is in the room) but we can look at the # historical state. - server_left_room_ids_without_results = [ - room_id for room_id in rooms_ids_without_stripped_state - ] + # # Update our `room_id_to_type` map based on the state at the time of # the membership event. - for room_id in server_left_room_ids_without_results: + for room_id in rooms_ids_without_stripped_state: room_state = await self.storage_controllers.state.get_state_at( room_id=room_id, stream_position=to_token.copy_and_replace( @@ -1434,7 +1431,8 @@ async def filter_rooms( ] ), # Partially-stated rooms should have all state events except for - # remote membership events so we don't need to wait at all. + # remote membership events so we don't need to wait at all because + # we only want the create event. await_full_state=False, ) # We can use the create event as a canary to tell whether the server has diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 72b52772fa0..5d2fd08495c 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -49,7 +49,7 @@ ) from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.databases.main.events_worker import EventsWorkerStore -from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.storage.engines import Sqlite3Engine from synapse.storage.roommember import MemberSummary, ProfileInfo, RoomsForUser from synapse.types import ( JsonDict, diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index e9b5acc663e..fa32b16b95f 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -30,6 +30,7 @@ Iterable, List, Mapping, + MutableMapping, Optional, Set, Tuple, @@ -330,7 +331,7 @@ async def bulk_get_room_type( def txn( txn: LoggingTransaction, - ) -> Mapping[str, bool]: + ) -> MutableMapping[str, Union[Optional[str], Sentinel]]: clause, args = make_in_list_sql_clause( txn.database_engine, "room_id", room_ids ) @@ -403,7 +404,7 @@ async def bulk_get_room_encryption( def txn( txn: LoggingTransaction, - ) -> Mapping[str, bool]: + ) -> MutableMapping[str, Union[Optional[str], Sentinel]]: clause, args = make_in_list_sql_clause( txn.database_engine, "room_id", room_ids ) From affee2220dcdfcfcc5820d38af9b548f5d8d96f5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 19 Jul 2024 15:32:14 -0500 Subject: [PATCH 32/47] Fix lints --- synapse/handlers/sliding_sync.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 0c9dce2daa9..1a2623356a6 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1264,6 +1264,8 @@ async def filter_rooms( ) if stripped_state_map is not None: + # We assume that if the invite/knock event has some stripped state, + # it would include the room encryption event if it was encrypted. encryption_stripped_event = stripped_state_map.get( (EventTypes.RoomEncryption, "") ) @@ -1392,7 +1394,7 @@ async def filter_rooms( ) # Update our `room_id_to_type` map based on the stripped state - rooms_ids_without_stripped_state: Set[str] = set() + rooms_ids_without_stripped_state = set() for room_id in room_ids_without_results: stripped_state_map = room_id_to_stripped_state_map.get( room_id, Sentinel.UNSET_SENTINEL @@ -1404,9 +1406,11 @@ async def filter_rooms( ) if stripped_state_map is not None: - create_event = stripped_state_map.get((EventTypes.Create, "")) - if create_event is not None: - room_id_to_type[room_id] = create_event.content.get( + create_stripped_event = stripped_state_map.get( + (EventTypes.Create, "") + ) + if create_stripped_event is not None: + room_id_to_type[room_id] = create_stripped_event.content.get( EventContentFields.ROOM_TYPE ) else: From 8ee1708462e006f6c818f429ba12d4840a6754b9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 19 Jul 2024 15:59:21 -0500 Subject: [PATCH 33/47] Explain why we join tables --- synapse/storage/databases/main/state.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index fa32b16b95f..8731ae90711 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -336,6 +336,15 @@ def txn( txn.database_engine, "room_id", room_ids ) + # We can't rely on `room_stats_state.room_type` if the server has left the + # room because the `room_id` will still be in the table but everything will + # be set to `None` but `None` is a valid room type value. We join against + # the `room_stats_current` table which keeps track of the + # `current_state_events` count (and a proxy value `local_users_in_room` + # which can used to assume the server is participating in the room and has + # current state) to ensure that the data in `room_stats_state` is up-to-date + # with the current state. + # # FIXME: Use `room_stats_current.current_state_events` instead of # `room_stats_current.local_users_in_room` once # https://github.com/element-hq/synapse/issues/17457 is fixed. @@ -409,6 +418,15 @@ def txn( txn.database_engine, "room_id", room_ids ) + # We can't rely on `room_stats_state.encryption` if the server has left the + # room because the `room_id` will still be in the table but everything will + # be set to `None` but `None` is a valid encryption value. We join against + # the `room_stats_current` table which keeps track of the + # `current_state_events` count (and a proxy value `local_users_in_room` + # which can used to assume the server is participating in the room and has + # current state) to ensure that the data in `room_stats_state` is up-to-date + # with the current state. + # # FIXME: Use `room_stats_current.current_state_events` instead of # `room_stats_current.local_users_in_room` once # https://github.com/element-hq/synapse/issues/17457 is fixed. From fe2d84bbc115440026a365af53c5f1796b200949 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 19 Jul 2024 16:07:27 -0500 Subject: [PATCH 34/47] Use `StrippedStateEvent` type in tests --- tests/handlers/test_sliding_sync.py | 85 +++++++++++++++-------------- 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/tests/handlers/test_sliding_sync.py b/tests/handlers/test_sliding_sync.py index a161641115b..96da47f3b9c 100644 --- a/tests/handlers/test_sliding_sync.py +++ b/tests/handlers/test_sliding_sync.py @@ -35,7 +35,7 @@ RoomTypes, ) from synapse.api.room_versions import RoomVersions -from synapse.events import make_event_from_dict +from synapse.events import StrippedStateEvent, make_event_from_dict from synapse.events.snapshot import EventContext from synapse.handlers.sliding_sync import ( RoomSyncConfig, @@ -3096,7 +3096,9 @@ def _create_dm_room( _remote_invite_count: int = 0 def _create_remote_invite_room_for_user( - self, invitee_user_id: str, unsigned_invite_room_state: Optional[List[JsonDict]] + self, + invitee_user_id: str, + unsigned_invite_room_state: Optional[List[StrippedStateEvent]], ) -> str: """ Create a fake invite for a remote room and persist it. @@ -3108,8 +3110,7 @@ def _create_remote_invite_room_for_user( Args: invitee_user_id: The person being invited unsigned_invite_room_state: List of stripped state events to assist the - receiver in identifying the room. Stripped state events have `type`, - `state_key`, `sender`, `content`. + receiver in identifying the room. Returns: The room ID of the remote invite room @@ -3128,15 +3129,19 @@ def _create_remote_invite_room_for_user( "prev_events": [], } if unsigned_invite_room_state is not None: + serialized_stripped_state_events = [] for stripped_event in unsigned_invite_room_state: - # Required stripped event fields - assert "type" in stripped_event - assert "state_key" in stripped_event - assert "sender" in stripped_event - assert "content" in stripped_event + serialized_stripped_state_events.append( + { + "type": stripped_event.type, + "state_key": stripped_event.state_key, + "sender": stripped_event.sender, + "content": stripped_event.content, + } + ) invite_event_dict["unsigned"] = { - "invite_room_state": unsigned_invite_room_state + "invite_room_state": serialized_stripped_state_events } invite_event = make_event_from_dict( @@ -3571,23 +3576,23 @@ def test_filter_encrypted_with_remote_invite_encrypted_room(self) -> None: remote_invite_room_id = self._create_remote_invite_room_for_user( user1_id, [ - { - "type": EventTypes.Create, - "state_key": "", - "sender": "@inviter:remote_server", - "content": { + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ EventContentFields.ROOM_CREATOR: "@inviter:remote_server", EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, }, - }, - { - "type": EventTypes.RoomEncryption, - "state_key": "", - "sender": "@inviter:remote_server", - "content": { + ), + StrippedStateEvent( + type=EventTypes.RoomEncryption, + state_key="", + sender="@inviter:remote_server", + content={ EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2", }, - }, + ), ], ) @@ -3659,15 +3664,15 @@ def test_filter_encrypted_with_remote_invite_unencrypted_room(self) -> None: remote_invite_room_id = self._create_remote_invite_room_for_user( user1_id, [ - { - "type": EventTypes.Create, - "state_key": "", - "sender": "@inviter:remote_server", - "content": { + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ EventContentFields.ROOM_CREATOR: "@inviter:remote_server", EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, }, - }, + ), # No room encryption event ], ) @@ -4186,17 +4191,17 @@ def test_filter_room_types_with_remote_invite_space(self) -> None: remote_invite_room_id = self._create_remote_invite_room_for_user( user1_id, [ - { - "type": EventTypes.Create, - "state_key": "", - "sender": "@inviter:remote_server", - "content": { + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ EventContentFields.ROOM_CREATOR: "@inviter:remote_server", EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, # Specify that it is a space room EventContentFields.ROOM_TYPE: RoomTypes.SPACE, }, - }, + ), ], ) @@ -4264,16 +4269,16 @@ def test_filter_room_types_with_remote_invite_normal_room(self) -> None: remote_invite_room_id = self._create_remote_invite_room_for_user( user1_id, [ - { - "type": EventTypes.Create, - "state_key": "", - "sender": "@inviter:remote_server", - "content": { + StrippedStateEvent( + type=EventTypes.Create, + state_key="", + sender="@inviter:remote_server", + content={ EventContentFields.ROOM_CREATOR: "@inviter:remote_server", EventContentFields.ROOM_VERSION: RoomVersions.V10.identifier, # No room type means this is a normal room }, - }, + ), ], ) From 4ae30795bf80bd18e6ec04b0e14e3602f966c1da Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Jul 2024 15:45:03 -0500 Subject: [PATCH 35/47] Prefer f-string See https://github.com/element-hq/synapse/pull/17450#discussion_r1686763570, https://github.com/element-hq/synapse/pull/17450#discussion_r1686768110 --- synapse/storage/databases/main/state.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 8731ae90711..62bc4600fb2 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -348,16 +348,14 @@ def txn( # FIXME: Use `room_stats_current.current_state_events` instead of # `room_stats_current.local_users_in_room` once # https://github.com/element-hq/synapse/issues/17457 is fixed. - sql = """ + sql = f""" SELECT room_id, room_type FROM room_stats_state INNER JOIN room_stats_current USING (room_id) WHERE - %s + {clause} AND local_users_in_room > 0 - """ % ( - clause - ) + """ txn.execute(sql, args) @@ -430,16 +428,14 @@ def txn( # FIXME: Use `room_stats_current.current_state_events` instead of # `room_stats_current.local_users_in_room` once # https://github.com/element-hq/synapse/issues/17457 is fixed. - sql = """ + sql = f""" SELECT room_id, encryption FROM room_stats_state INNER JOIN room_stats_current USING (room_id) WHERE - %s + {clause} AND local_users_in_room > 0 - """ % ( - clause - ) + """ txn.execute(sql, args) From 05bbcb042542c8f57d1bd16a97b1047297da0f05 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Jul 2024 19:19:51 -0500 Subject: [PATCH 36/47] Add `_bulk_get_partial_current_state_content_for_rooms_from_sync_room_map` --- synapse/api/constants.py | 2 + synapse/handlers/sliding_sync.py | 322 ++++++++++++++++-------- synapse/storage/databases/main/state.py | 82 ++++++ 3 files changed, 298 insertions(+), 108 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 07be493c5cc..2c1f8d10916 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -228,6 +228,8 @@ class EventContentFields: # The version of the room for `m.room.create` events. ROOM_VERSION: Final = "room_version" + ROOM_NAME: Final = "name" + # Used in m.room.guest_access events. GUEST_ACCESS: Final = "guest_access" diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 1a2623356a6..5c383ecaf84 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -20,7 +20,19 @@ import enum import logging from itertools import chain -from typing import TYPE_CHECKING, Any, Dict, Final, List, Mapping, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Final, + List, + Literal, + Mapping, + Optional, + Set, + Tuple, + Union, +) import attr from immutabledict import immutabledict @@ -36,7 +48,10 @@ from synapse.events.utils import parse_stripped_state_event, strip_event from synapse.handlers.relations import BundledAggregations from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary -from synapse.storage.databases.main.state import ROOM_UNKNOWN_SENTINEL +from synapse.storage.databases.main.state import ( + ROOM_UNKNOWN_SENTINEL, + Sentinel as StateSentinel, +) from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.storage.roommember import MemberSummary from synapse.types import ( @@ -1170,6 +1185,192 @@ async def _bulk_get_stripped_state_for_rooms_from_sync_room_map( return room_id_to_stripped_state_map + async def _bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( + self, + event_type: Literal[ + # EventTypes.Create + "m.room.create", + # EventTypes.RoomEncryption + "m.room.encryption", + ], + event_content_field: Literal[ + None, + # EventContentFields.ROOM_TYPE + "type", + # EventContentFields.ENCRYPTION_ALGORITHM + "algorithm", + ], + room_ids: Set[str], + sync_room_map: Dict[str, _RoomMembershipForUser], + to_token: StreamToken, + room_id_to_stripped_state_map: Dict[ + str, Optional[StateMap[StrippedStateEvent]] + ], + ) -> Mapping[str, Union[Optional[str], StateSentinel]]: + """ + TODO + + Args: + event_type: TODO + event_content_field: TODO + room_ids: Room IDs to fetch the given content field for. + sync_room_map: Dictionary of room IDs to sort along with membership + information in the room at the time of `to_token`. + to_token: We filter based on the state of the room at this token + room_id_to_stripped_state_map: TODO (Modified in place) + + Returns: + A mapping from room ID to the `event_content_field` value if the room has + the given state event (event_type, ""), otherwise `None`. Rooms unknown to + this server will return `ROOM_UNKNOWN_SENTINEL`. + """ + + # As a bulk shortcut, use the current state if the server is particpating in the + # room (meaning we have current state). Ideally, for leave/ban rooms, we would + # want the state at the time of the membership instead of current state to not + # leak anything but we consider the stripped state events to not be a secret + # given they are often set at the start of the room and they one of the stripped + # state events that is normally handed out on invite/knock. + # + # Since this function is cached, we need to make a mutable copy via + # `dict(...)`. + room_id_to_state_event_id = dict( + await self.store.bulk_get_partial_current_state_ids_for_event_type( + room_ids=room_ids, event_type=event_type, state_key="" + ) + ) + room_id_to_content_field: Dict[str, Union[Optional[str], StateSentinel]] = {} + # If we're just looking for the mere existence of the event, we can use the + # map we already have for the state event IDs. + if event_content_field is None: + room_id_to_content_field = room_id_to_state_event_id + # Otherwise, we need to fetch the full event to look at the content field. + else: + state_events = await self.store.get_events( + [ + state_event_id + for state_event_id in room_id_to_state_event_id.values() + # Dont waste our time fetching `None` or ROOM_UNKNOWN_SENTINEL values + if isinstance(state_event_id, str) + ] + ) + for room_id in room_ids: + state_event_id = room_id_to_state_event_id.get(room_id) + if state_event_id is ROOM_UNKNOWN_SENTINEL: + room_id_to_content_field[room_id] = ROOM_UNKNOWN_SENTINEL + continue + elif state_event_id is None: + room_id_to_content_field[room_id] = None + continue + + # Given the above checks, we can assume `state_event_id` is a string now + assert isinstance(state_event_id, str) + + state_event = state_events.get(state_event_id) + # If the curent state says there is a state event, we should have it in + # the database. + assert state_event is not None + + room_id_to_content_field[room_id] = ( + state_event.content.get(event_content_field) + if event_content_field is not None + else "set" + ) + + room_ids_with_results = [ + room_id + for room_id, content_field in room_id_to_content_field.items() + if content_field is not ROOM_UNKNOWN_SENTINEL + ] + + # We might not have current room state for remote invite/knocks if we are + # the first person on our server to see the room. The best we can do is look + # in the optional stripped state from the invite/knock event. + room_ids_without_results = room_ids.difference( + chain(room_ids_with_results, room_id_to_stripped_state_map.keys()) + ) + room_id_to_stripped_state_map.update( + await self._bulk_get_stripped_state_for_rooms_from_sync_room_map( + room_ids_without_results, sync_room_map + ) + ) + + # Update our `room_id_to_content_field` map based on the stripped state + # (applies to invite/knock rooms) + rooms_ids_without_stripped_state: Set[str] = set() + for room_id in room_ids_without_results: + stripped_state_map = room_id_to_stripped_state_map.get( + room_id, Sentinel.UNSET_SENTINEL + ) + assert stripped_state_map is not Sentinel.UNSET_SENTINEL, ( + f"Stripped state left unset for room {room_id}. " + + "Make sure you're calling `_bulk_get_stripped_state_for_rooms_from_sync_room_map(...)` " + + "with that room_id. (this is a problem with Synapse itself)" + ) + + # If there is some stripped state, we assume the remote server passed *all* + # of the potential stripped state events for the room. + if stripped_state_map is not None: + stripped_event = stripped_state_map.get((event_type, "")) + if stripped_event is not None: + room_id_to_content_field[room_id] = ( + stripped_event.content.get(event_content_field) + if event_content_field is not None + else "set" + ) + else: + # Didn't see the state event we're looking for in the stripped + # state so we can assume relevant content field is `None`. + room_id_to_content_field[room_id] = None + else: + rooms_ids_without_stripped_state.add(room_id) + + # Last resort, we might not have current room state for rooms that the + # server has left (no one local is in the room) but we can look at the + # historical state. + # + # Update our `room_id_to_content_field` map based on the state at the time of + # the membership event. + for room_id in rooms_ids_without_stripped_state: + room_state = await self.storage_controllers.state.get_state_at( + room_id=room_id, + stream_position=to_token.copy_and_replace( + StreamKeyType.ROOM, + sync_room_map[room_id].event_pos.to_room_stream_token(), + ), + state_filter=StateFilter.from_types( + [ + (EventTypes.Create, ""), + (event_type, ""), + ] + ), + # Partially-stated rooms should have all state events except for + # remote membership events so we don't need to wait at all because + # we only want the create event and some non-member event. + await_full_state=False, + ) + # We can use the create event as a canary to tell whether the server has + # seen the room before + create_event = room_state.get((EventTypes.Create, "")) + state_event = room_state.get((event_type, "")) + + if create_event is None: + # Skip for unknown rooms + continue + + if state_event is not None: + room_id_to_content_field[room_id] = ( + state_event.content.get(event_content_field) + if event_content_field is not None + else "set" + ) + else: + # Didn't see the state event we're looking for in the stripped + # state so we can assume relevant content field is `None`. + room_id_to_content_field[room_id] = None + + return room_id_to_content_field + async def filter_rooms( self, user: UserID, @@ -1219,124 +1420,29 @@ async def filter_rooms( # Filter for encrypted rooms if filters.is_encrypted is not None: - # As a bulk shortcut, lookup the encryption state for the room based on the - # current state if the server is particpating in the room (meaning we have - # current state). Ideally, for leave/ban rooms, we would want the state at - # the time of the membership instead of current state to not leak anything - # but we consider the room encryption status to not be a secret given it's - # often set at the start of the room and it's one of the stripped state - # events that is normally handed out. - # - # Since this function is cached, we need to make a mutable copy via - # `dict(...)`. - room_id_to_encryption = dict( - await self.store.bulk_get_room_encryption(filtered_room_id_set) - ) - room_ids_with_results = [ - room_id - for room_id, encryption in room_id_to_encryption.items() - if encryption is not ROOM_UNKNOWN_SENTINEL - ] - - # We might not have current room state for remote invite/knocks if we are - # the first person on our server to see the room. The best we can do is look - # in the optional stripped state from the invite/knock event. - room_ids_without_results = filtered_room_id_set.difference( - chain(room_ids_with_results, room_id_to_stripped_state_map.keys()) - ) - room_id_to_stripped_state_map.update( - await self._bulk_get_stripped_state_for_rooms_from_sync_room_map( - room_ids_without_results, sync_room_map - ) + room_id_to_is_encrypted = await self._bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( + event_type=EventTypes.RoomEncryption, + event_content_field=None, + room_ids=filtered_room_id_set, + to_token=to_token, + sync_room_map=sync_room_map, + room_id_to_stripped_state_map=room_id_to_stripped_state_map, ) - # Update our `room_id_to_encryption` map based on the stripped state - # (applies to invite/knock rooms) - rooms_ids_without_stripped_state: Set[str] = set() - for room_id in room_ids_without_results: - stripped_state_map = room_id_to_stripped_state_map.get( - room_id, Sentinel.UNSET_SENTINEL - ) - assert stripped_state_map is not Sentinel.UNSET_SENTINEL, ( - f"Stripped state left unset for room {room_id}. " - + "Make sure you're calling `_bulk_get_stripped_state_for_rooms_from_sync_room_map(...)` " - + "with that room_id. (this is a problem with Synapse itself)" - ) - - if stripped_state_map is not None: - # We assume that if the invite/knock event has some stripped state, - # it would include the room encryption event if it was encrypted. - encryption_stripped_event = stripped_state_map.get( - (EventTypes.RoomEncryption, "") - ) - if encryption_stripped_event is not None: - room_id_to_encryption[room_id] = ( - encryption_stripped_event.content.get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - ) - else: - # Didn't see any encryption events in the stripped state so we - # can assume the room is unencrypted. - room_id_to_encryption[room_id] = None - else: - rooms_ids_without_stripped_state.add(room_id) - - # Last resort, we might not have current room state for rooms that the - # server has left (no one local is in the room) but we can look at the - # historical state. - # - # Update our `room_id_to_encryption` map based on the state at the time of - # the membership event. - for room_id in rooms_ids_without_stripped_state: - room_state = await self.storage_controllers.state.get_state_at( - room_id=room_id, - stream_position=to_token.copy_and_replace( - StreamKeyType.ROOM, - sync_room_map[room_id].event_pos.to_room_stream_token(), - ), - state_filter=StateFilter.from_types( - [ - (EventTypes.Create, ""), - (EventTypes.RoomEncryption, ""), - ] - ), - # Partially-stated rooms should have all state events except for - # remote membership events so we don't need to wait at all because - # we only want the create/room-encryptione events. - await_full_state=False, - ) - # We can use the create event as a canary to tell whether the server has - # seen the room before - create_event = room_state.get((EventTypes.Create, "")) - encryption_event = room_state.get((EventTypes.RoomEncryption, "")) - - if create_event is None: - # Skip for unknown rooms - continue - - if encryption_event is not None: - room_id_to_encryption[room_id] = encryption_event.content.get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - else: - # Didn't see any encryption events in the state so we can assume the - # room is unencrypted. - room_id_to_encryption[room_id] = None - # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items for room_id in filtered_room_id_set.copy(): - encryption = room_id_to_encryption.get(room_id, ROOM_UNKNOWN_SENTINEL) + is_encrypted = room_id_to_is_encrypted.get( + room_id, ROOM_UNKNOWN_SENTINEL + ) # Just remove rooms if we can't determine their encryption status - if encryption is ROOM_UNKNOWN_SENTINEL: + if is_encrypted is ROOM_UNKNOWN_SENTINEL: filtered_room_id_set.remove(room_id) continue # If we're looking for encrypted rooms, filter out rooms that are not # encrypted and vice versa - is_encrypted = encryption is not None if (filters.is_encrypted and not is_encrypted) or ( not filters.is_encrypted and is_encrypted ): diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 62bc4600fb2..1e94f68b292 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -495,6 +495,88 @@ def txn( return results + @cached(max_entries=10000) + async def get_partial_current_state_id_for_event_type( + self, room_id: str, event_type: str, state_key: str + ) -> Optional[str]: + raise NotImplementedError() + + @cachedList( + cached_method_name="get_partial_current_state_id_for_event_type", + list_name="room_ids", + ) + async def bulk_get_partial_current_state_ids_for_event_type( + self, room_ids: Set[str], event_type: str, state_key: str + ) -> Mapping[str, Union[Optional[str], Sentinel]]: + """ + Bulk fetch a given (type, state_key) for the given rooms (via current state). + + Since this function is cached, any missing values would be cached as `None`. In + order to distinguish between a room without the state and a room that is unknown + to the server where we might want to omit the value (which would make it cached + as `None`), instead we use the sentinel value `ROOM_UNKNOWN_SENTINEL`. + + Returns: + A mapping from room ID to the state event ID if the room has the state, + otherwise `None`. Rooms unknown to this server will return + `ROOM_UNKNOWN_SENTINEL`. + """ + + def txn( + txn: LoggingTransaction, + ) -> MutableMapping[str, Union[Optional[str], Sentinel]]: + clause, args = make_in_list_sql_clause( + txn.database_engine, "room_id", room_ids + ) + + sql = f""" + SELECT room_id, event_id, type + FROM current_state_events + WHERE + {clause} + AND ( + (type = ? AND state_key = ?) + OR (type = ? AND state_key = ?) + ) + """ + + # We can use the create event as a canary to tell whether the server has + # seen the room before + txn.execute(sql, args + [EventTypes.Create, "", event_type, state_key]) + + room_id_to_create_map: Dict[str, str] = {} + room_id_to_state_map: Dict[str, str] = {} + for row in txn: + room_id = row[0] + event_id = row[1] + event_type_from_db = row[2] + + if event_type_from_db == EventTypes.Create: + room_id_to_create_map[room_id] = event_id + + if event_type_from_db == event_type: + room_id_to_state_map[room_id] = event_id + + results: Dict[str, Union[Optional[str], Sentinel]] = {} + for room_id in room_ids: + if room_id_to_create_map.get(room_id) is None: + # We use the sentinel value to distinguish between `None` which is a + # valid room type and a room that is unknown to the server so the value + # is just unset. + results[room_id] = ROOM_UNKNOWN_SENTINEL + continue + + results[room_id] = room_id_to_state_map.get(room_id) + + return results + + results = await self.db_pool.runInteraction( + "bulk_get_partial_current_state_ids_for_event_type", + txn, + ) + + return results + @cached(max_entries=100000, iterable=True) async def get_partial_current_state_ids(self, room_id: str) -> StateMap[str]: """Get the current state event ids for a room based on the From 376d9008ed997968d0a7b83266492f3f5b4a922f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Jul 2024 19:44:22 -0500 Subject: [PATCH 37/47] Simplify and share --- synapse/handlers/sliding_sync.py | 241 +++++++++++-------------------- 1 file changed, 81 insertions(+), 160 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5c383ecaf84..5d2443e5b0e 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1187,43 +1187,45 @@ async def _bulk_get_stripped_state_for_rooms_from_sync_room_map( async def _bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( self, + # These should be restricted to the possible stripped state events (see bulk + # shortcut note below for more details). event_type: Literal[ # EventTypes.Create "m.room.create", # EventTypes.RoomEncryption "m.room.encryption", ], - event_content_field: Literal[ - None, - # EventContentFields.ROOM_TYPE - "type", - # EventContentFields.ENCRYPTION_ALGORITHM - "algorithm", - ], room_ids: Set[str], sync_room_map: Dict[str, _RoomMembershipForUser], to_token: StreamToken, room_id_to_stripped_state_map: Dict[ str, Optional[StateMap[StrippedStateEvent]] ], - ) -> Mapping[str, Union[Optional[str], StateSentinel]]: + ) -> Mapping[str, Union[Optional[Dict[str, Any]], StateSentinel]]: """ - TODO + Get the given state event content for a list of rooms. First we check the + current state of the room, then fallback to stripped state if available, then + historical state. Args: - event_type: TODO - event_content_field: TODO + event_type: State event type to fetch (event_type, "") room_ids: Room IDs to fetch the given content field for. sync_room_map: Dictionary of room IDs to sort along with membership information in the room at the time of `to_token`. to_token: We filter based on the state of the room at this token - room_id_to_stripped_state_map: TODO (Modified in place) + room_id_to_stripped_state_map: This does not need to be filled in before + calling this function. Mapping from room_id to mapping of (type, state_key) + to stripped state event. Modified in place when we fetch new rooms so we can + save work next time this function is called. Returns: - A mapping from room ID to the `event_content_field` value if the room has + A mapping from room ID to the state event content if the room has the given state event (event_type, ""), otherwise `None`. Rooms unknown to this server will return `ROOM_UNKNOWN_SENTINEL`. """ + room_id_to_content: Dict[ + str, Union[Optional[Dict[str, Any]], StateSentinel] + ] = {} # As a bulk shortcut, use the current state if the server is particpating in the # room (meaning we have current state). Ideally, for leave/ban rooms, we would @@ -1239,47 +1241,38 @@ async def _bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( room_ids=room_ids, event_type=event_type, state_key="" ) ) - room_id_to_content_field: Dict[str, Union[Optional[str], StateSentinel]] = {} - # If we're just looking for the mere existence of the event, we can use the - # map we already have for the state event IDs. - if event_content_field is None: - room_id_to_content_field = room_id_to_state_event_id - # Otherwise, we need to fetch the full event to look at the content field. - else: - state_events = await self.store.get_events( - [ - state_event_id - for state_event_id in room_id_to_state_event_id.values() - # Dont waste our time fetching `None` or ROOM_UNKNOWN_SENTINEL values - if isinstance(state_event_id, str) - ] - ) - for room_id in room_ids: - state_event_id = room_id_to_state_event_id.get(room_id) - if state_event_id is ROOM_UNKNOWN_SENTINEL: - room_id_to_content_field[room_id] = ROOM_UNKNOWN_SENTINEL - continue - elif state_event_id is None: - room_id_to_content_field[room_id] = None - continue - # Given the above checks, we can assume `state_event_id` is a string now - assert isinstance(state_event_id, str) + # We need to fetch the full event to look at the content field. + state_events = await self.store.get_events( + [ + state_event_id + for state_event_id in room_id_to_state_event_id.values() + # Dont waste our time fetching `None` or ROOM_UNKNOWN_SENTINEL values + if isinstance(state_event_id, str) + ] + ) + for room_id in room_ids: + state_event_id = room_id_to_state_event_id.get(room_id) + if state_event_id is ROOM_UNKNOWN_SENTINEL: + room_id_to_content[room_id] = ROOM_UNKNOWN_SENTINEL + continue + elif state_event_id is None: + room_id_to_content[room_id] = None + continue - state_event = state_events.get(state_event_id) - # If the curent state says there is a state event, we should have it in - # the database. - assert state_event is not None + # Given the above checks, we can assume `state_event_id` is a string now + assert isinstance(state_event_id, str) - room_id_to_content_field[room_id] = ( - state_event.content.get(event_content_field) - if event_content_field is not None - else "set" - ) + state_event = state_events.get(state_event_id) + # If the curent state says there is a state event, we should have it in + # the database. + assert state_event is not None + + room_id_to_content[room_id] = state_event.content room_ids_with_results = [ room_id - for room_id, content_field in room_id_to_content_field.items() + for room_id, content_field in room_id_to_content.items() if content_field is not ROOM_UNKNOWN_SENTINEL ] @@ -1295,7 +1288,7 @@ async def _bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( ) ) - # Update our `room_id_to_content_field` map based on the stripped state + # Update our `room_id_to_content` map based on the stripped state # (applies to invite/knock rooms) rooms_ids_without_stripped_state: Set[str] = set() for room_id in room_ids_without_results: @@ -1311,17 +1304,16 @@ async def _bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( # If there is some stripped state, we assume the remote server passed *all* # of the potential stripped state events for the room. if stripped_state_map is not None: + create_stripped_event = stripped_state_map.get((EventTypes.Create, "")) stripped_event = stripped_state_map.get((event_type, "")) - if stripped_event is not None: - room_id_to_content_field[room_id] = ( - stripped_event.content.get(event_content_field) - if event_content_field is not None - else "set" - ) - else: - # Didn't see the state event we're looking for in the stripped - # state so we can assume relevant content field is `None`. - room_id_to_content_field[room_id] = None + # Sanity check that we at-least have the create event + if create_stripped_event is not None: + if stripped_event is not None: + room_id_to_content[room_id] = stripped_event.content + else: + # Didn't see the state event we're looking for in the stripped + # state so we can assume relevant content field is `None`. + room_id_to_content[room_id] = None else: rooms_ids_without_stripped_state.add(room_id) @@ -1329,7 +1321,7 @@ async def _bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( # server has left (no one local is in the room) but we can look at the # historical state. # - # Update our `room_id_to_content_field` map based on the state at the time of + # Update our `room_id_to_content` map based on the state at the time of # the membership event. for room_id in rooms_ids_without_stripped_state: room_state = await self.storage_controllers.state.get_state_at( @@ -1359,17 +1351,13 @@ async def _bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( continue if state_event is not None: - room_id_to_content_field[room_id] = ( - state_event.content.get(event_content_field) - if event_content_field is not None - else "set" - ) + room_id_to_content[room_id] = state_event.content else: # Didn't see the state event we're looking for in the stripped # state so we can assume relevant content field is `None`. - room_id_to_content_field[room_id] = None + room_id_to_content[room_id] = None - return room_id_to_content_field + return room_id_to_content async def filter_rooms( self, @@ -1420,9 +1408,8 @@ async def filter_rooms( # Filter for encrypted rooms if filters.is_encrypted is not None: - room_id_to_is_encrypted = await self._bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( + room_id_to_encrypted_content = await self._bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( event_type=EventTypes.RoomEncryption, - event_content_field=None, room_ids=filtered_room_id_set, to_token=to_token, sync_room_map=sync_room_map, @@ -1432,17 +1419,18 @@ async def filter_rooms( # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items for room_id in filtered_room_id_set.copy(): - is_encrypted = room_id_to_is_encrypted.get( + encrypted_content = room_id_to_encrypted_content.get( room_id, ROOM_UNKNOWN_SENTINEL ) # Just remove rooms if we can't determine their encryption status - if is_encrypted is ROOM_UNKNOWN_SENTINEL: + if encrypted_content is ROOM_UNKNOWN_SENTINEL: filtered_room_id_set.remove(room_id) continue # If we're looking for encrypted rooms, filter out rooms that are not # encrypted and vice versa + is_encrypted = encrypted_content is not None if (filters.is_encrypted and not is_encrypted) or ( not filters.is_encrypted and is_encrypted ): @@ -1468,105 +1456,30 @@ async def filter_rooms( # provided in the list. `None` is a valid type for rooms which do not have a # room type. if filters.room_types is not None or filters.not_room_types is not None: - # As a bulk shortcut, lookup the room_type based on the current state if the - # server is particpating in the room (meaning we have current state). - # Ideally, for leave/ban rooms, we wouldn't be looking at the current state - # but the create event isn't secret given it's created at the beginning of - # the room meaning if you've had any membership before, you already knew - # about it and for invite/knock it is handed out as one of the stripped - # events anyway. - # - # Since this function is cached, we need to make a mutable copy via - # `dict(...)`. - room_id_to_type = dict( - await self.store.bulk_get_room_type(filtered_room_id_set) - ) - room_ids_with_results = [ - room_id - for room_id, room_type in room_id_to_type.items() - if room_type is not ROOM_UNKNOWN_SENTINEL - ] - - # We might not have room state for remote invite/knocks if we are the first - # person on our server to see the room. The best we can do is look in the - # optional stripped state from the invite/knock event. - room_ids_without_results = filtered_room_id_set.difference( - chain(room_ids_with_results, room_id_to_stripped_state_map.keys()) - ) - room_id_to_stripped_state_map.update( - await self._bulk_get_stripped_state_for_rooms_from_sync_room_map( - room_ids_without_results, sync_room_map - ) + room_id_to_create_content = await self._bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( + event_type=EventTypes.Create, + room_ids=filtered_room_id_set, + to_token=to_token, + sync_room_map=sync_room_map, + room_id_to_stripped_state_map=room_id_to_stripped_state_map, ) - # Update our `room_id_to_type` map based on the stripped state - rooms_ids_without_stripped_state = set() - for room_id in room_ids_without_results: - stripped_state_map = room_id_to_stripped_state_map.get( - room_id, Sentinel.UNSET_SENTINEL - ) - assert stripped_state_map is not Sentinel.UNSET_SENTINEL, ( - f"Stripped state left unset for room {room_id}. " - + "Make sure you're calling `_bulk_get_stripped_state_for_rooms_from_sync_room_map(...)` " - + "with that room_id. (this is a problem with Synapse itself)" - ) - - if stripped_state_map is not None: - create_stripped_event = stripped_state_map.get( - (EventTypes.Create, "") - ) - if create_stripped_event is not None: - room_id_to_type[room_id] = create_stripped_event.content.get( - EventContentFields.ROOM_TYPE - ) - else: - rooms_ids_without_stripped_state.add(room_id) - - # Last resort, we might not have current room state for rooms that the - # server has left (no one local is in the room) but we can look at the - # historical state. - # - # Update our `room_id_to_type` map based on the state at the time of - # the membership event. - for room_id in rooms_ids_without_stripped_state: - room_state = await self.storage_controllers.state.get_state_at( - room_id=room_id, - stream_position=to_token.copy_and_replace( - StreamKeyType.ROOM, - sync_room_map[room_id].event_pos.to_room_stream_token(), - ), - state_filter=StateFilter.from_types( - [ - (EventTypes.Create, ""), - ] - ), - # Partially-stated rooms should have all state events except for - # remote membership events so we don't need to wait at all because - # we only want the create event. - await_full_state=False, - ) - # We can use the create event as a canary to tell whether the server has - # seen the room before - create_event = room_state.get((EventTypes.Create, "")) - - if create_event is None: - # Skip for unknown rooms - continue - - room_id_to_type[room_id] = create_event.content.get( - EventContentFields.ROOM_TYPE - ) - # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items for room_id in filtered_room_id_set.copy(): - room_type = room_id_to_type.get(room_id, ROOM_UNKNOWN_SENTINEL) + create_content = room_id_to_create_content.get( + room_id, ROOM_UNKNOWN_SENTINEL + ) # Just remove rooms if we can't determine their type - if room_type is ROOM_UNKNOWN_SENTINEL: + if create_content is ROOM_UNKNOWN_SENTINEL: filtered_room_id_set.remove(room_id) continue + # We assume that the room is either unknown or has a create event + assert isinstance(create_content, dict) + + room_type = create_content.get(EventContentFields.ROOM_TYPE) if ( filters.room_types is not None and room_type not in filters.room_types @@ -1580,6 +1493,14 @@ async def filter_rooms( filtered_room_id_set.remove(room_id) if filters.room_name_like is not None: + # TODO: + # room_id_to_create_content = await self._bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( + # event_type=EventTypes.Name, + # room_ids=filtered_room_id_set, + # to_token=to_token, + # sync_room_map=sync_room_map, + # room_id_to_stripped_state_map=room_id_to_stripped_state_map, + # ) raise NotImplementedError() if filters.tags is not None: From 28a36e8a4e37b5c2de7479f6b45123d5c46fd972 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Jul 2024 19:50:09 -0500 Subject: [PATCH 38/47] Remove unused methods --- synapse/handlers/sliding_sync.py | 1 + synapse/storage/databases/main/state.py | 190 +----------------------- 2 files changed, 2 insertions(+), 189 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 5d2443e5b0e..a14a099cd21 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1324,6 +1324,7 @@ async def _bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( # Update our `room_id_to_content` map based on the state at the time of # the membership event. for room_id in rooms_ids_without_stripped_state: + # TODO: It would be nice to look this up in a bulk way (N+1 queries) room_state = await self.storage_controllers.state.get_state_at( room_id=room_id, stream_position=to_token.copy_and_replace( diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 1e94f68b292..8b09527e61d 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -42,7 +42,7 @@ import attr -from synapse.api.constants import EventContentFields, EventTypes, Membership +from synapse.api.constants import EventTypes, Membership from synapse.api.errors import NotFoundError, UnsupportedRoomVersionError from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.events import EventBase @@ -307,194 +307,6 @@ async def get_create_event_for_room(self, room_id: str) -> EventBase: create_event = await self.get_event(create_id) return create_event - @cached(max_entries=10000) - async def get_room_type(self, room_id: str) -> Optional[str]: - raise NotImplementedError() - - @cachedList(cached_method_name="get_room_type", list_name="room_ids") - async def bulk_get_room_type( - self, room_ids: Set[str] - ) -> Mapping[str, Union[Optional[str], Sentinel]]: - """ - Bulk fetch room types for the given rooms (via current state). - - Since this function is cached, any missing values would be cached as `None`. In - order to distinguish between an unencrypted room that has `None` encryption and - a room that is unknown to the server where we might want to omit the value - (which would make it cached as `None`), instead we use the sentinel value - `ROOM_UNKNOWN_SENTINEL`. - - Returns: - A mapping from room ID to the room's type (`None` is a valid room type). - Rooms unknown to this server will return `ROOM_UNKNOWN_SENTINEL`. - """ - - def txn( - txn: LoggingTransaction, - ) -> MutableMapping[str, Union[Optional[str], Sentinel]]: - clause, args = make_in_list_sql_clause( - txn.database_engine, "room_id", room_ids - ) - - # We can't rely on `room_stats_state.room_type` if the server has left the - # room because the `room_id` will still be in the table but everything will - # be set to `None` but `None` is a valid room type value. We join against - # the `room_stats_current` table which keeps track of the - # `current_state_events` count (and a proxy value `local_users_in_room` - # which can used to assume the server is participating in the room and has - # current state) to ensure that the data in `room_stats_state` is up-to-date - # with the current state. - # - # FIXME: Use `room_stats_current.current_state_events` instead of - # `room_stats_current.local_users_in_room` once - # https://github.com/element-hq/synapse/issues/17457 is fixed. - sql = f""" - SELECT room_id, room_type - FROM room_stats_state - INNER JOIN room_stats_current USING (room_id) - WHERE - {clause} - AND local_users_in_room > 0 - """ - - txn.execute(sql, args) - - room_id_to_type_map = {} - for row in txn: - room_id_to_type_map[row[0]] = row[1] - - return room_id_to_type_map - - results = await self.db_pool.runInteraction( - "bulk_get_room_type", - txn, - ) - - # If we haven't updated `room_stats_state` with the room yet, query the - # create events directly. This should happen only rarely so we don't - # mind if we do this in a loop. - for room_id in room_ids - results.keys(): - try: - create_event = await self.get_create_event_for_room(room_id) - room_type = create_event.content.get(EventContentFields.ROOM_TYPE) - results[room_id] = room_type - except NotFoundError: - # We use the sentinel value to distinguish between `None` which is a - # valid room type and a room that is unknown to the server so the value - # is just unset. - results[room_id] = ROOM_UNKNOWN_SENTINEL - - return results - - @cached(max_entries=10000) - async def get_room_encryption(self, room_id: str) -> Optional[str]: - raise NotImplementedError() - - @cachedList(cached_method_name="get_room_encryption", list_name="room_ids") - async def bulk_get_room_encryption( - self, room_ids: Set[str] - ) -> Mapping[str, Union[Optional[str], Sentinel]]: - """ - Bulk fetch room encryption for the given rooms (via current state). - - Since this function is cached, any missing values would be cached as `None`. In - order to distinguish between an unencrypted room that has `None` encryption and - a room that is unknown to the server where we might want to omit the value - (which would make it cached as `None`), instead we use the sentinel value - `ROOM_UNKNOWN_SENTINEL`. - - Returns: - A mapping from room ID to the room's encryption algorithm if the room is - encrypted, otherwise `None`. Rooms unknown to this server will return - `ROOM_UNKNOWN_SENTINEL`. - """ - - def txn( - txn: LoggingTransaction, - ) -> MutableMapping[str, Union[Optional[str], Sentinel]]: - clause, args = make_in_list_sql_clause( - txn.database_engine, "room_id", room_ids - ) - - # We can't rely on `room_stats_state.encryption` if the server has left the - # room because the `room_id` will still be in the table but everything will - # be set to `None` but `None` is a valid encryption value. We join against - # the `room_stats_current` table which keeps track of the - # `current_state_events` count (and a proxy value `local_users_in_room` - # which can used to assume the server is participating in the room and has - # current state) to ensure that the data in `room_stats_state` is up-to-date - # with the current state. - # - # FIXME: Use `room_stats_current.current_state_events` instead of - # `room_stats_current.local_users_in_room` once - # https://github.com/element-hq/synapse/issues/17457 is fixed. - sql = f""" - SELECT room_id, encryption - FROM room_stats_state - INNER JOIN room_stats_current USING (room_id) - WHERE - {clause} - AND local_users_in_room > 0 - """ - - txn.execute(sql, args) - - room_id_to_encryption_map = {} - for row in txn: - room_id_to_encryption_map[row[0]] = row[1] - - return room_id_to_encryption_map - - results = await self.db_pool.runInteraction( - "bulk_get_room_encryption", - txn, - ) - - # If we haven't updated `room_stats_state` with the room yet, query the state - # directly. This should happen only rarely so we don't mind if we do this in a - # loop. - encryption_event_ids: List[str] = [] - for room_id in room_ids - results.keys(): - state_map = await self.get_partial_filtered_current_state_ids( - room_id, - state_filter=StateFilter.from_types( - [ - (EventTypes.Create, ""), - (EventTypes.RoomEncryption, ""), - ] - ), - ) - # We can use the create event as a canary to tell whether the server has - # seen the room before - create_event_id = state_map.get((EventTypes.Create, "")) - encryption_event_id = state_map.get((EventTypes.RoomEncryption, "")) - - if create_event_id is None: - # We use the sentinel value to distinguish between `None` which is a - # valid room type and a room that is unknown to the server so the value - # is just unset. - results[room_id] = ROOM_UNKNOWN_SENTINEL - continue - - if encryption_event_id is None: - results[room_id] = None - else: - encryption_event_ids.append(encryption_event_id) - - encryption_event_map = await self.get_events(encryption_event_ids) - - for encryption_event_id in encryption_event_ids: - encryption_event = encryption_event_map.get(encryption_event_id) - # If the curent state says there is an encryption event, we should have it - # in the database. - assert encryption_event is not None - - results[encryption_event.room_id] = encryption_event.content.get( - EventContentFields.ENCRYPTION_ALGORITHM - ) - - return results - @cached(max_entries=10000) async def get_partial_current_state_id_for_event_type( self, room_id: str, event_type: str, state_key: str From ac7ca6702236d5d5a23a7065565025dc1e5caa7b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Jul 2024 19:59:56 -0500 Subject: [PATCH 39/47] Maybe current state in the future --- synapse/handlers/sliding_sync.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index a14a099cd21..cd9ae21b6cd 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1325,6 +1325,8 @@ async def _bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( # the membership event. for room_id in rooms_ids_without_stripped_state: # TODO: It would be nice to look this up in a bulk way (N+1 queries) + # + # TODO: `get_state_at(...)` doesn't take into account the "current state". room_state = await self.storage_controllers.state.get_state_at( room_id=room_id, stream_position=to_token.copy_and_replace( From 15e157ef5e001e9a1e2eba353d737f1963af5482 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Jul 2024 20:43:22 -0500 Subject: [PATCH 40/47] Fix cache busting --- synapse/handlers/sliding_sync.py | 9 +- synapse/storage/_base.py | 6 +- synapse/storage/databases/main/cache.py | 26 ++--- synapse/storage/databases/main/state.py | 2 +- tests/rest/client/test_sync.py | 145 ++++++++++++++++++++++++ 5 files changed, 168 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index cd9ae21b6cd..58e408adf77 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1280,7 +1280,14 @@ async def _bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( # the first person on our server to see the room. The best we can do is look # in the optional stripped state from the invite/knock event. room_ids_without_results = room_ids.difference( - chain(room_ids_with_results, room_id_to_stripped_state_map.keys()) + chain( + room_ids_with_results, + [ + room_id + for room_id, stripped_state_map in room_id_to_stripped_state_map.items() + if stripped_state_map is not None + ], + ) ) room_id_to_stripped_state_map.update( await self._bulk_get_stripped_state_for_rooms_from_sync_room_map( diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 060a8aa45e3..c4783e0ee8e 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -124,8 +124,7 @@ def _invalidate_state_caches( # Purge other caches based on room state. self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) self._attempt_to_invalidate_cache("get_partial_current_state_ids", (room_id,)) - self._attempt_to_invalidate_cache("get_room_type", (room_id,)) - self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) + self._attempt_to_invalidate_cache("get_partial_current_state_id_for_event_type", (room_id,)) def _invalidate_state_caches_all(self, room_id: str) -> None: """Invalidates caches that are based on the current state, but does @@ -149,8 +148,7 @@ def _invalidate_state_caches_all(self, room_id: str) -> None: self._attempt_to_invalidate_cache("get_user_in_room_with_profile", None) self._attempt_to_invalidate_cache("get_rooms_for_user", None) self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) - self._attempt_to_invalidate_cache("get_room_type", (room_id,)) - self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) + self._attempt_to_invalidate_cache("get_partial_current_state_id_for_event_type", (room_id,)) def _attempt_to_invalidate_cache( self, cache_name: str, key: Optional[Collection[Any]] diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 264b7c9bf4a..b6e613c4850 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -269,18 +269,15 @@ def _process_event_stream_row(self, token: int, row: EventsStreamRow) -> None: if data.type == EventTypes.Member: self.get_rooms_for_user.invalidate((data.state_key,)) # type: ignore[attr-defined] - elif data.type == EventTypes.RoomEncryption: - self.get_room_encryption.invalidate((data.room_id,)) # type: ignore[attr-defined] - elif data.type == EventTypes.Create: - self.get_room_type.invalidate((data.room_id,)) # type: ignore[attr-defined] + + self.get_partial_current_state_id_for_event_type.invalidate((data.room_id, data.type, data.state_key)) # type: ignore[attr-defined] elif row.type == EventsStreamAllStateRow.TypeId: assert isinstance(data, EventsStreamAllStateRow) # Similar to the above, but the entire caches are invalidated. This is # unfortunate for the membership caches, but should recover quickly. self._curr_state_delta_stream_cache.entity_has_changed(data.room_id, token) # type: ignore[attr-defined] self.get_rooms_for_user.invalidate_all() # type: ignore[attr-defined] - self.get_room_type.invalidate((data.room_id,)) # type: ignore[attr-defined] - self.get_room_encryption.invalidate((data.room_id,)) # type: ignore[attr-defined] + self.get_partial_current_state_id_for_event_type.invalidate((data.room_id,)) # type: ignore[attr-defined] else: raise Exception("Unknown events stream row type %s" % (row.type,)) @@ -308,6 +305,9 @@ def _invalidate_caches_for_event( self._attempt_to_invalidate_cache( "get_unread_event_push_actions_by_room_for_user", (room_id,) ) + self._attempt_to_invalidate_cache( + "get_partial_current_state_id_for_event_type", (room_id, etype, state_key) + ) # The `_get_membership_from_event_id` is immutable, except for the # case where we look up an event *before* persisting it. @@ -348,10 +348,6 @@ def _invalidate_caches_for_event( self._attempt_to_invalidate_cache( "get_forgotten_rooms_for_user", (state_key,) ) - elif etype == EventTypes.Create: - self._attempt_to_invalidate_cache("get_room_type", (room_id,)) - elif etype == EventTypes.RoomEncryption: - self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) if relates_to: self._attempt_to_invalidate_cache( @@ -409,8 +405,9 @@ def _invalidate_caches_for_room_events(self, room_id: str) -> None: self._attempt_to_invalidate_cache("get_thread_summary", None) self._attempt_to_invalidate_cache("get_thread_participated", None) self._attempt_to_invalidate_cache("get_threads", (room_id,)) - self._attempt_to_invalidate_cache("get_room_type", (room_id,)) - self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) + self._attempt_to_invalidate_cache( + "get_partial_current_state_id_for_event_type", (room_id,) + ) self._attempt_to_invalidate_cache("_get_state_group_for_event", None) @@ -463,8 +460,9 @@ def _invalidate_caches_for_room(self, room_id: str) -> None: self._attempt_to_invalidate_cache("get_forgotten_rooms_for_user", None) self._attempt_to_invalidate_cache("_get_membership_from_event_id", None) self._attempt_to_invalidate_cache("get_room_version_id", (room_id,)) - self._attempt_to_invalidate_cache("get_room_type", (room_id,)) - self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) + self._attempt_to_invalidate_cache( + "get_partial_current_state_id_for_event_type", (room_id,) + ) # And delete state caches. diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 8b09527e61d..2758b07de73 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -307,7 +307,7 @@ async def get_create_event_for_room(self, room_id: str) -> EventBase: create_event = await self.get_event(create_id) return create_event - @cached(max_entries=10000) + @cached(max_entries=10000, tree=True) async def get_partial_current_state_id_for_event_type( self, room_id: str, event_type: str, state_key: str ) -> Optional[str]: diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index f5d57e689cd..c19e0f6fa6c 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -36,6 +36,7 @@ Membership, ReceiptTypes, RelationTypes, + RoomTypes, ) from synapse.events import EventBase from synapse.handlers.sliding_sync import StateValues @@ -1627,6 +1628,150 @@ def test_filter_list(self) -> None: }, ) + def test_filter_regardless_of_membership_server_left_room(self) -> None: + """ + Test that filters apply to rooms regardless of membership. We're also + compounding the problem by having all of the local users leave the room causing + our server to leave the room. + + We want to make sure that if someone is filtering rooms, and leaves, you still + get that final update down sync that you left. + """ + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + # Create a normal room + room_id = self.helper.create_room_as(user1_id, tok=user2_tok) + self.helper.join(room_id, user1_id, tok=user1_tok) + + # Create an encrypted space room + space_room_id = self.helper.create_room_as( + user2_id, + tok=user2_tok, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE} + }, + ) + self.helper.send_state( + space_room_id, + EventTypes.RoomEncryption, + {EventContentFields.ENCRYPTION_ALGORITHM: "m.megolm.v1.aes-sha2"}, + tok=user2_tok, + ) + self.helper.join(space_room_id, user1_id, tok=user1_tok) + + # Make an initial Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint, + { + "lists": { + "all-list": { + "ranges": [[0, 99]], + "required_state": [], + "timeline_limit": 0, + "filters": {}, + }, + "foo-list": { + "ranges": [[0, 99]], + "required_state": [], + "timeline_limit": 1, + "filters": { + "is_encrypted": True, + "room_types": [RoomTypes.SPACE], + }, + }, + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + from_token = channel.json_body["pos"] + + # Make sure the response has the lists we requested + self.assertListEqual( + list(channel.json_body["lists"].keys()), + ["all-list", "foo-list"], + channel.json_body["lists"].keys(), + ) + + # Make sure the lists have the correct rooms + self.assertListEqual( + list(channel.json_body["lists"]["all-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 99], + "room_ids": [space_room_id, room_id], + } + ], + ) + self.assertListEqual( + list(channel.json_body["lists"]["foo-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 99], + "room_ids": [space_room_id], + } + ], + ) + + # Everyone leaves the encrypted space room + self.helper.leave(space_room_id, user1_id, tok=user1_tok) + self.helper.leave(space_room_id, user2_id, tok=user2_tok) + + # Make an incremental Sliding Sync request + channel = self.make_request( + "POST", + self.sync_endpoint + f"?pos={from_token}", + { + "lists": { + "all-list": { + "ranges": [[0, 99]], + "required_state": [], + "timeline_limit": 0, + "filters": {}, + }, + "foo-list": { + "ranges": [[0, 99]], + "required_state": [], + "timeline_limit": 1, + "filters": { + "is_encrypted": True, + "room_types": [RoomTypes.SPACE], + }, + }, + } + }, + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + + # Make sure the lists have the correct rooms even though we `newly_left` + self.assertListEqual( + list(channel.json_body["lists"]["all-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 99], + "room_ids": [space_room_id, room_id], + } + ], + ) + self.assertListEqual( + list(channel.json_body["lists"]["foo-list"]["ops"]), + [ + { + "op": "SYNC", + "range": [0, 99], + "room_ids": [space_room_id], + } + ], + ) + def test_sort_list(self) -> None: """ Test that the `lists` are sorted by `stream_ordering` From 19968e1ed7b2d88b3ed5696afc4838a095bcb42a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Jul 2024 20:44:24 -0500 Subject: [PATCH 41/47] Fix lints --- synapse/storage/_base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index c4783e0ee8e..ad6aa720c20 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -124,7 +124,9 @@ def _invalidate_state_caches( # Purge other caches based on room state. self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) self._attempt_to_invalidate_cache("get_partial_current_state_ids", (room_id,)) - self._attempt_to_invalidate_cache("get_partial_current_state_id_for_event_type", (room_id,)) + self._attempt_to_invalidate_cache( + "get_partial_current_state_id_for_event_type", (room_id,) + ) def _invalidate_state_caches_all(self, room_id: str) -> None: """Invalidates caches that are based on the current state, but does @@ -148,7 +150,9 @@ def _invalidate_state_caches_all(self, room_id: str) -> None: self._attempt_to_invalidate_cache("get_user_in_room_with_profile", None) self._attempt_to_invalidate_cache("get_rooms_for_user", None) self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) - self._attempt_to_invalidate_cache("get_partial_current_state_id_for_event_type", (room_id,)) + self._attempt_to_invalidate_cache( + "get_partial_current_state_id_for_event_type", (room_id,) + ) def _attempt_to_invalidate_cache( self, cache_name: str, key: Optional[Collection[Any]] From 8f5e4bf98bf0ed4790d16787420a5c865e3b07d4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Jul 2024 20:51:27 -0500 Subject: [PATCH 42/47] Shorten name --- synapse/handlers/sliding_sync.py | 42 +++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 58e408adf77..76c1eab1db5 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1185,13 +1185,23 @@ async def _bulk_get_stripped_state_for_rooms_from_sync_room_map( return room_id_to_stripped_state_map - async def _bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( + async def _bulk_get_partial_current_state_content_for_rooms( self, # These should be restricted to the possible stripped state events (see bulk # shortcut note below for more details). event_type: Literal[ # EventTypes.Create "m.room.create", + # EventTypes.Name + "m.room.name", + # EventTypes.RoomAvatar + "m.room.avatar", + # EventTypes.Topic + "m.room.topic", + # EventTypes.JoinRules + "m.room.join_rules", + # EventTypes.CanonicalAlias + "m.room.canonical_alias", # EventTypes.RoomEncryption "m.room.encryption", ], @@ -1418,12 +1428,14 @@ async def filter_rooms( # Filter for encrypted rooms if filters.is_encrypted is not None: - room_id_to_encrypted_content = await self._bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( - event_type=EventTypes.RoomEncryption, - room_ids=filtered_room_id_set, - to_token=to_token, - sync_room_map=sync_room_map, - room_id_to_stripped_state_map=room_id_to_stripped_state_map, + room_id_to_encrypted_content = ( + await self._bulk_get_partial_current_state_content_for_rooms( + event_type=EventTypes.RoomEncryption, + room_ids=filtered_room_id_set, + to_token=to_token, + sync_room_map=sync_room_map, + room_id_to_stripped_state_map=room_id_to_stripped_state_map, + ) ) # Make a copy so we don't run into an error: `Set changed size during @@ -1466,12 +1478,14 @@ async def filter_rooms( # provided in the list. `None` is a valid type for rooms which do not have a # room type. if filters.room_types is not None or filters.not_room_types is not None: - room_id_to_create_content = await self._bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( - event_type=EventTypes.Create, - room_ids=filtered_room_id_set, - to_token=to_token, - sync_room_map=sync_room_map, - room_id_to_stripped_state_map=room_id_to_stripped_state_map, + room_id_to_create_content = ( + await self._bulk_get_partial_current_state_content_for_rooms( + event_type=EventTypes.Create, + room_ids=filtered_room_id_set, + to_token=to_token, + sync_room_map=sync_room_map, + room_id_to_stripped_state_map=room_id_to_stripped_state_map, + ) ) # Make a copy so we don't run into an error: `Set changed size during @@ -1504,7 +1518,7 @@ async def filter_rooms( if filters.room_name_like is not None: # TODO: - # room_id_to_create_content = await self._bulk_get_partial_current_state_content_for_rooms_from_sync_room_map( + # room_id_to_create_content = await self._bulk_get_partial_current_state_content_for_rooms( # event_type=EventTypes.Name, # room_ids=filtered_room_id_set, # to_token=to_token, From 3c144b300a02c925864db31b4a87447edd9d89f8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Jul 2024 20:56:42 -0500 Subject: [PATCH 43/47] Reconsider all stripped events --- synapse/handlers/sliding_sync.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 76c1eab1db5..960ed1c9daf 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1187,21 +1187,13 @@ async def _bulk_get_stripped_state_for_rooms_from_sync_room_map( async def _bulk_get_partial_current_state_content_for_rooms( self, - # These should be restricted to the possible stripped state events (see bulk - # shortcut note below for more details). + # These should be restricted to the possible stripped state events and + # non-sensitive details (see bulk shortcut note below for more details). For + # example, even though the room name/avatar/topic are stripped state, they seem + # a lot more senstive to leak the current state value of. event_type: Literal[ # EventTypes.Create "m.room.create", - # EventTypes.Name - "m.room.name", - # EventTypes.RoomAvatar - "m.room.avatar", - # EventTypes.Topic - "m.room.topic", - # EventTypes.JoinRules - "m.room.join_rules", - # EventTypes.CanonicalAlias - "m.room.canonical_alias", # EventTypes.RoomEncryption "m.room.encryption", ], @@ -1240,9 +1232,9 @@ async def _bulk_get_partial_current_state_content_for_rooms( # As a bulk shortcut, use the current state if the server is particpating in the # room (meaning we have current state). Ideally, for leave/ban rooms, we would # want the state at the time of the membership instead of current state to not - # leak anything but we consider the stripped state events to not be a secret - # given they are often set at the start of the room and they one of the stripped - # state events that is normally handed out on invite/knock. + # leak anything but we consider the create/encryption stripped state events to + # not be a secret given they are often set at the start of the room and they are + # normally handed out on invite/knock. # # Since this function is cached, we need to make a mutable copy via # `dict(...)`. @@ -1517,7 +1509,10 @@ async def filter_rooms( filtered_room_id_set.remove(room_id) if filters.room_name_like is not None: - # TODO: + # TODO: The room name is a bit more sensitive to leak than the + # create/encryption event. Maybe we should consider a better way to fetch + # historical state before implementing this. + # # room_id_to_create_content = await self._bulk_get_partial_current_state_content_for_rooms( # event_type=EventTypes.Name, # room_ids=filtered_room_id_set, From 1732b022a8f49c16f22999b5e3d71008fca6fbb3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Jul 2024 11:29:40 -0500 Subject: [PATCH 44/47] Prefer cache invalidation helper See https://github.com/element-hq/synapse/pull/17450#discussion_r1680120460 This allows us to get rid of the type ignore comments and standardize --- synapse/storage/databases/main/cache.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index dd6ae4e9b7b..bc0976411d9 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -268,16 +268,23 @@ def _process_event_stream_row(self, token: int, row: EventsStreamRow) -> None: self._curr_state_delta_stream_cache.entity_has_changed(data.room_id, token) # type: ignore[attr-defined] if data.type == EventTypes.Member: - self.get_rooms_for_user.invalidate((data.state_key,)) # type: ignore[attr-defined] + self._attempt_to_invalidate_cache( + "get_rooms_for_user", (data.state_key,) + ) - self.get_partial_current_state_id_for_event_type.invalidate((data.room_id, data.type, data.state_key)) # type: ignore[attr-defined] + self._attempt_to_invalidate_cache( + "get_partial_current_state_id_for_event_type", + (data.room_id, data.type, data.state_key), + ) elif row.type == EventsStreamAllStateRow.TypeId: assert isinstance(data, EventsStreamAllStateRow) # Similar to the above, but the entire caches are invalidated. This is # unfortunate for the membership caches, but should recover quickly. self._curr_state_delta_stream_cache.entity_has_changed(data.room_id, token) # type: ignore[attr-defined] - self.get_rooms_for_user.invalidate_all() # type: ignore[attr-defined] - self.get_partial_current_state_id_for_event_type.invalidate((data.room_id,)) # type: ignore[attr-defined] + self._attempt_to_invalidate_cache("get_rooms_for_user", None) + self._attempt_to_invalidate_cache( + "get_partial_current_state_id_for_event_type", (data.room_id,) + ) else: raise Exception("Unknown events stream row type %s" % (row.type,)) From e9b76cc55c299327255d3f2ae0cd5a2796fc7738 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Jul 2024 17:34:55 -0500 Subject: [PATCH 45/47] Go back to dedicated `room_stats_state` lookups See https://github.com/element-hq/synapse/pull/17450#discussion_r1689890966 --- synapse/handlers/sliding_sync.py | 109 +++++-------- synapse/storage/databases/main/state.py | 200 ++++++++++++++++++------ 2 files changed, 195 insertions(+), 114 deletions(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index fd22da8d2b7..81d0b22191c 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -59,8 +59,8 @@ from synapse.types import ( DeviceListUpdates, JsonDict, - MutableStateMap, JsonMapping, + MutableStateMap, PersistedEventPosition, Requester, RoomStreamToken, @@ -1206,15 +1206,11 @@ async def _bulk_get_stripped_state_for_rooms_from_sync_room_map( async def _bulk_get_partial_current_state_content_for_rooms( self, - # These should be restricted to the possible stripped state events and - # non-sensitive details (see bulk shortcut note below for more details). For - # example, even though the room name/avatar/topic are stripped state, they seem - # a lot more senstive to leak the current state value of. - event_type: Literal[ - # EventTypes.Create - "m.room.create", - # EventTypes.RoomEncryption - "m.room.encryption", + content_type: Literal[ + # `content.type` from `EventTypes.Create`` + "room_type", + # `content.algorithm` from `EventTypes.RoomEncryption` + "room_encryption", ], room_ids: Set[str], sync_room_map: Dict[str, _RoomMembershipForUser], @@ -1222,14 +1218,14 @@ async def _bulk_get_partial_current_state_content_for_rooms( room_id_to_stripped_state_map: Dict[ str, Optional[StateMap[StrippedStateEvent]] ], - ) -> Mapping[str, Union[Optional[Dict[str, Any]], StateSentinel]]: + ) -> Mapping[str, Union[Optional[str], StateSentinel]]: """ Get the given state event content for a list of rooms. First we check the current state of the room, then fallback to stripped state if available, then historical state. Args: - event_type: State event type to fetch (event_type, "") + content_type: Which content to grab room_ids: Room IDs to fetch the given content field for. sync_room_map: Dictionary of room IDs to sort along with membership information in the room at the time of `to_token`. @@ -1244,9 +1240,7 @@ async def _bulk_get_partial_current_state_content_for_rooms( the given state event (event_type, ""), otherwise `None`. Rooms unknown to this server will return `ROOM_UNKNOWN_SENTINEL`. """ - room_id_to_content: Dict[ - str, Union[Optional[Dict[str, Any]], StateSentinel] - ] = {} + room_id_to_content: Dict[str, Union[Optional[str], StateSentinel]] = {} # As a bulk shortcut, use the current state if the server is particpating in the # room (meaning we have current state). Ideally, for leave/ban rooms, we would @@ -1255,41 +1249,26 @@ async def _bulk_get_partial_current_state_content_for_rooms( # not be a secret given they are often set at the start of the room and they are # normally handed out on invite/knock. # + # Be mindful to only use this for non-sensitive details. For example, even + # though the room name/avatar/topic are also stripped state, they seem a lot + # more senstive to leak the current state value of. + # # Since this function is cached, we need to make a mutable copy via # `dict(...)`. - room_id_to_state_event_id = dict( - await self.store.bulk_get_partial_current_state_ids_for_event_type( - room_ids=room_ids, event_type=event_type, state_key="" + event_type = "" + event_content_field = "" + if content_type == "room_type": + event_type = EventTypes.Create + event_content_field = EventContentFields.ROOM_TYPE + room_id_to_content = dict(await self.store.bulk_get_room_type(room_ids)) + elif content_type == "room_encryption": + event_type = EventTypes.RoomEncryption + event_content_field = EventContentFields.ENCRYPTION_ALGORITHM + room_id_to_content = dict( + await self.store.bulk_get_room_encryption(room_ids) ) - ) - - # We need to fetch the full event to look at the content field. - state_events = await self.store.get_events( - [ - state_event_id - for state_event_id in room_id_to_state_event_id.values() - # Dont waste our time fetching `None` or ROOM_UNKNOWN_SENTINEL values - if isinstance(state_event_id, str) - ] - ) - for room_id in room_ids: - state_event_id = room_id_to_state_event_id.get(room_id) - if state_event_id is ROOM_UNKNOWN_SENTINEL: - room_id_to_content[room_id] = ROOM_UNKNOWN_SENTINEL - continue - elif state_event_id is None: - room_id_to_content[room_id] = None - continue - - # Given the above checks, we can assume `state_event_id` is a string now - assert isinstance(state_event_id, str) - - state_event = state_events.get(state_event_id) - # If the curent state says there is a state event, we should have it in - # the database. - assert state_event is not None - - room_id_to_content[room_id] = state_event.content + else: + raise AssertionError(f"Unexpected content type {content_type}") room_ids_with_results = [ room_id @@ -1337,7 +1316,9 @@ async def _bulk_get_partial_current_state_content_for_rooms( # Sanity check that we at-least have the create event if create_stripped_event is not None: if stripped_event is not None: - room_id_to_content[room_id] = stripped_event.content + room_id_to_content[room_id] = stripped_event.content.get( + event_content_field + ) else: # Didn't see the state event we're looking for in the stripped # state so we can assume relevant content field is `None`. @@ -1382,7 +1363,9 @@ async def _bulk_get_partial_current_state_content_for_rooms( continue if state_event is not None: - room_id_to_content[room_id] = state_event.content + room_id_to_content[room_id] = state_event.content.get( + event_content_field + ) else: # Didn't see the state event we're looking for in the stripped # state so we can assume relevant content field is `None`. @@ -1439,9 +1422,9 @@ async def filter_rooms( # Filter for encrypted rooms if filters.is_encrypted is not None: - room_id_to_encrypted_content = ( + room_id_to_encryption = ( await self._bulk_get_partial_current_state_content_for_rooms( - event_type=EventTypes.RoomEncryption, + content_type="room_encryption", room_ids=filtered_room_id_set, to_token=to_token, sync_room_map=sync_room_map, @@ -1452,18 +1435,16 @@ async def filter_rooms( # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items for room_id in filtered_room_id_set.copy(): - encrypted_content = room_id_to_encrypted_content.get( - room_id, ROOM_UNKNOWN_SENTINEL - ) + encryption = room_id_to_encryption.get(room_id, ROOM_UNKNOWN_SENTINEL) # Just remove rooms if we can't determine their encryption status - if encrypted_content is ROOM_UNKNOWN_SENTINEL: + if encryption is ROOM_UNKNOWN_SENTINEL: filtered_room_id_set.remove(room_id) continue # If we're looking for encrypted rooms, filter out rooms that are not # encrypted and vice versa - is_encrypted = encrypted_content is not None + is_encrypted = encryption is not None if (filters.is_encrypted and not is_encrypted) or ( not filters.is_encrypted and is_encrypted ): @@ -1489,9 +1470,9 @@ async def filter_rooms( # provided in the list. `None` is a valid type for rooms which do not have a # room type. if filters.room_types is not None or filters.not_room_types is not None: - room_id_to_create_content = ( + room_id_to_type = ( await self._bulk_get_partial_current_state_content_for_rooms( - event_type=EventTypes.Create, + content_type="room_type", room_ids=filtered_room_id_set, to_token=to_token, sync_room_map=sync_room_map, @@ -1502,19 +1483,13 @@ async def filter_rooms( # Make a copy so we don't run into an error: `Set changed size during # iteration`, when we filter out and remove items for room_id in filtered_room_id_set.copy(): - create_content = room_id_to_create_content.get( - room_id, ROOM_UNKNOWN_SENTINEL - ) + room_type = room_id_to_type.get(room_id, ROOM_UNKNOWN_SENTINEL) # Just remove rooms if we can't determine their type - if create_content is ROOM_UNKNOWN_SENTINEL: + if room_type is ROOM_UNKNOWN_SENTINEL: filtered_room_id_set.remove(room_id) continue - # We assume that the room is either unknown or has a create event - assert isinstance(create_content, dict) - - room_type = create_content.get(EventContentFields.ROOM_TYPE) if ( filters.room_types is not None and room_type not in filters.room_types @@ -1533,7 +1508,7 @@ async def filter_rooms( # historical state before implementing this. # # room_id_to_create_content = await self._bulk_get_partial_current_state_content_for_rooms( - # event_type=EventTypes.Name, + # content_type="room_name", # room_ids=filtered_room_id_set, # to_token=to_token, # sync_room_map=sync_room_map, diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 2758b07de73..62bc4600fb2 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -42,7 +42,7 @@ import attr -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventContentFields, EventTypes, Membership from synapse.api.errors import NotFoundError, UnsupportedRoomVersionError from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.events import EventBase @@ -307,31 +307,26 @@ async def get_create_event_for_room(self, room_id: str) -> EventBase: create_event = await self.get_event(create_id) return create_event - @cached(max_entries=10000, tree=True) - async def get_partial_current_state_id_for_event_type( - self, room_id: str, event_type: str, state_key: str - ) -> Optional[str]: + @cached(max_entries=10000) + async def get_room_type(self, room_id: str) -> Optional[str]: raise NotImplementedError() - @cachedList( - cached_method_name="get_partial_current_state_id_for_event_type", - list_name="room_ids", - ) - async def bulk_get_partial_current_state_ids_for_event_type( - self, room_ids: Set[str], event_type: str, state_key: str + @cachedList(cached_method_name="get_room_type", list_name="room_ids") + async def bulk_get_room_type( + self, room_ids: Set[str] ) -> Mapping[str, Union[Optional[str], Sentinel]]: """ - Bulk fetch a given (type, state_key) for the given rooms (via current state). + Bulk fetch room types for the given rooms (via current state). Since this function is cached, any missing values would be cached as `None`. In - order to distinguish between a room without the state and a room that is unknown - to the server where we might want to omit the value (which would make it cached - as `None`), instead we use the sentinel value `ROOM_UNKNOWN_SENTINEL`. + order to distinguish between an unencrypted room that has `None` encryption and + a room that is unknown to the server where we might want to omit the value + (which would make it cached as `None`), instead we use the sentinel value + `ROOM_UNKNOWN_SENTINEL`. Returns: - A mapping from room ID to the state event ID if the room has the state, - otherwise `None`. Rooms unknown to this server will return - `ROOM_UNKNOWN_SENTINEL`. + A mapping from room ID to the room's type (`None` is a valid room type). + Rooms unknown to this server will return `ROOM_UNKNOWN_SENTINEL`. """ def txn( @@ -341,52 +336,163 @@ def txn( txn.database_engine, "room_id", room_ids ) + # We can't rely on `room_stats_state.room_type` if the server has left the + # room because the `room_id` will still be in the table but everything will + # be set to `None` but `None` is a valid room type value. We join against + # the `room_stats_current` table which keeps track of the + # `current_state_events` count (and a proxy value `local_users_in_room` + # which can used to assume the server is participating in the room and has + # current state) to ensure that the data in `room_stats_state` is up-to-date + # with the current state. + # + # FIXME: Use `room_stats_current.current_state_events` instead of + # `room_stats_current.local_users_in_room` once + # https://github.com/element-hq/synapse/issues/17457 is fixed. sql = f""" - SELECT room_id, event_id, type - FROM current_state_events + SELECT room_id, room_type + FROM room_stats_state + INNER JOIN room_stats_current USING (room_id) WHERE {clause} - AND ( - (type = ? AND state_key = ?) - OR (type = ? AND state_key = ?) - ) + AND local_users_in_room > 0 """ - # We can use the create event as a canary to tell whether the server has - # seen the room before - txn.execute(sql, args + [EventTypes.Create, "", event_type, state_key]) + txn.execute(sql, args) - room_id_to_create_map: Dict[str, str] = {} - room_id_to_state_map: Dict[str, str] = {} + room_id_to_type_map = {} for row in txn: - room_id = row[0] - event_id = row[1] - event_type_from_db = row[2] + room_id_to_type_map[row[0]] = row[1] + + return room_id_to_type_map + + results = await self.db_pool.runInteraction( + "bulk_get_room_type", + txn, + ) + + # If we haven't updated `room_stats_state` with the room yet, query the + # create events directly. This should happen only rarely so we don't + # mind if we do this in a loop. + for room_id in room_ids - results.keys(): + try: + create_event = await self.get_create_event_for_room(room_id) + room_type = create_event.content.get(EventContentFields.ROOM_TYPE) + results[room_id] = room_type + except NotFoundError: + # We use the sentinel value to distinguish between `None` which is a + # valid room type and a room that is unknown to the server so the value + # is just unset. + results[room_id] = ROOM_UNKNOWN_SENTINEL - if event_type_from_db == EventTypes.Create: - room_id_to_create_map[room_id] = event_id + return results - if event_type_from_db == event_type: - room_id_to_state_map[room_id] = event_id + @cached(max_entries=10000) + async def get_room_encryption(self, room_id: str) -> Optional[str]: + raise NotImplementedError() - results: Dict[str, Union[Optional[str], Sentinel]] = {} - for room_id in room_ids: - if room_id_to_create_map.get(room_id) is None: - # We use the sentinel value to distinguish between `None` which is a - # valid room type and a room that is unknown to the server so the value - # is just unset. - results[room_id] = ROOM_UNKNOWN_SENTINEL - continue + @cachedList(cached_method_name="get_room_encryption", list_name="room_ids") + async def bulk_get_room_encryption( + self, room_ids: Set[str] + ) -> Mapping[str, Union[Optional[str], Sentinel]]: + """ + Bulk fetch room encryption for the given rooms (via current state). - results[room_id] = room_id_to_state_map.get(room_id) + Since this function is cached, any missing values would be cached as `None`. In + order to distinguish between an unencrypted room that has `None` encryption and + a room that is unknown to the server where we might want to omit the value + (which would make it cached as `None`), instead we use the sentinel value + `ROOM_UNKNOWN_SENTINEL`. - return results + Returns: + A mapping from room ID to the room's encryption algorithm if the room is + encrypted, otherwise `None`. Rooms unknown to this server will return + `ROOM_UNKNOWN_SENTINEL`. + """ + + def txn( + txn: LoggingTransaction, + ) -> MutableMapping[str, Union[Optional[str], Sentinel]]: + clause, args = make_in_list_sql_clause( + txn.database_engine, "room_id", room_ids + ) + + # We can't rely on `room_stats_state.encryption` if the server has left the + # room because the `room_id` will still be in the table but everything will + # be set to `None` but `None` is a valid encryption value. We join against + # the `room_stats_current` table which keeps track of the + # `current_state_events` count (and a proxy value `local_users_in_room` + # which can used to assume the server is participating in the room and has + # current state) to ensure that the data in `room_stats_state` is up-to-date + # with the current state. + # + # FIXME: Use `room_stats_current.current_state_events` instead of + # `room_stats_current.local_users_in_room` once + # https://github.com/element-hq/synapse/issues/17457 is fixed. + sql = f""" + SELECT room_id, encryption + FROM room_stats_state + INNER JOIN room_stats_current USING (room_id) + WHERE + {clause} + AND local_users_in_room > 0 + """ + + txn.execute(sql, args) + + room_id_to_encryption_map = {} + for row in txn: + room_id_to_encryption_map[row[0]] = row[1] + + return room_id_to_encryption_map results = await self.db_pool.runInteraction( - "bulk_get_partial_current_state_ids_for_event_type", + "bulk_get_room_encryption", txn, ) + # If we haven't updated `room_stats_state` with the room yet, query the state + # directly. This should happen only rarely so we don't mind if we do this in a + # loop. + encryption_event_ids: List[str] = [] + for room_id in room_ids - results.keys(): + state_map = await self.get_partial_filtered_current_state_ids( + room_id, + state_filter=StateFilter.from_types( + [ + (EventTypes.Create, ""), + (EventTypes.RoomEncryption, ""), + ] + ), + ) + # We can use the create event as a canary to tell whether the server has + # seen the room before + create_event_id = state_map.get((EventTypes.Create, "")) + encryption_event_id = state_map.get((EventTypes.RoomEncryption, "")) + + if create_event_id is None: + # We use the sentinel value to distinguish between `None` which is a + # valid room type and a room that is unknown to the server so the value + # is just unset. + results[room_id] = ROOM_UNKNOWN_SENTINEL + continue + + if encryption_event_id is None: + results[room_id] = None + else: + encryption_event_ids.append(encryption_event_id) + + encryption_event_map = await self.get_events(encryption_event_ids) + + for encryption_event_id in encryption_event_ids: + encryption_event = encryption_event_map.get(encryption_event_id) + # If the curent state says there is an encryption event, we should have it + # in the database. + assert encryption_event is not None + + results[encryption_event.room_id] = encryption_event.content.get( + EventContentFields.ENCRYPTION_ALGORITHM + ) + return results @cached(max_entries=100000, iterable=True) From 484007e360a5e2df75ebec90a9370961f509a777 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Jul 2024 17:41:09 -0500 Subject: [PATCH 46/47] Update cache busting --- synapse/storage/_base.py | 10 +++----- synapse/storage/databases/main/cache.py | 33 ++++++++++++------------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index b67ace0b373..e12ab945767 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -127,9 +127,8 @@ def _invalidate_state_caches( # Purge other caches based on room state. self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) self._attempt_to_invalidate_cache("get_partial_current_state_ids", (room_id,)) - self._attempt_to_invalidate_cache( - "get_partial_current_state_id_for_event_type", (room_id,) - ) + self._attempt_to_invalidate_cache("get_room_type", (room_id,)) + self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) def _invalidate_state_caches_all(self, room_id: str) -> None: """Invalidates caches that are based on the current state, but does @@ -156,9 +155,8 @@ def _invalidate_state_caches_all(self, room_id: str) -> None: "_get_rooms_for_local_user_where_membership_is_inner", None ) self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) - self._attempt_to_invalidate_cache( - "get_partial_current_state_id_for_event_type", (room_id,) - ) + self._attempt_to_invalidate_cache("get_room_type", (room_id,)) + self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) def _attempt_to_invalidate_cache( self, cache_name: str, key: Optional[Collection[Any]] diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index bc0976411d9..63624f3e8f3 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -271,20 +271,20 @@ def _process_event_stream_row(self, token: int, row: EventsStreamRow) -> None: self._attempt_to_invalidate_cache( "get_rooms_for_user", (data.state_key,) ) - - self._attempt_to_invalidate_cache( - "get_partial_current_state_id_for_event_type", - (data.room_id, data.type, data.state_key), - ) + elif data.type == EventTypes.RoomEncryption: + self._attempt_to_invalidate_cache( + "get_room_encryption", (data.room_id,) + ) + elif data.type == EventTypes.Create: + self._attempt_to_invalidate_cache("get_room_type", (data.room_id,)) elif row.type == EventsStreamAllStateRow.TypeId: assert isinstance(data, EventsStreamAllStateRow) # Similar to the above, but the entire caches are invalidated. This is # unfortunate for the membership caches, but should recover quickly. self._curr_state_delta_stream_cache.entity_has_changed(data.room_id, token) # type: ignore[attr-defined] self._attempt_to_invalidate_cache("get_rooms_for_user", None) - self._attempt_to_invalidate_cache( - "get_partial_current_state_id_for_event_type", (data.room_id,) - ) + self._attempt_to_invalidate_cache("get_room_type", (data.room_id,)) + self._attempt_to_invalidate_cache("get_room_encryption", (data.room_id,)) else: raise Exception("Unknown events stream row type %s" % (row.type,)) @@ -312,9 +312,6 @@ def _invalidate_caches_for_event( self._attempt_to_invalidate_cache( "get_unread_event_push_actions_by_room_for_user", (room_id,) ) - self._attempt_to_invalidate_cache( - "get_partial_current_state_id_for_event_type", (room_id, etype, state_key) - ) # The `_get_membership_from_event_id` is immutable, except for the # case where we look up an event *before* persisting it. @@ -358,6 +355,10 @@ def _invalidate_caches_for_event( self._attempt_to_invalidate_cache( "get_forgotten_rooms_for_user", (state_key,) ) + elif etype == EventTypes.Create: + self._attempt_to_invalidate_cache("get_room_type", (room_id,)) + elif etype == EventTypes.RoomEncryption: + self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) if relates_to: self._attempt_to_invalidate_cache( @@ -418,9 +419,8 @@ def _invalidate_caches_for_room_events(self, room_id: str) -> None: self._attempt_to_invalidate_cache("get_thread_summary", None) self._attempt_to_invalidate_cache("get_thread_participated", None) self._attempt_to_invalidate_cache("get_threads", (room_id,)) - self._attempt_to_invalidate_cache( - "get_partial_current_state_id_for_event_type", (room_id,) - ) + self._attempt_to_invalidate_cache("get_room_type", (room_id,)) + self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) self._attempt_to_invalidate_cache("_get_state_group_for_event", None) @@ -473,9 +473,8 @@ def _invalidate_caches_for_room(self, room_id: str) -> None: self._attempt_to_invalidate_cache("get_forgotten_rooms_for_user", None) self._attempt_to_invalidate_cache("_get_membership_from_event_id", None) self._attempt_to_invalidate_cache("get_room_version_id", (room_id,)) - self._attempt_to_invalidate_cache( - "get_partial_current_state_id_for_event_type", (room_id,) - ) + self._attempt_to_invalidate_cache("get_room_type", (room_id,)) + self._attempt_to_invalidate_cache("get_room_encryption", (room_id,)) # And delete state caches. From 7acb1ad4590f6a616ac661ba59ab510c188c30cb Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 30 Jul 2024 11:03:26 -0500 Subject: [PATCH 47/47] Prefer `assert_never` Co-authored-by: Erik Johnston --- synapse/handlers/sliding_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 81d0b22191c..7c0dd0e04c9 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -1268,7 +1268,7 @@ async def _bulk_get_partial_current_state_content_for_rooms( await self.store.bulk_get_room_encryption(room_ids) ) else: - raise AssertionError(f"Unexpected content type {content_type}") + assert_never(content_type) room_ids_with_results = [ room_id