Skip to content

Commit

Permalink
Improve D-Bus error handling for NetworkManager (#4720)
Browse files Browse the repository at this point in the history
* Improve D-Bus error handling for NetworkManager

There are quite some errors captured which are related by seemingly a
suddenly missing NetworkManager. Errors appear as:
23-11-21 17:42:50 ERROR (MainThread) [supervisor.dbus.network] Error while processing /org/freedesktop/NetworkManager/Devices/10: Remote peer disconnected
...
23-11-21 17:42:50 ERROR (MainThread) [supervisor.dbus.network] Error while processing /org/freedesktop/NetworkManager/Devices/35: The name is not activatable

Both errors seem to already happen at introspection time, however
the current code doesn't converts these errors to Supervisor issues.
This PR uses the already existing `DBus.from_dbus_error()`.

Furthermore this adds a new Exception `DBusNoReplyError` for the
`ErrorType.NO_REPLY` (or `org.freedesktop.DBus.Error.NoReply` in
D-Bus terms, which is the type of the first of the two issues above).

And finally it separates the `ErrorType.SERVICE_UNKNOWN` (or
`org.freedesktop.DBus.Error.ServiceUnknown` in D-Bus terms, which is
the second of the above issue) from `DBusInterfaceError` into a new
`DBusServiceUnkownError`.

This allows to handle errors more specifically.

To avoid too much churn, all instances where `DBusInterfaceError`
got handled, we are now also handling `DBusServiceUnkownError`.

The `DBusNoReplyError` and `DBusServiceUnkownError` appear when
the NetworkManager service stops or crashes. Instead of retrying
every interface we know, just give up if one of these issues appear.
This should significantly lower error messages users are seeing
and Sentry events.

* Remove unnecessary statement

* Fix pytests

* Make sure error strings are compared correctly

* Fix typo/remove unnecessary pylint exception

* Fix DBusError typing

* Add pytest for from_dbus_error

* Revert "Make sure error strings are compared correctly"

This reverts commit 10dc2e4.

* Add test cases

---------

Co-authored-by: Mike Degatano <[email protected]>
  • Loading branch information
agners and mdegat01 authored Nov 27, 2023
1 parent 172a705 commit 9088810
Show file tree
Hide file tree
Showing 23 changed files with 232 additions and 41 deletions.
4 changes: 2 additions & 2 deletions supervisor/dbus/agent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from awesomeversion import AwesomeVersion
from dbus_fast.aio.message_bus import MessageBus

from ...exceptions import DBusError, DBusInterfaceError
from ...exceptions import DBusError, DBusInterfaceError, DBusServiceUnkownError
from ..const import (
DBUS_ATTR_DIAGNOSTICS,
DBUS_ATTR_VERSION,
Expand Down Expand Up @@ -99,7 +99,7 @@ async def connect(self, bus: MessageBus) -> None:
await asyncio.gather(*[dbus.connect(bus) for dbus in self.all])
except DBusError:
_LOGGER.warning("Can't connect to OS-Agent")
except DBusInterfaceError:
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning(
"No OS-Agent support on the host. Some Host functions have been disabled."
)
Expand Down
4 changes: 2 additions & 2 deletions supervisor/dbus/hostname.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from dbus_fast.aio.message_bus import MessageBus

from ..exceptions import DBusError, DBusInterfaceError
from ..exceptions import DBusError, DBusInterfaceError, DBusServiceUnkownError
from .const import (
DBUS_ATTR_CHASSIS,
DBUS_ATTR_DEPLOYMENT,
Expand Down Expand Up @@ -39,7 +39,7 @@ async def connect(self, bus: MessageBus):
await super().connect(bus)
except DBusError:
_LOGGER.warning("Can't connect to systemd-hostname")
except DBusInterfaceError:
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning(
"No hostname support on the host. Hostname functions have been disabled."
)
Expand Down
6 changes: 3 additions & 3 deletions supervisor/dbus/logind.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from dbus_fast.aio.message_bus import MessageBus

from ..exceptions import DBusError, DBusInterfaceError
from ..exceptions import DBusError, DBusInterfaceError, DBusServiceUnkownError
from .const import DBUS_NAME_LOGIND, DBUS_OBJECT_LOGIND
from .interface import DBusInterface
from .utils import dbus_connected
Expand All @@ -28,8 +28,8 @@ async def connect(self, bus: MessageBus):
await super().connect(bus)
except DBusError:
_LOGGER.warning("Can't connect to systemd-logind")
except DBusInterfaceError:
_LOGGER.info("No systemd-logind support on the host.")
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning("No systemd-logind support on the host.")

@dbus_connected
async def reboot(self) -> None:
Expand Down
20 changes: 18 additions & 2 deletions supervisor/dbus/network/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
DBusError,
DBusFatalError,
DBusInterfaceError,
DBusNoReplyError,
DBusServiceUnkownError,
HostNotSupportedError,
NetworkInterfaceNotFound,
)
Expand Down Expand Up @@ -143,7 +145,7 @@ async def connect(self, bus: MessageBus) -> None:
await self.settings.connect(bus)
except DBusError:
_LOGGER.warning("Can't connect to Network Manager")
except DBusInterfaceError:
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning(
"No Network Manager support on the host. Local network functions have been disabled."
)
Expand Down Expand Up @@ -210,8 +212,22 @@ async def update(self, changed: dict[str, Any] | None = None) -> None:
# try to query it. Ignore those cases.
_LOGGER.debug("Can't process %s: %s", device, err)
continue
except (
DBusNoReplyError,
DBusServiceUnkownError,
) as err:
# This typically means that NetworkManager disappeared. Give up immeaditly.
_LOGGER.error(
"NetworkManager not responding while processing %s: %s. Giving up.",
device,
err,
)
capture_exception(err)
return
except Exception as err: # pylint: disable=broad-except
_LOGGER.exception("Error while processing %s: %s", device, err)
_LOGGER.exception(
"Unkown error while processing %s: %s", device, err
)
capture_exception(err)
continue

Expand Down
4 changes: 2 additions & 2 deletions supervisor/dbus/network/dns.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
ATTR_PRIORITY,
ATTR_VPN,
)
from ...exceptions import DBusError, DBusInterfaceError
from ...exceptions import DBusError, DBusInterfaceError, DBusServiceUnkownError
from ..const import (
DBUS_ATTR_CONFIGURATION,
DBUS_ATTR_MODE,
Expand Down Expand Up @@ -67,7 +67,7 @@ async def connect(self, bus: MessageBus) -> None:
await super().connect(bus)
except DBusError:
_LOGGER.warning("Can't connect to DnsManager")
except DBusInterfaceError:
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning(
"No DnsManager support on the host. Local DNS functions have been disabled."
)
Expand Down
4 changes: 2 additions & 2 deletions supervisor/dbus/network/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from dbus_fast.aio.message_bus import MessageBus

from ...exceptions import DBusError, DBusInterfaceError
from ...exceptions import DBusError, DBusInterfaceError, DBusServiceUnkownError
from ..const import DBUS_NAME_NM, DBUS_OBJECT_SETTINGS
from ..interface import DBusInterface
from ..network.setting import NetworkSetting
Expand All @@ -28,7 +28,7 @@ async def connect(self, bus: MessageBus) -> None:
await super().connect(bus)
except DBusError:
_LOGGER.warning("Can't connect to Network Manager Settings")
except DBusInterfaceError:
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning(
"No Network Manager Settings support on the host. Local network functions have been disabled."
)
Expand Down
4 changes: 2 additions & 2 deletions supervisor/dbus/rauc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from dbus_fast.aio.message_bus import MessageBus

from ..exceptions import DBusError, DBusInterfaceError
from ..exceptions import DBusError, DBusInterfaceError, DBusServiceUnkownError
from ..utils.dbus import DBusSignalWrapper
from .const import (
DBUS_ATTR_BOOT_SLOT,
Expand Down Expand Up @@ -49,7 +49,7 @@ async def connect(self, bus: MessageBus):
await super().connect(bus)
except DBusError:
_LOGGER.warning("Can't connect to rauc")
except DBusInterfaceError:
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning("Host has no rauc support. OTA updates have been disabled.")

@property
Expand Down
4 changes: 2 additions & 2 deletions supervisor/dbus/resolved.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from dbus_fast.aio.message_bus import MessageBus

from ..exceptions import DBusError, DBusInterfaceError
from ..exceptions import DBusError, DBusInterfaceError, DBusServiceUnkownError
from .const import (
DBUS_ATTR_CACHE_STATISTICS,
DBUS_ATTR_CURRENT_DNS_SERVER,
Expand Down Expand Up @@ -59,7 +59,7 @@ async def connect(self, bus: MessageBus):
await super().connect(bus)
except DBusError:
_LOGGER.warning("Can't connect to systemd-resolved.")
except DBusInterfaceError:
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning(
"Host has no systemd-resolved support. DNS will not work correctly."
)
Expand Down
3 changes: 2 additions & 1 deletion supervisor/dbus/systemd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
DBusError,
DBusFatalError,
DBusInterfaceError,
DBusServiceUnkownError,
DBusSystemdNoSuchUnit,
)
from .const import (
Expand Down Expand Up @@ -86,7 +87,7 @@ async def connect(self, bus: MessageBus):
await super().connect(bus)
except DBusError:
_LOGGER.warning("Can't connect to systemd")
except DBusInterfaceError:
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning(
"No systemd support on the host. Host control has been disabled."
)
Expand Down
4 changes: 2 additions & 2 deletions supervisor/dbus/timedate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from dbus_fast.aio.message_bus import MessageBus

from ..exceptions import DBusError, DBusInterfaceError
from ..exceptions import DBusError, DBusInterfaceError, DBusServiceUnkownError
from ..utils.dt import utc_from_timestamp
from .const import (
DBUS_ATTR_NTP,
Expand Down Expand Up @@ -63,7 +63,7 @@ async def connect(self, bus: MessageBus):
await super().connect(bus)
except DBusError:
_LOGGER.warning("Can't connect to systemd-timedate")
except DBusInterfaceError:
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning(
"No timedate support on the host. Time/Date functions have been disabled."
)
Expand Down
9 changes: 7 additions & 2 deletions supervisor/dbus/udisks2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@
from awesomeversion import AwesomeVersion
from dbus_fast.aio import MessageBus

from ...exceptions import DBusError, DBusInterfaceError, DBusObjectError
from ...exceptions import (
DBusError,
DBusInterfaceError,
DBusObjectError,
DBusServiceUnkownError,
)
from ..const import (
DBUS_ATTR_SUPPORTED_FILESYSTEMS,
DBUS_ATTR_VERSION,
Expand Down Expand Up @@ -45,7 +50,7 @@ async def connect(self, bus: MessageBus):
await super().connect(bus)
except DBusError:
_LOGGER.warning("Can't connect to udisks2")
except DBusInterfaceError:
except (DBusServiceUnkownError, DBusInterfaceError):
_LOGGER.warning(
"No udisks2 support on the host. Host control has been disabled."
)
Expand Down
8 changes: 8 additions & 0 deletions supervisor/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,10 @@ class DBusNotConnectedError(HostNotSupportedError):
"""D-Bus is not connected and call a method."""


class DBusServiceUnkownError(HassioNotSupportedError):
"""D-Bus service was not available."""


class DBusInterfaceError(HassioNotSupportedError):
"""D-Bus interface not connected."""

Expand Down Expand Up @@ -363,6 +367,10 @@ class DBusTimeoutError(DBusError):
"""D-Bus call timed out."""


class DBusNoReplyError(DBusError):
"""D-Bus remote didn't reply/disconnected."""


class DBusFatalError(DBusError):
"""D-Bus call going wrong.
Expand Down
17 changes: 13 additions & 4 deletions supervisor/utils/dbus.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,21 @@
)
from dbus_fast.aio.message_bus import MessageBus
from dbus_fast.aio.proxy_object import ProxyInterface, ProxyObject
from dbus_fast.errors import DBusError
from dbus_fast.errors import DBusError as DBusFastDBusError
from dbus_fast.introspection import Node

from ..exceptions import (
DBusError,
DBusFatalError,
DBusInterfaceError,
DBusInterfaceMethodError,
DBusInterfacePropertyError,
DBusInterfaceSignalError,
DBusNoReplyError,
DBusNotConnectedError,
DBusObjectError,
DBusParseError,
DBusServiceUnkownError,
DBusTimeoutError,
HassioNotSupportedError,
)
Expand Down Expand Up @@ -62,9 +65,11 @@ async def connect(bus: MessageBus, bus_name: str, object_path: str) -> DBus:
return self

@staticmethod
def from_dbus_error(err: DBusError) -> HassioNotSupportedError | DBusError:
def from_dbus_error(err: DBusFastDBusError) -> HassioNotSupportedError | DBusError:
"""Return correct dbus error based on type."""
if err.type in {ErrorType.SERVICE_UNKNOWN, ErrorType.UNKNOWN_INTERFACE}:
if err.type == ErrorType.SERVICE_UNKNOWN:
return DBusServiceUnkownError(err.text)
if err.type == ErrorType.UNKNOWN_INTERFACE:
return DBusInterfaceError(err.text)
if err.type in {
ErrorType.UNKNOWN_METHOD,
Expand All @@ -80,6 +85,8 @@ def from_dbus_error(err: DBusError) -> HassioNotSupportedError | DBusError:
return DBusNotConnectedError(err.text)
if err.type == ErrorType.TIMEOUT:
return DBusTimeoutError(err.text)
if err.type == ErrorType.NO_REPLY:
return DBusNoReplyError(err.text)
return DBusFatalError(err.text, type_=err.type)

@staticmethod
Expand All @@ -102,7 +109,7 @@ async def call_dbus(
*args, unpack_variants=True
)
return await getattr(proxy_interface, method)(*args)
except DBusError as err:
except DBusFastDBusError as err:
raise DBus.from_dbus_error(err)
except Exception as err: # pylint: disable=broad-except
capture_exception(err)
Expand All @@ -126,6 +133,8 @@ async def introspect(self) -> Node:
raise DBusParseError(
f"Can't parse introspect data: {err}", _LOGGER.error
) from err
except DBusFastDBusError as err:
raise DBus.from_dbus_error(err)
except (EOFError, TimeoutError):
_LOGGER.warning(
"Busy system at %s - %s", self.bus_name, self.object_path
Expand Down
36 changes: 35 additions & 1 deletion tests/dbus/agent/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@

from supervisor.dbus.agent import OSAgent

from tests.common import mock_dbus_services
from tests.dbus_service_mocks.base import DBusServiceMock
from tests.dbus_service_mocks.os_agent import OSAgent as OSAgentService


@pytest.fixture(name="os_agent_service", autouse=True)
@pytest.fixture(name="os_agent_service")
async def fixture_os_agent_service(
os_agent_services: dict[str, DBusServiceMock]
) -> OSAgentService:
Expand Down Expand Up @@ -39,3 +40,36 @@ async def test_dbus_osagent(
await os_agent_service.ping()
await os_agent_service.ping()
assert os_agent.diagnostics is True


@pytest.mark.parametrize(
"skip_service",
[
"os_agent",
"agent_apparmor",
"agent_datadisk",
],
)
async def test_dbus_osagent_connect_error(
skip_service: str, dbus_session_bus: MessageBus, caplog: pytest.LogCaptureFixture
):
"""Test OS Agent errors during connect."""
os_agent_services = {
"os_agent": None,
"agent_apparmor": None,
"agent_cgroup": None,
"agent_datadisk": None,
"agent_system": None,
"agent_boards": None,
"agent_boards_yellow": None,
}
os_agent_services.pop(skip_service)
await mock_dbus_services(
os_agent_services,
dbus_session_bus,
)

os_agent = OSAgent()
await os_agent.connect(dbus_session_bus)

assert "No OS-Agent support on the host" in caplog.text
11 changes: 10 additions & 1 deletion tests/dbus/network/test_dns.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from tests.dbus_service_mocks.network_dns_manager import DnsManager as DnsManagerService


@pytest.fixture(name="dns_manager_service", autouse=True)
@pytest.fixture(name="dns_manager_service")
async def fixture_dns_manager_service(
dbus_session_bus: MessageBus,
) -> DnsManagerService:
Expand Down Expand Up @@ -49,3 +49,12 @@ async def test_dns(
await dns_manager_service.ping()
await dns_manager_service.ping()
assert dns_manager.mode == "default"


async def test_dbus_dns_connect_error(
dbus_session_bus: MessageBus, caplog: pytest.LogCaptureFixture
):
"""Test connecting to dns error."""
dns_manager = NetworkManagerDNS()
await dns_manager.connect(dbus_session_bus)
assert "No DnsManager support on the host" in caplog.text
Loading

0 comments on commit 9088810

Please sign in to comment.