Skip to content

Commit

Permalink
Mark system as unhealthy on OSError Bad message errors (#4750)
Browse files Browse the repository at this point in the history
* Bad message error marks system as unhealthy

* Finish adding test cases for changes

* Rename test file for uniqueness

* bad_message to oserror_bad_message

* Omit some checks and check for network mounts
  • Loading branch information
mdegat01 authored Dec 21, 2023
1 parent b7ddfba commit 3cc6bd1
Show file tree
Hide file tree
Showing 24 changed files with 481 additions and 28 deletions.
4 changes: 4 additions & 0 deletions supervisor/addons/addon.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from collections.abc import Awaitable
from contextlib import suppress
from copy import deepcopy
import errno
from ipaddress import IPv4Address
import logging
from pathlib import Path, PurePath
Expand Down Expand Up @@ -72,6 +73,7 @@
from ..homeassistant.const import WSEvent, WSType
from ..jobs.const import JobExecutionLimit
from ..jobs.decorator import Job
from ..resolution.const import UnhealthyReason
from ..store.addon import AddonStore
from ..utils import check_port
from ..utils.apparmor import adjust_profile
Expand Down Expand Up @@ -793,6 +795,8 @@ def write_pulse(self) -> None:
try:
self.path_pulse.write_text(pulse_config, encoding="utf-8")
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
_LOGGER.error(
"Add-on %s can't write pulse/client.config: %s", self.slug, err
)
Expand Down
4 changes: 4 additions & 0 deletions supervisor/api/backups.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Backups RESTful API."""
import asyncio
import errno
import logging
from pathlib import Path
import re
Expand Down Expand Up @@ -36,6 +37,7 @@
from ..coresys import CoreSysAttributes
from ..exceptions import APIError
from ..mounts.const import MountUsage
from ..resolution.const import UnhealthyReason
from .const import CONTENT_TYPE_TAR
from .utils import api_process, api_validate

Expand Down Expand Up @@ -288,6 +290,8 @@ async def upload(self, request):
backup.write(chunk)

except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
_LOGGER.error("Can't write new backup file: %s", err)
return False

Expand Down
36 changes: 23 additions & 13 deletions supervisor/backups/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import asyncio
from collections.abc import Awaitable, Iterable
import errno
import logging
from pathlib import Path

Expand All @@ -19,6 +20,7 @@
from ..jobs.decorator import Job
from ..jobs.job_group import JobGroup
from ..mounts.mount import Mount
from ..resolution.const import UnhealthyReason
from ..utils.common import FileConfiguration
from ..utils.dt import utcnow
from ..utils.sentinel import DEFAULT
Expand All @@ -31,18 +33,6 @@
_LOGGER: logging.Logger = logging.getLogger(__name__)


def _list_backup_files(path: Path) -> Iterable[Path]:
"""Return iterable of backup files, suppress and log OSError for network mounts."""
try:
# is_dir does a stat syscall which raises if the mount is down
if path.is_dir():
return path.glob("*.tar")
except OSError as err:
_LOGGER.error("Could not list backups from %s: %s", path.as_posix(), err)

return []


class BackupManager(FileConfiguration, JobGroup):
"""Manage backups."""

Expand Down Expand Up @@ -119,6 +109,19 @@ def _change_stage(
)
self.sys_jobs.current.stage = stage

def _list_backup_files(self, path: Path) -> Iterable[Path]:
"""Return iterable of backup files, suppress and log OSError for network mounts."""
try:
# is_dir does a stat syscall which raises if the mount is down
if path.is_dir():
return path.glob("*.tar")
except OSError as err:
if err.errno == errno.EBADMSG and path == self.sys_config.path_backup:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
_LOGGER.error("Could not list backups from %s: %s", path.as_posix(), err)

return []

def _create_backup(
self,
name: str,
Expand Down Expand Up @@ -169,7 +172,7 @@ async def _load_backup(tar_file):
tasks = [
self.sys_create_task(_load_backup(tar_file))
for path in self.backup_locations
for tar_file in _list_backup_files(path)
for tar_file in self._list_backup_files(path)
]

_LOGGER.info("Found %d backup files", len(tasks))
Expand All @@ -184,6 +187,11 @@ def remove(self, backup: Backup) -> bool:
_LOGGER.info("Removed backup file %s", backup.slug)

except OSError as err:
if (
err.errno == errno.EBADMSG
and backup.tarfile.parent == self.sys_config.path_backup
):
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
_LOGGER.error("Can't remove backup %s: %s", backup.slug, err)
return False

Expand All @@ -208,6 +216,8 @@ async def import_backup(self, tar_file: Path) -> Backup | None:
backup.tarfile.rename(tar_origin)

except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
_LOGGER.error("Can't move backup file to storage: %s", err)
return None

Expand Down
4 changes: 4 additions & 0 deletions supervisor/homeassistant/module.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Home Assistant control object."""
import asyncio
from datetime import timedelta
import errno
from ipaddress import IPv4Address
import logging
from pathlib import Path, PurePath
Expand Down Expand Up @@ -42,6 +43,7 @@
from ..hardware.const import PolicyGroup
from ..hardware.data import Device
from ..jobs.decorator import Job, JobExecutionLimit
from ..resolution.const import UnhealthyReason
from ..utils import remove_folder
from ..utils.common import FileConfiguration
from ..utils.json import read_json_file, write_json_file
Expand Down Expand Up @@ -300,6 +302,8 @@ def write_pulse(self):
try:
self.path_pulse.write_text(pulse_config, encoding="utf-8")
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
_LOGGER.error("Home Assistant can't write pulse/client.config: %s", err)
else:
_LOGGER.info("Update pulse/client.config: %s", self.path_pulse)
Expand Down
9 changes: 8 additions & 1 deletion supervisor/host/apparmor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""AppArmor control for host."""
from __future__ import annotations

import errno
import logging
from pathlib import Path
import shutil
Expand All @@ -9,7 +10,7 @@

from ..coresys import CoreSys, CoreSysAttributes
from ..exceptions import DBusError, HostAppArmorError
from ..resolution.const import UnsupportedReason
from ..resolution.const import UnhealthyReason, UnsupportedReason
from ..utils.apparmor import validate_profile
from .const import HostFeature

Expand Down Expand Up @@ -80,6 +81,8 @@ async def load_profile(self, profile_name: str, profile_file: Path) -> None:
try:
await self.sys_run_in_executor(shutil.copyfile, profile_file, dest_profile)
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
raise HostAppArmorError(
f"Can't copy {profile_file}: {err}", _LOGGER.error
) from err
Expand All @@ -103,6 +106,8 @@ async def remove_profile(self, profile_name: str) -> None:
try:
await self.sys_run_in_executor(profile_file.unlink)
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
raise HostAppArmorError(
f"Can't remove profile: {err}", _LOGGER.error
) from err
Expand All @@ -117,6 +122,8 @@ async def backup_profile(self, profile_name: str, backup_file: Path) -> None:
try:
await self.sys_run_in_executor(shutil.copy, profile_file, backup_file)
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
raise HostAppArmorError(
f"Can't backup profile {profile_name}: {err}", _LOGGER.error
) from err
Expand Down
4 changes: 4 additions & 0 deletions supervisor/os/manager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""OS support on supervisor."""
from collections.abc import Awaitable
import errno
import logging
from pathlib import Path

Expand All @@ -13,6 +14,7 @@
from ..exceptions import DBusError, HassOSJobError, HassOSUpdateError
from ..jobs.const import JobCondition, JobExecutionLimit
from ..jobs.decorator import Job
from ..resolution.const import UnhealthyReason
from .data_disk import DataDisk

_LOGGER: logging.Logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -120,6 +122,8 @@ async def _download_raucb(self, url: str, raucb: Path) -> None:
) from err

except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
raise HassOSUpdateError(
f"Can't write OTA file: {err!s}", _LOGGER.error
) from err
Expand Down
7 changes: 7 additions & 0 deletions supervisor/plugins/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
import asyncio
from contextlib import suppress
import errno
import logging
from pathlib import Path, PurePath
import shutil
Expand All @@ -25,6 +26,7 @@
)
from ..jobs.const import JobExecutionLimit
from ..jobs.decorator import Job
from ..resolution.const import UnhealthyReason
from ..utils.json import write_json_file
from ..utils.sentry import capture_exception
from .base import PluginBase
Expand Down Expand Up @@ -83,6 +85,9 @@ async def load(self) -> None:
PULSE_CLIENT_TMPL.read_text(encoding="utf-8")
)
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE

_LOGGER.error("Can't read pulse-client.tmpl: %s", err)

await super().load()
Expand All @@ -93,6 +98,8 @@ async def load(self) -> None:
try:
shutil.copy(ASOUND_TMPL, asound)
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
_LOGGER.error("Can't create default asound: %s", err)

async def install(self) -> None:
Expand Down
17 changes: 16 additions & 1 deletion supervisor/plugins/dns.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
import asyncio
from contextlib import suppress
import errno
from ipaddress import IPv4Address
import logging
from pathlib import Path
Expand All @@ -29,7 +30,7 @@
)
from ..jobs.const import JobExecutionLimit
from ..jobs.decorator import Job
from ..resolution.const import ContextType, IssueType, SuggestionType
from ..resolution.const import ContextType, IssueType, SuggestionType, UnhealthyReason
from ..utils.json import write_json_file
from ..utils.sentry import capture_exception
from ..validate import dns_url
Expand Down Expand Up @@ -146,12 +147,16 @@ async def load(self) -> None:
RESOLV_TMPL.read_text(encoding="utf-8")
)
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
_LOGGER.error("Can't read resolve.tmpl: %s", err)
try:
self.hosts_template = jinja2.Template(
HOSTS_TMPL.read_text(encoding="utf-8")
)
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
_LOGGER.error("Can't read hosts.tmpl: %s", err)

await self._init_hosts()
Expand Down Expand Up @@ -364,6 +369,8 @@ async def write_hosts(self) -> None:
self.hosts.write_text, data, encoding="utf-8"
)
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
raise CoreDNSError(f"Can't update hosts: {err}", _LOGGER.error) from err

async def add_host(
Expand Down Expand Up @@ -436,6 +443,12 @@ async def repair(self) -> None:

def _write_resolv(self, resolv_conf: Path) -> None:
"""Update/Write resolv.conf file."""
if not self.resolv_template:
_LOGGER.warning(
"Resolv template is missing, cannot write/update %s", resolv_conf
)
return

nameservers = [str(self.sys_docker.network.dns), "127.0.0.11"]

# Read resolv config
Expand All @@ -445,6 +458,8 @@ def _write_resolv(self, resolv_conf: Path) -> None:
try:
resolv_conf.write_text(data)
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
_LOGGER.warning("Can't write/update %s: %s", resolv_conf, err)
return

Expand Down
3 changes: 2 additions & 1 deletion supervisor/resolution/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,10 @@ class UnhealthyReason(StrEnum):
"""Reasons for unhealthy status."""

DOCKER = "docker"
OSERROR_BAD_MESSAGE = "oserror_bad_message"
PRIVILEGED = "privileged"
SUPERVISOR = "supervisor"
SETUP = "setup"
PRIVILEGED = "privileged"
UNTRUSTED = "untrusted"


Expand Down
6 changes: 5 additions & 1 deletion supervisor/resolution/evaluations/source_mods.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Evaluation class for Supervisor source modifications."""
import errno
import logging
from pathlib import Path

from ...const import CoreState
from ...coresys import CoreSys
from ...exceptions import CodeNotaryError, CodeNotaryUntrusted
from ...utils.codenotary import calc_checksum_path_sourcecode
from ..const import ContextType, IssueType, UnsupportedReason
from ..const import ContextType, IssueType, UnhealthyReason, UnsupportedReason
from .base import EvaluateBase

_SUPERVISOR_SOURCE = Path("/usr/src/supervisor/supervisor")
Expand Down Expand Up @@ -48,6 +49,9 @@ async def evaluate(self) -> bool:
calc_checksum_path_sourcecode, _SUPERVISOR_SOURCE
)
except OSError as err:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE

self.sys_resolution.create_issue(
IssueType.CORRUPT_FILESYSTEM, ContextType.SYSTEM
)
Expand Down
7 changes: 5 additions & 2 deletions supervisor/store/data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Init file for Supervisor add-on data."""
from dataclasses import dataclass
import errno
import logging
from pathlib import Path
from typing import Any
Expand All @@ -19,7 +20,7 @@
)
from ..coresys import CoreSys, CoreSysAttributes
from ..exceptions import ConfigurationFileError
from ..resolution.const import ContextType, IssueType, SuggestionType
from ..resolution.const import ContextType, IssueType, SuggestionType, UnhealthyReason
from ..utils.common import find_one_filetype, read_json_or_yaml_file
from ..utils.json import read_json_file
from .const import StoreType
Expand Down Expand Up @@ -157,7 +158,9 @@ def _get_addons_list() -> list[Path]:
addon_list = await self.sys_run_in_executor(_get_addons_list)
except OSError as err:
suggestion = None
if path.stem != StoreType.LOCAL:
if err.errno == errno.EBADMSG:
self.sys_resolution.unhealthy = UnhealthyReason.OSERROR_BAD_MESSAGE
elif path.stem != StoreType.LOCAL:
suggestion = [SuggestionType.EXECUTE_RESET]
self.sys_resolution.create_issue(
IssueType.CORRUPT_REPOSITORY,
Expand Down
Loading

0 comments on commit 3cc6bd1

Please sign in to comment.