Add system-info command to collect information for troubleshooting (#65)

Add the command `system-info`, which outputs information about the system on which the command is run as well as the contents of the Qleverfile. The output can be appended to issue reports.
ad-freiburg · Oct 28, 2024 · e010e51 · e010e51
1 parent fb76e53
commit e010e51
Show file tree

Hide file tree

Showing 3 changed files with 248 additions and 63 deletions.
diff --git a/src/qlever/commands/system_info.py b/src/qlever/commands/system_info.py
@@ -0,0 +1,126 @@
+from __future__ import annotations
+
+import platform
+from importlib.metadata import version
+from pathlib import Path
+
+import psutil
+
+from qlever.command import QleverCommand
+from qlever.containerize import Containerize
+from qlever.log import log
+from qlever.util import format_size, run_command
+
+
+def show_heading(text: str) -> None:
+    """
+    Log `text` as a heading: the text itself, then a line of dashes of the
+    same length, then an empty line. Nothing is returned.
+    """
+    log.info(text)
+    log.info("-" * len(text))
+    log.info("")
+
+
+def get_partition(dir: Path):
+    """
+    Return the partition (as reported by `psutil.disk_partitions()`) whose
+    mountpoint is the longest one that is a parent of `dir`, or None if no
+    mountpoint is a parent of `dir`.
+    """
+    # Try the longest mountpoints first, so that the most specific partition
+    # wins. Example: with mountpoints `/` and `/home`, the directory
+    # `/home/foo` must be attributed to `/home`, although `/` is a parent of
+    # it as well.
+    longest_mountpoint_first = sorted(
+        psutil.disk_partitions(),
+        key=lambda candidate: len(candidate.mountpoint),
+        reverse=True,
+    )
+    matching = (
+        candidate
+        for candidate in longest_mountpoint_first
+        if dir.is_relative_to(candidate.mountpoint)
+    )
+    return next(matching, None)
+
+
+class SystemInfoCommand(QleverCommand):
+    """
+    Command that logs information about the host system (OS, architecture,
+    RAM, CPU, disk space, user and group) and the contents of the Qleverfile
+    in the current working directory, for use in issue reports.
+    """
+
+    def __init__(self):
+        pass
+
+    def description(self) -> str:
+        return "Gather some system info to help with troubleshooting"
+
+    def should_have_qleverfile(self) -> bool:
+        return True
+
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+        return {"runtime": ["system", "image", "server_container"]}
+
+    def additional_arguments(self, subparser) -> None:
+        pass
+
+    def execute(self, args) -> bool:
+        """
+        Log the system information and the Qleverfile. Returns False if only
+        the description of the command was requested via `args.show`, and
+        True after the information has been logged.
+        """
+        # Say what the command is doing.
+        self.show("Show system information and Qleverfile", only_show=args.show)
+        if args.show:
+            # NOTE(review): this returns False although nothing went wrong;
+            # confirm that this is what the caller expects for `--show`.
+            return False
+
+        # Show system information.
+        show_heading("System Information")
+        system = platform.system()
+        is_linux = system == "Linux"
+        is_mac = system == "Darwin"
+        is_windows = system == "Windows"
+        if is_windows:
+            log.warn("Only limited information is gathered on Windows.")
+        log.info(f"Version: {version('qlever')} (qlever --version)")
+
+        # On Linux, add the distribution name. The function
+        # `platform.freedesktop_os_release` only exists in Python >= 3.10 and
+        # raises an `OSError` if no os-release file can be read. In both
+        # cases, fall back to the plain system name.
+        pretty_name = None
+        if is_linux:
+            try:
+                os_release = platform.freedesktop_os_release()
+                pretty_name = os_release.get("PRETTY_NAME")
+            except (AttributeError, OSError):
+                pretty_name = None
+        if pretty_name:
+            log.info(f"OS: {system} ({pretty_name})")
+        else:
+            log.info(f"OS: {system}")
+        log.info(f"Arch: {platform.machine()}")
+        log.info(f"Host: {platform.node()}")
+
+        # Total and available RAM, taken from a single sample so that both
+        # numbers are consistent with each other.
+        memory = psutil.virtual_memory()
+        memory_total = memory.total / (1024.0**3)
+        memory_available = memory.available / (1024.0**3)
+        log.info(
+            f"RAM: {memory_total:.1f} GB total, " f"{memory_available:.1f} GB available"
+        )
+
+        # Number of cores and threads and the maximal CPU frequency. On some
+        # platforms, `psutil.cpu_freq()` returns None or a maximum of 0.0
+        # when the frequency cannot be determined.
+        num_cores = psutil.cpu_count(logical=False)
+        num_threads = psutil.cpu_count(logical=True)
+        cpu_freq = psutil.cpu_freq()
+        if cpu_freq is not None and cpu_freq.max:
+            cpu_freq_description = f"{cpu_freq.max / 1000:.2f} GHz"
+        else:
+            cpu_freq_description = "unknown frequency"
+        log.info(
+            f"CPU: {num_cores} Cores, "
+            f"{num_threads} Threads @ {cpu_freq_description}"
+        )
+
+        cwd = Path.cwd()
+        log.info(f"CWD: {cwd}")
+        # Free and total size of the partition on which the current working
+        # directory resides.
+        disk_usage = psutil.disk_usage(str(cwd))
+        fs_free = format_size(disk_usage.free)
+        fs_total = format_size(disk_usage.total)
+        partition = get_partition(cwd)
+        if partition is None:
+            # No mountpoint is a parent of `cwd`, so device, mountpoint and
+            # file system type are not known. Still show the disk space.
+            log.info(
+                f"Disk space in unknown partition: "
+                f"{fs_free} free / {fs_total} total"
+            )
+        else:
+            partition_description = f"{partition.device} @ {partition.mountpoint}"
+            fs_type = partition.fstype
+            log.info(
+                f"Disk space in {partition_description} is "
+                f"({fs_type}): {fs_free} free / {fs_total} total"
+            )
+
+        # User/Group on host and in container
+        if is_linux or is_mac:
+            user_info = run_command("id", return_output=True).strip()
+            log.info(f"User and group on host: {user_info}")
+        elif is_windows:
+            user_info = run_command("whoami /all", return_output=True).strip()
+            log.info(f"User and group on host: {user_info}")
+        if args.system in Containerize.supported_systems():
+            user_info = Containerize.run_in_container("id", args).strip()
+            log.info(f"User and group in container: {user_info}")
+
+        # Show Qleverfile.
+        log.info("")
+        show_heading("Contents of Qleverfile")
+        qleverfile = cwd / "Qleverfile"
+        if qleverfile.exists():
+            # TODO: output the effective Qleverfile using primitives from #57
+            log.info(qleverfile.read_text())
+        else:
+            log.info("No Qleverfile found")
+        return True
diff --git a/src/qlever/containerize.py b/src/qlever/containerize.py
@@ -9,7 +9,7 @@
 from typing import Optional
 
 from qlever.log import log
-from qlever.util import run_command
+from qlever.util import run_command, get_random_string
 
 
 class ContainerizeException(Exception):
@@ -31,12 +31,16 @@ def supported_systems() -> list[str]:
         return ["docker", "podman"]
 
     @staticmethod
-    def containerize_command(cmd: str, container_system: str,
-                             run_subcommand: str,
-                             image_name: str, container_name: str,
-                             volumes: list[tuple[str, str]] = [],
-                             ports: list[tuple[int, int]] = [],
-                             working_directory: Optional[str] = None) -> str:
+    def containerize_command(
+        cmd: str,
+        container_system: str,
+        run_subcommand: str,
+        image_name: str,
+        container_name: str,
+        volumes: list[tuple[str, str]] = [],
+        ports: list[tuple[int, int]] = [],
+        working_directory: Optional[str] = None,
+    ) -> str:
         """
         Get the command to run `cmd` with the given `container_system` and the
         given options.
@@ -45,8 +49,9 @@ def containerize_command(cmd: str, container_system: str,
         # Check that `container_system` is supported.
         if container_system not in Containerize.supported_systems():
             return ContainerizeException(
-                    f"Invalid container system \"{container_system}\""
-                    f" (must be one of {Containerize.supported_systems()})")
+                f'Invalid container system "{container_system}"'
+                f" (must be one of {Containerize.supported_systems()})"
+            )
 
         # Set user and group ids. This is important so that the files created
         # by the containerized command are owned by the user running the
@@ -62,37 +67,40 @@ def containerize_command(cmd: str, container_system: str,
         # dir.
         volume_options = "".join([f" -v {v1}:{v2}" for v1, v2 in volumes])
         port_options = "".join([f" -p {p1}:{p2}" for p1, p2 in ports])
-        working_directory_option = (f" -w {working_directory}"
-                                    if working_directory is not None else "")
+        working_directory_option = (
+            f" -w {working_directory}" if working_directory is not None else ""
+        )
 
         # Construct the command that runs `cmd` with the given container
         # system.
-        containerized_cmd = (f"{container_system} {run_subcommand}"
-                             f"{user_option}"
-                             f" -v /etc/localtime:/etc/localtime:ro"
-                             f"{volume_options}"
-                             f"{port_options}"
-                             f"{working_directory_option}"
-                             f" --init"
-                             f" --entrypoint bash"
-                             f" --name {container_name} {image_name}"
-                             f" -c {shlex.quote(cmd)}")
+        containerized_cmd = (
+            f"{container_system} {run_subcommand}"
+            f"{user_option}"
+            f" -v /etc/localtime:/etc/localtime:ro"
+            f"{volume_options}"
+            f"{port_options}"
+            f"{working_directory_option}"
+            f" --init"
+            f" --entrypoint bash"
+            f" --name {container_name} {image_name}"
+            f" -c {shlex.quote(cmd)}"
+        )
         return containerized_cmd
 
     @staticmethod
     def is_running(container_system: str, container_name: str) -> bool:
         # Note: the `{{{{` and `}}}}` result in `{{` and `}}`, respectively.
         containers = (
-            run_command(f"{container_system} ps --format=\"{{{{.Names}}}}\"",
-                        return_output=True)
+            run_command(
+                f'{container_system} ps --format="{{{{.Names}}}}"', return_output=True
+            )
             .strip()
             .splitlines()
         )
         return container_name in containers
 
     @staticmethod
-    def stop_and_remove_container(container_system: str,
-                                  container_name: str) -> bool:
+    def stop_and_remove_container(container_system: str, container_name: str) -> bool:
         """
         Stop the container with the given name using the given system. Return
         `True` if a container with that name was found and stopped, `False`
@@ -102,19 +110,45 @@ def stop_and_remove_container(container_system: str,
         # Check that `container_system` is supported.
         if container_system not in Containerize.supported_systems():
             return ContainerizeException(
-                    f"Invalid container system \"{container_system}\""
-                    f" (must be one of {Containerize.supported_systems()})")
+                f'Invalid container system "{container_system}"'
+                f" (must be one of {Containerize.supported_systems()})"
+            )
 
         # Construct the command that stops the container.
-        stop_cmd = f"{container_system} stop {container_name} && " \
-                   f"{container_system} rm {container_name}"
+        stop_cmd = (
+            f"{container_system} stop {container_name} && "
+            f"{container_system} rm {container_name}"
+        )
 
         # Run the command.
         try:
-            subprocess.run(stop_cmd, shell=True, check=True,
-                           stdout=subprocess.DEVNULL,
-                           stderr=subprocess.DEVNULL)
+            subprocess.run(
+                stop_cmd,
+                shell=True,
+                check=True,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
             return True
         except Exception as e:
-            log.debug(f"Error running \"{stop_cmd}\": {e}")
+            log.debug(f'Error running "{stop_cmd}": {e}')
             return False
+
+    @staticmethod
+    def run_in_container(cmd: str, args) -> Optional[str]:
+        """
+        Run `cmd` in a container of the image `args.image` using the
+        container system `args.system`, and return the output of the command.
+        The container is started with `run --rm`, the current working
+        directory is mounted as `/index`, and `/index` is the working
+        directory inside the container.
+
+        If `args.server_container` is empty, it is set to the result of
+        `get_random_string(20)`; note that this modifies `args`. Returns
+        `None` if `args.system` is not one of
+        `Containerize.supported_systems()`.
+        """
+        if args.system not in Containerize.supported_systems():
+            return None
+        if not args.server_container:
+            args.server_container = get_random_string(20)
+        # NOTE(review): `-t` asks for a TTY although the output is captured,
+        # which presumably fails when stdin is not a terminal (for example, in
+        # CI) -- confirm. Also, `containerize_command` appends its own
+        # `--entrypoint bash` after this subcommand, so the `--entrypoint ""`
+        # given here looks redundant.
+        run_cmd = Containerize.containerize_command(
+            cmd,
+            args.system,
+            'run --rm -it --entrypoint "" ',
+            args.image,
+            args.server_container,
+            volumes=[("$(pwd)", "/index")],
+            working_directory="/index",
+        )
+        return run_command(run_cmd, return_output=True)