From e010e5184657dac5706005739ae5a7e8ff6ddb39 Mon Sep 17 00:00:00 2001 From: Julian <14220769+Qup42@users.noreply.github.com> Date: Mon, 28 Oct 2024 16:18:36 +0100 Subject: [PATCH] Add `system-info` command to collect infos for troubleshooting (#65) Add command `system-info` that outputs information about the system where the command is run and the content of the Qleverfile. The output can be appended to issue reports. --- src/qlever/commands/system_info.py | 126 +++++++++++++++++++++++++++++ src/qlever/containerize.py | 100 +++++++++++++++-------- src/qlever/util.py | 85 ++++++++++++------- 3 files changed, 248 insertions(+), 63 deletions(-) create mode 100644 src/qlever/commands/system_info.py diff --git a/src/qlever/commands/system_info.py b/src/qlever/commands/system_info.py new file mode 100644 index 00000000..0d1ed167 --- /dev/null +++ b/src/qlever/commands/system_info.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import platform +from importlib.metadata import version +from pathlib import Path + +import psutil + +from qlever.command import QleverCommand +from qlever.containerize import Containerize +from qlever.log import log +from qlever.util import format_size, run_command + + +def show_heading(text: str) -> str: + log.info(text) + log.info("-" * len(text)) + log.info("") + + +def get_partition(dir: Path): + """ + Returns the partition on which `dir` resides. May return None. + """ + # The first partition whose mountpoint is a parent of `dir` is + # returned. Sort the partitions by the length of the mountpoint to ensure + # that the result is correct. Assume there are partitions with mountpoint + # `/` and `/home`. This ensures that `/home/foo` is detected as being in + # the partition with mountpoint `/home`. 
+ partitions = sorted( + psutil.disk_partitions(), + key=lambda part: len(part.mountpoint), + reverse=True, + ) + for partition in partitions: + if dir.is_relative_to(partition.mountpoint): + return partition + return None + + +class SystemInfoCommand(QleverCommand): + def __init__(self): + pass + + def description(self) -> str: + return "Gather some system info to help with troubleshooting" + + def should_have_qleverfile(self) -> bool: + return True + + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return {"runtime": ["system", "image", "server_container"]} + + def additional_arguments(self, subparser) -> None: + pass + + def execute(self, args) -> bool: + # Say what the command is doing. + self.show("Show system information and Qleverfile", only_show=args.show) + if args.show: + return False + + # Show system information. + show_heading("System Information") + system = platform.system() + is_linux = system == "Linux" + is_mac = system == "Darwin" + is_windows = system == "Windows" + if is_windows: + log.warn("Only limited information is gathered on Windows.") + log.info(f"Version: {version('qlever')} (qlever --version)") + if is_linux: + info = platform.freedesktop_os_release() + log.info(f"OS: {platform.system()} ({info['PRETTY_NAME']})") + else: + log.info(f"OS: {platform.system()}") + log.info(f"Arch: {platform.machine()}") + log.info(f"Host: {platform.node()}") + psutil.virtual_memory().total / (1000**3) + memory_total = psutil.virtual_memory().total / (1024.0**3) + memory_available = psutil.virtual_memory().available / (1024.0**3) + log.info( + f"RAM: {memory_total:.1f} GB total, " f"{memory_available:.1f} GB available" + ) + num_cores = psutil.cpu_count(logical=False) + num_threads = psutil.cpu_count(logical=True) + cpu_freq = psutil.cpu_freq().max / 1000 + log.info( + f"CPU: {num_cores} Cores, " f"{num_threads} Threads @ {cpu_freq:.2f} GHz" + ) + + cwd = Path.cwd() + log.info(f"CWD: {cwd}") + # Free and total size of the partition on 
which the current working + # directory resides. + disk_usage = psutil.disk_usage(str(cwd)) + partition = get_partition(cwd) + partition_description = f"{partition.device} @ {partition.mountpoint}" + fs_type = partition.fstype + fs_free = format_size(disk_usage.free) + fs_total = format_size(disk_usage.total) + log.info( + f"Disk space in {partition_description} is " + f"({fs_type}): {fs_free} free / {fs_total} total" + ) + # User/Group on host and in container + if is_linux or is_mac: + user_info = run_command("id", return_output=True).strip() + log.info(f"User and group on host: {user_info}") + elif is_windows: + user_info = run_command("whoami /all", return_output=True).strip() + log.info(f"User and group on host: {user_info}") + if args.system in Containerize.supported_systems(): + user_info = Containerize.run_in_container("id", args).strip() + log.info(f"User and group in container: {user_info}") + + # Show Qleverfile. + log.info("") + show_heading("Contents of Qleverfile") + qleverfile = cwd / "Qleverfile" + if qleverfile.exists(): + # TODO: output the effective qlever file using primitives from #57 + log.info(qleverfile.read_text()) + else: + log.info("No Qleverfile found") + return True diff --git a/src/qlever/containerize.py b/src/qlever/containerize.py index 9441ee99..42a8f13e 100644 --- a/src/qlever/containerize.py +++ b/src/qlever/containerize.py @@ -9,7 +9,7 @@ from typing import Optional from qlever.log import log -from qlever.util import run_command +from qlever.util import run_command, get_random_string class ContainerizeException(Exception): @@ -31,12 +31,16 @@ def supported_systems() -> list[str]: return ["docker", "podman"] @staticmethod - def containerize_command(cmd: str, container_system: str, - run_subcommand: str, - image_name: str, container_name: str, - volumes: list[tuple[str, str]] = [], - ports: list[tuple[int, int]] = [], - working_directory: Optional[str] = None) -> str: + def containerize_command( + cmd: str, + container_system: str, 
run_subcommand: str, + image_name: str, + container_name: str, + volumes: list[tuple[str, str]] = [], + ports: list[tuple[int, int]] = [], + working_directory: Optional[str] = None, + ) -> str: """ Get the command to run `cmd` with the given `container_system` and the given options. @@ -45,8 +49,9 @@ def containerize_command(cmd: str, container_system: str, # Check that `container_system` is supported. if container_system not in Containerize.supported_systems(): return ContainerizeException( - f"Invalid container system \"{container_system}\"" - f" (must be one of {Containerize.supported_systems()})") + f'Invalid container system "{container_system}"' + f" (must be one of {Containerize.supported_systems()})" + ) # Set user and group ids. This is important so that the files created # by the containerized command are owned by the user running the @@ -62,37 +67,40 @@ def containerize_command(cmd: str, container_system: str, # dir. volume_options = "".join([f" -v {v1}:{v2}" for v1, v2 in volumes]) port_options = "".join([f" -p {p1}:{p2}" for p1, p2 in ports]) - working_directory_option = (f" -w {working_directory}" - if working_directory is not None else "") + working_directory_option = ( + f" -w {working_directory}" if working_directory is not None else "" + ) # Construct the command that runs `cmd` with the given container # system. 
- containerized_cmd = (f"{container_system} {run_subcommand}" - f"{user_option}" - f" -v /etc/localtime:/etc/localtime:ro" - f"{volume_options}" - f"{port_options}" - f"{working_directory_option}" - f" --init" - f" --entrypoint bash" - f" --name {container_name} {image_name}" - f" -c {shlex.quote(cmd)}") + containerized_cmd = ( + f"{container_system} {run_subcommand}" + f"{user_option}" + f" -v /etc/localtime:/etc/localtime:ro" + f"{volume_options}" + f"{port_options}" + f"{working_directory_option}" + f" --init" + f" --entrypoint bash" + f" --name {container_name} {image_name}" + f" -c {shlex.quote(cmd)}" + ) return containerized_cmd @staticmethod def is_running(container_system: str, container_name: str) -> bool: # Note: the `{{{{` and `}}}}` result in `{{` and `}}`, respectively. containers = ( - run_command(f"{container_system} ps --format=\"{{{{.Names}}}}\"", - return_output=True) + run_command( + f'{container_system} ps --format="{{{{.Names}}}}"', return_output=True + ) .strip() .splitlines() ) return container_name in containers @staticmethod - def stop_and_remove_container(container_system: str, - container_name: str) -> bool: + def stop_and_remove_container(container_system: str, container_name: str) -> bool: """ Stop the container with the given name using the given system. Return `True` if a container with that name was found and stopped, `False` @@ -102,19 +110,45 @@ def stop_and_remove_container(container_system: str, # Check that `container_system` is supported. if container_system not in Containerize.supported_systems(): return ContainerizeException( - f"Invalid container system \"{container_system}\"" - f" (must be one of {Containerize.supported_systems()})") + f'Invalid container system "{container_system}"' + f" (must be one of {Containerize.supported_systems()})" + ) # Construct the command that stops the container. 
- stop_cmd = f"{container_system} stop {container_name} && " \ - f"{container_system} rm {container_name}" + stop_cmd = ( + f"{container_system} stop {container_name} && " + f"{container_system} rm {container_name}" + ) # Run the command. try: - subprocess.run(stop_cmd, shell=True, check=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + subprocess.run( + stop_cmd, + shell=True, + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) return True except Exception as e: - log.debug(f"Error running \"{stop_cmd}\": {e}") + log.debug(f'Error running "{stop_cmd}": {e}') return False + + @staticmethod + def run_in_container(cmd: str, args) -> Optional[str]: + """ + Run an arbitrary command in the qlever container and return its output. + """ + if args.system in Containerize.supported_systems(): + if not args.server_container: + args.server_container = get_random_string(20) + run_cmd = Containerize().containerize_command( + cmd, + args.system, + 'run --rm -it --entrypoint "" ', + args.image, + args.server_container, + volumes=[("$(pwd)", "/index")], + working_directory="/index", + ) + return run_command(run_cmd, return_output=True) diff --git a/src/qlever/util.py b/src/qlever/util.py index e595fe08..d79a9413 100644 --- a/src/qlever/util.py +++ b/src/qlever/util.py @@ -29,8 +29,9 @@ def get_total_file_size(patterns: list[str]) -> int: return total_size -def run_command(cmd: str, return_output: bool = False, - show_output: bool = False) -> Optional[str]: +def run_command( + cmd: str, return_output: bool = False, show_output: bool = False +) -> Optional[str]: """ Run the given command and throw an exception if the exit code is non-zero. If `return_output` is `True`, return what the command wrote to `stdout`. 
@@ -45,7 +46,7 @@ def run_command(cmd: str, return_output: bool = False, "shell": True, "text": True, "stdout": None if show_output else subprocess.PIPE, - "stderr": subprocess.PIPE + "stderr": subprocess.PIPE, } result = subprocess.run(f"set -o pipefail; {cmd}", **subprocess_args) # If the exit code is non-zero, throw an exception. If something was @@ -63,17 +64,20 @@ def run_command(cmd: str, return_output: bool = False, raise Exception(result.stderr.replace("\n", " ").strip()) else: raise Exception( - f"Command failed with exit code {result.returncode}" - f" but nothing written to stderr") + f"Command failed with exit code {result.returncode}" + f" but nothing written to stderr" + ) # Optionally, return what was written to `stdout`. if return_output: return result.stdout -def run_curl_command(url: str, - headers: dict[str, str] = {}, - params: dict[str, str] = {}, - result_file: Optional[str] = None) -> str: +def run_curl_command( + url: str, + headers: dict[str, str] = {}, + params: dict[str, str] = {}, + result_file: Optional[str] = None, +) -> str: """ Run `curl` with the given `url`, `headers`, and `params`. If `result_file` is `None`, return the output, otherwise, write the output to the given file @@ -83,22 +87,29 @@ def run_curl_command(url: str, # Construct and run the `curl` command. 
default_result_file = "/tmp/qlever.curl.result" actual_result_file = result_file if result_file else default_result_file - curl_cmd = (f"curl -s -o \"{actual_result_file}\"" - f" -w \"%{{http_code}}\n\" {url}" - + "".join([f" -H \"{key}: {value}\"" - for key, value in headers.items()]) - + "".join([f" --data-urlencode {key}={shlex.quote(value)}" - for key, value in params.items()])) - result = subprocess.run(curl_cmd, shell=True, text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + curl_cmd = ( + f'curl -s -o "{actual_result_file}"' + f' -w "%{{http_code}}\n" {url}' + + "".join([f' -H "{key}: {value}"' for key, value in headers.items()]) + + "".join( + [ + f" --data-urlencode {key}={shlex.quote(value)}" + for key, value in params.items() + ] + ) + ) + result = subprocess.run( + curl_cmd, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) # Case 1: An error occurred, raise an exception. if result.returncode != 0: if len(result.stderr) > 0: raise Exception(result.stderr) else: - raise Exception(f"curl command failed with exit code " - f"{result.returncode}, stderr is empty") + raise Exception( + f"curl command failed with exit code " + f"{result.returncode}, stderr is empty" + ) # Case 2: Return output (read from `default_result_file`). 
if result_file is None: result_file_path = Path(default_result_file) @@ -117,9 +128,9 @@ def is_qlever_server_alive(port: str) -> bool: message = "from the qlever script".replace(" ", "%20") curl_cmd = f"curl -s http://localhost:{port}/ping?msg={message}" - exit_code = subprocess.call(curl_cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + exit_code = subprocess.call( + curl_cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) return exit_code == 0 @@ -152,15 +163,15 @@ def show_table_line(pid, user, start_time, rss, cmdline): try: pinfo = psutil_process.as_dict( - attrs=['pid', 'username', 'create_time', - 'memory_info', 'cmdline']) + attrs=["pid", "username", "create_time", "memory_info", "cmdline"] + ) # Note: pinfo[`cmdline`] is `None` if the process is a zombie. - cmdline = " ".join(pinfo['cmdline'] or []) + cmdline = " ".join(pinfo["cmdline"] or []) if len(cmdline) == 0 or not re.search(cmdline_regex, cmdline): return False - pid = pinfo['pid'] - user = pinfo['username'] if pinfo['username'] else "" - start_time = datetime.fromtimestamp(pinfo['create_time']) + pid = pinfo["pid"] + user = pinfo["username"] if pinfo["username"] else "" + start_time = datetime.fromtimestamp(pinfo["create_time"]) if start_time.date() == date.today(): start_time = start_time.strftime("%H:%M") else: @@ -193,10 +204,24 @@ def is_port_used(port: int) -> bool: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Ensure that the port is not blocked after the check. 
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - sock.bind(('', port)) + sock.bind(("", port)) sock.close() return False except OSError as err: if err.errno != errno.EADDRINUSE: log.warning(f"Failed to determine if port is used: {err}") return True + + +def format_size(bytes, suffix="B"): + """ + Scale bytes to its proper format + e.g: + 1253656 => '1.20 MB' + 1253656678 => '1.17 GB' + """ + factor = 1024 + for unit in ["", "K", "M", "G", "T", "P"]: + if bytes < factor: + return f"{bytes:.2f} {unit}{suffix}" + bytes /= factor