diff --git a/doc/requirements.txt b/doc/requirements.txt index a78a8086..4bd13474 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,5 +1,5 @@ -sphinx==3.4.3 +sphinx<6.0.0 sphinx-rtd-theme>=0.5.2 docutils>=0.14,<0.18 -Jinja2<3.1 urllib3<2 +jinja2<3.10 diff --git a/src/imp/Makefile.am b/src/imp/Makefile.am index a9b5e1e2..31ac9761 100644 --- a/src/imp/Makefile.am +++ b/src/imp/Makefile.am @@ -58,6 +58,10 @@ IMP_SOURCES = \ passwd.h \ pidinfo.c \ pidinfo.h \ + signals.c \ + signals.h \ + cgroup.c \ + cgroup.h \ kill.c \ run.c \ exec/user.h \ diff --git a/src/imp/cgroup.c b/src/imp/cgroup.c new file mode 100644 index 00000000..8591164c --- /dev/null +++ b/src/imp/cgroup.c @@ -0,0 +1,245 @@ +/************************************************************\ + * Copyright 2024 Lawrence Livermore National Security, LLC + * (c.f. AUTHORS, NOTICE.LLNS, COPYING) + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. + * + * SPDX-License-Identifier: LGPL-3.0 +\************************************************************/ + +#if HAVE_CONFIG_H +#include "config.h" +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#ifdef HAVE_LINUX_MAGIC_H +#include +#endif +#ifndef TMPFS_MAGIC +#define TMPFS_MAGIC 0x01021994 /* from linux/magic.h */ +#endif +#ifndef CGROUP_SUPER_MAGIC +#define CGROUP_SUPER_MAGIC 0x27e0eb +#endif +#ifndef CGROUP2_SUPER_MAGIC +#define CGROUP2_SUPER_MAGIC 0x63677270 +#endif +#include + +#include "src/libutil/strlcpy.h" + +#include "cgroup.h" +#include "imp_log.h" + +static char *remove_leading_dotdot (char *relpath) +{ + while (strncmp (relpath, "/..", 3) == 0) + relpath += 3; + return relpath; +} + +/* + * Look up the current cgroup relative path from /proc/self/cgroup. + * + * If cgroup->unified is true, then look for the first entry where + * 'subsys' is an empty string. + * + * Otherwise, use the `name=systemd` line. + * + * See NOTES: /proc/[pid]/cgroup in cgroups(7). + */ +static int cgroup_init_path (struct cgroup_info *cgroup) +{ + int rc = -1; + int n; + FILE *fp; + size_t size = 0; + char *line = NULL; + int saved_errno; + + if (!(fp = fopen ("/proc/self/cgroup", "r"))) + return -1; + + while ((n = getline (&line, &size, fp)) >= 0) { + char *nl; + char *relpath = NULL; + char *subsys = strchr (line, ':'); + if ((nl = strchr (line, '\n'))) + *nl = '\0'; + if (subsys == NULL + || *(++subsys) == '\0' + || !(relpath = strchr (subsys, ':'))) + continue; + + /* Nullify subsys, relpath is already nul-terminated at newline */ + *(relpath++) = '\0'; + + /* Remove leading /.. in relpath. This could be due to cgroup + * mounted in a container. + */ + relpath = remove_leading_dotdot (relpath); + + /* If unified cgroups are being used, then stop when we find + * subsys="". Otherwise stop at subsys="name=systemd": + */ + if ((cgroup->unified && subsys[0] == '\0') + || (!cgroup->unified && strcmp (subsys, "name=systemd") == 0)) { + int len = sizeof (cgroup->path); + if (snprintf (cgroup->path, + len, + "%s%s", + cgroup->mount_dir, + relpath) < len) + rc = 0; + break; + } + } + if (rc < 0) + errno = ENOENT; + + saved_errno = errno; + free (line); + fclose (fp); + errno = saved_errno; + return rc; +} + +/* Determine if this system is using the unified (v2) or legacy (v1) + * cgroups hierarchy (See https://systemd.io/CGROUP_DELEGATION/) + * and mount point for systemd managed cgroups. + */ +static int cgroup_init_mount_dir_and_type (struct cgroup_info *cg) +{ + struct statfs fs; + + /* Assume unified unless we discover otherwise + */ + cg->unified = true; + + /* Check if either /sys/fs/cgroup or /sys/fs/cgroup/unified + * are mounted as type cgroup2. If so, use this as the mount dir + * (Note: these paths are guaranteed to fit in cg->mount_dir, so + * no need to check for truncation) + */ + (void) strlcpy (cg->mount_dir, "/sys/fs/cgroup", sizeof (cg->mount_dir)); + if (statfs (cg->mount_dir, &fs) < 0) + return -1; + + /* if cgroup2 fs mounted: unified hierarchy for all users of cgroupfs + */ + if (fs.f_type == CGROUP2_SUPER_MAGIC) + return 0; + + /* O/w, check if cgroup2 unified hierarchy mounted at + * /sys/fs/cgroup/unified + */ + (void) strlcpy (cg->mount_dir, + "/sys/fs/cgroup/unified", + sizeof (cg->mount_dir)); + if (statfs (cg->mount_dir, &fs) < 0) + return -1; + + if (fs.f_type == CGROUP2_SUPER_MAGIC) + return 0; + + /* O/w, if /sys/fs/cgroup is mounted as tmpfs, we need to check + * for /sys/fs/cgroup/systemd mounted as cgroupfs (legacy). + */ + if (fs.f_type == TMPFS_MAGIC) { + + (void) strlcpy (cg->mount_dir, + "/sys/fs/cgroup/systemd", + sizeof (cg->mount_dir)); + if (statfs (cg->mount_dir, &fs) == 0 + && fs.f_type == CGROUP_SUPER_MAGIC) { + cg->unified = false; + return 0; + } + } + + /* Unable to determine cgroup mount point and/or unified vs legacy */ + return -1; +} + +void cgroup_info_destroy (struct cgroup_info *cg) +{ + if (cg) { + int saved_errno = errno; + free (cg); + errno = saved_errno; + } +} + +struct cgroup_info *cgroup_info_create (void) +{ + struct cgroup_info *cgroup = calloc (1, sizeof (*cgroup)); + if (!cgroup) + return NULL; + + if (cgroup_init_mount_dir_and_type (cgroup) < 0 + || cgroup_init_path (cgroup) < 0) { + cgroup_info_destroy (cgroup); + return NULL; + } + /* Note: GNU basename(3) never modifies its argument. (_GNU_SOURCE + * is defined in config.h.) + */ + if (strncmp (basename (cgroup->path), "imp-shell", 9) == 0) + cgroup->use_cgroup_kill = true; + + return cgroup; +} + +int cgroup_kill (struct cgroup_info *cgroup, int sig) +{ + int count = 0; + int rc = 0; + int saved_errno = 0; + char path [PATH_MAX+14]; /* cgroup->path[PATH_MAX] + "/cgroup.procs" */ + FILE *fp; + unsigned long child; + pid_t current_pid = getpid (); + + /* Note: path is guaranteed to have enough space to append "/cgroup.procs" + */ + (void) snprintf (path, sizeof (path), "%s/cgroup.procs", cgroup->path); + + if (!(fp = fopen (path, "r"))) + return -1; + while (fscanf (fp, "%lu", &child) == 1) { + pid_t pid = child; + if (pid == current_pid) + continue; + if (kill (pid, sig) < 0) { + saved_errno = errno; + rc = -1; + imp_warn ("Failed to send signal %d to pid %lu", + sig, + child); + continue; + } + count++; + } + fclose (fp); + if (rc < 0 && count == 0) { + count = -1; + errno = saved_errno; + } + return count; +} + +/* vi: ts=4 sw=4 expandtab + */ diff --git a/src/imp/cgroup.h b/src/imp/cgroup.h new file mode 100644 index 00000000..0c9bdc5d --- /dev/null +++ b/src/imp/cgroup.h @@ -0,0 +1,30 @@ +/************************************************************\ + * Copyright 2024 Lawrence Livermore National Security, LLC + * (c.f. AUTHORS, NOTICE.LLNS, COPYING) + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. + * + * SPDX-License-Identifier: LGPL-3.0 +\************************************************************/ + +#ifndef HAVE_IMP_CGROUP_H +#define HAVE_IMP_CGROUP_H 1 + +struct cgroup_info { + char mount_dir[PATH_MAX + 1]; + char path[PATH_MAX + 1]; + bool unified; + bool use_cgroup_kill; +}; + +struct cgroup_info *cgroup_info_create (void); + +void cgroup_info_destroy (struct cgroup_info *cgroup); + +/* Send signal to all pids (excluding the current pid) in the + * current cgroup. + */ +int cgroup_kill (struct cgroup_info *cgroup, int sig); + +#endif /* !HAVE_IMP_CGROUP_H */ diff --git a/src/imp/exec/exec.c b/src/imp/exec/exec.c index acb33ab2..005b38f0 100644 --- a/src/imp/exec/exec.c +++ b/src/imp/exec/exec.c @@ -46,6 +46,7 @@ #include "passwd.h" #include "user.h" #include "safe_popen.h" +#include "signals.h" #if HAVE_PAM #include "pam.h" @@ -67,8 +68,6 @@ struct imp_exec { int specsz; }; -static pid_t imp_child = (pid_t) -1; - extern const char *imp_get_security_config_pattern (void); extern int imp_get_security_flags (void); @@ -225,68 +224,10 @@ static void __attribute__((noreturn)) imp_exec (struct imp_exec *exec) imp_die (exit_code, "%s: %s", exec->shell, strerror (errno)); } -static void fwd_signal (int signal) -{ - if (imp_child > 0) - kill (imp_child, signal); -} - -/* Setup signal handlers in the IMP for common signals which - * we want to forward to any child process. - */ -static void setup_signal_forwarding (void) -{ - struct sigaction sa; - sigset_t mask; - int i; - int signals[] = { - SIGTERM, - SIGINT, - SIGHUP, - SIGCONT, - SIGALRM, - SIGWINCH, - SIGTTIN, - SIGTTOU, - }; - int nsignals = sizeof (signals) / sizeof (signals[0]); - - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = fwd_signal; - sa.sa_flags = SA_RESTART; - sigemptyset(&sa.sa_mask); - - sigfillset (&mask); - for (i = 0; i < nsignals; i++) { - sigdelset (&mask, signals[i]); - if (sigaction(signals[i], &sa, NULL) < 0) - imp_warn ("sigaction (signal=%d): %s", - signals[i], - strerror (errno)); - } - if (sigprocmask (SIG_SETMASK, &mask, NULL) < 0) - imp_die (1, "failed to block signals: %s", strerror (errno)); -} - -static void sigblock_all (void) -{ - sigset_t mask; - sigfillset (&mask); - if (sigprocmask (SIG_SETMASK, &mask, NULL) < 0) - imp_die (1, "failed to block signals: %s", strerror (errno)); -} - -static void sigunblock_all (void) -{ - sigset_t mask; - sigemptyset (&mask); - if (sigprocmask (SIG_SETMASK, &mask, NULL) < 0) - imp_die (1, "failed to unblock signals: %s", strerror (errno)); -} - int imp_exec_privileged (struct imp_state *imp, struct kv *kv) { int status; + pid_t child; struct imp_exec *exec = imp_exec_create (imp); if (!exec) imp_die (1, "exec: failed to initialize state"); @@ -322,15 +263,17 @@ int imp_exec_privileged (struct imp_state *imp, struct kv *kv) } /* Block signals so parent IMP isn't unduly terminated */ - sigblock_all (); + imp_sigblock_all (); - if ((imp_child = fork ()) < 0) + if ((child = fork ()) < 0) imp_die (1, "exec: fork: %s", strerror (errno)); - if (imp_child == 0) { + imp_set_signal_child (child); + + if (child == 0) { /* unblock all signals */ - sigunblock_all (); + imp_sigunblock_all (); /* Irreversibly switch to user */ imp_switch_user (exec->user_pwd->pw_uid); @@ -342,10 +285,10 @@ int imp_exec_privileged (struct imp_state *imp, struct kv *kv) /* Ensure common signals received by this IMP are forwarded to * the child process */ - setup_signal_forwarding (); + imp_setup_signal_forwarding (imp); /* Parent: wait for child to exit */ - while (waitpid (imp_child, &status, 0) != imp_child) { + while (waitpid (child, &status, 0) != child) { if (errno != EINTR) imp_die (1, "waitpid: %s", strerror (errno)); } @@ -360,7 +303,7 @@ int imp_exec_privileged (struct imp_state *imp, struct kv *kv) if (WIFEXITED (status)) exit (WEXITSTATUS (status)); else if (WIFSIGNALED (status)) - exit (WTERMSIG (status) + 128); + imp_raise (WTERMSIG (status)); else exit (1); diff --git a/src/imp/imp.c b/src/imp/imp.c index 8463b1ae..23c9873c 100644 --- a/src/imp/imp.c +++ b/src/imp/imp.c @@ -59,6 +59,11 @@ int main (int argc, char *argv[]) if (!(imp.conf = imp_conf_load (imp_get_config_pattern ()))) imp_die (1, "Failed to load configuration"); + /* Get current IMP cgroup information: + */ + if (!(imp.cgroup = cgroup_info_create ())) + imp_die (1, "Failed to get current cgroup info"); + /* Audit subsystem initialization */ // Skip. diff --git a/src/imp/imp_state.h b/src/imp/imp_state.h index 9347968c..107028f0 100644 --- a/src/imp/imp_state.h +++ b/src/imp/imp_state.h @@ -13,12 +13,14 @@ #include "src/libutil/cf.h" #include "privsep.h" +#include "cgroup.h" struct imp_state { int argc; char **argv; /* cmdline arguments from main() */ cf_t *conf; /* IMP configuration */ privsep_t *ps; /* Privilege separation handle */ + struct cgroup_info *cgroup; }; #endif /* !HAVE_IMP_STATE_H */ diff --git a/src/imp/signals.c b/src/imp/signals.c new file mode 100644 index 00000000..d43ad79e --- /dev/null +++ b/src/imp/signals.c @@ -0,0 +1,123 @@ +/************************************************************\ + * Copyright 2024 Lawrence Livermore National Security, LLC + * (c.f. AUTHORS, NOTICE.LLNS, COPYING) + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. + * + * SPDX-License-Identifier: LGPL-3.0 +\************************************************************/ + +#if HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include + +#include "signals.h" +#include "pidinfo.h" +#include "imp_log.h" + +static const struct imp_state *imp_state = NULL; +static pid_t imp_child = (pid_t) -1; + +void imp_set_signal_child (pid_t child) +{ + imp_child = child; +} + +void imp_sigblock_all (void) +{ + sigset_t mask; + sigfillset (&mask); + if (sigprocmask (SIG_SETMASK, &mask, NULL) < 0) + imp_die (1, "failed to block signals: %s", strerror (errno)); +} + +void imp_sigunblock_all (void) +{ + sigset_t mask; + sigemptyset (&mask); + if (sigprocmask (SIG_SETMASK, &mask, NULL) < 0) + imp_die (1, "failed to unblock signals: %s", strerror (errno)); +} + +static void fwd_signal (int signum) +{ + if (signum == SIGUSR1) { + int count = -1; + + /* RFC 15 specifies that SIGUSR1 is a surrogate for SIGKILL, and + * that the IMP SHALL deliver the signal to all processes in the + * job's container (using cgroup.procs if able). + */ + if (imp_state->cgroup->use_cgroup_kill) + count = cgroup_kill (imp_state->cgroup, SIGKILL); + + /* If cgroup wasn't used or fails, try with pid_kill_children + */ + if (count < 0) + count = pid_kill_children (getpid (), SIGKILL); + + /* O/w, log an error, not much more to do + */ + if (count < 0) + imp_warn ("Failed to forward SIGKILL: %s", strerror (errno)); + } + else if (imp_child > 0) + kill (imp_child, signum); +} + +void imp_setup_signal_forwarding (struct imp_state *imp) +{ + struct sigaction sa; + sigset_t mask; + int i; + int signals[] = { + SIGTERM, + SIGINT, + SIGHUP, + SIGCONT, + SIGALRM, + SIGWINCH, + SIGTTIN, + SIGTTOU, + SIGUSR1, + }; + int nsignals = sizeof (signals) / sizeof (signals[0]); + + imp_state = imp; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = fwd_signal; + sa.sa_flags = SA_RESTART; + sigemptyset(&sa.sa_mask); + + sigfillset (&mask); + for (i = 0; i < nsignals; i++) { + sigdelset (&mask, signals[i]); + if (sigaction(signals[i], &sa, NULL) < 0) + imp_warn ("sigaction (signal=%d): %s", + signals[i], + strerror (errno)); + } + if (sigprocmask (SIG_SETMASK, &mask, NULL) < 0) + imp_die (1, "failed to block signals: %s", strerror (errno)); +} + +void imp_raise (int sig) +{ + signal (sig, SIG_DFL); + if (raise (sig) == 0) + pause (); + /* If we get here, either raise(3) failed or for some reason signal + * failed to kill the IMP during pause(). Exit with standard 128+sig. + */ + imp_die (128 + sig, "failed to raise signal %d", sig); +} + +/* vi: ts=4 sw=4 expandtab + */ diff --git a/src/imp/signals.h b/src/imp/signals.h new file mode 100644 index 00000000..0bf0208a --- /dev/null +++ b/src/imp/signals.h @@ -0,0 +1,34 @@ +/************************************************************\ + * Copyright 2024 Lawrence Livermore National Security, LLC + * (c.f. AUTHORS, NOTICE.LLNS, COPYING) + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. + * + * SPDX-License-Identifier: LGPL-3.0 +\************************************************************/ + +#ifndef HAVE_IMP_SIGNALS_H +#define HAVE_IMP_SIGNALS_H 1 + +#include +#include "imp_state.h" + +/* Set the target of IMP signal forwarding + */ +void imp_set_signal_child (pid_t child); + +/* Setup RFC 15 standard IMP signal forwarding + */ +void imp_setup_signal_forwarding (struct imp_state *imp); + +void imp_sigblock_all (void); + +void imp_sigunblock_all (void); + +/* Set default signal disposition and then raise signal 'sig'. + * If raise fails for any reason, then exit with standard 128+sig. + */ +void imp_raise (int sig) __attribute__ ((noreturn));; + +#endif /* !HAVE_IMP_SIGNALS_H */ diff --git a/src/test/docker/docker-run-checks.sh b/src/test/docker/docker-run-checks.sh index 5476678f..b917ad6f 100755 --- a/src/test/docker/docker-run-checks.sh +++ b/src/test/docker/docker-run-checks.sh @@ -177,7 +177,7 @@ else docker run --rm \ --workdir=$WORKDIR \ --volume=$TOP:$WORKDIR \ - -v /sys/fs/cgroup:/sys/fs/cgroup:ro \ + -v /sys/fs/cgroup:/sys/fs/cgroup:rw \ ${PLATFORM} \ $MOUNT_HOME_ARGS \ -e CC \ diff --git a/t/t2000-imp-exec.t b/t/t2000-imp-exec.t index 3441cd9a..98a54355 100755 --- a/t/t2000-imp-exec.t +++ b/t/t2000-imp-exec.t @@ -18,6 +18,27 @@ echo "# Using ${flux_imp}" fake_imp_input() { printf '{"J":"%s"}' $(echo $1 | $sign) } +# Some shells do not support passing environment variable in the same +# line as the invocation. Therefore add a sign-none.toml version of +# the fake_imp_input() helper function: (sign-none.toml is defined later) +fake_input_sign_none() { + printf '{"J":"%s"}' \ + $(echo foo | env FLUX_IMP_CONFIG_PATTERN=sign-none.toml $sign) +} +wait_for_file() { + count=0 && + while ! test -f $1; do + sleep 0.1 + count=$((count+1)) + test $count -gt 20 && break + test_debug "echo retrying count=${count}" + done +} +imp_exec_sign_none() { + fake_input_sign_none | \ + $SUDO FLUX_IMP_CONFIG_PATTERN=sign-none.toml \ + $flux_imp exec $@ +} cat <helper.sh #!/bin/sh @@ -26,6 +47,13 @@ test -z "\$IMP_HELPER_FAIL" EOF chmod +x helper.sh +cat <sleeper.sh +#!/bin/sh +printf "\$PPID\n" >$(pwd)/sleeper.pid +exec /bin/sleep "\$@" +EOF +chmod +x sleeper.sh + test_expect_success 'create config allowing current user to exec imp' ' cat <<-EOF >imp-test.toml allow-sudo = true @@ -192,62 +220,115 @@ test_expect_success 'flux-imp exec: shell must be in allowed users' ' grep -i "not in allowed-users list" nousers.log ' test_expect_success SUDO 'flux-imp exec: user must be in allowed-shells' ' - ( export FLUX_IMP_CONFIG_PATTERN=sign-none.toml && - fake_imp_input foo | \ - test_must_fail $SUDO FLUX_IMP_CONFIG_PATTERN=sign-none.toml \ - $flux_imp exec printf good >badshell.log 2>&1 - ) && + test_must_fail imp_exec_sign_none printf good >badshell.log 2>&1 && + test_debug "cat badshell.log" && grep -i "not in allowed-shells" badshell.log ' test_expect_success SUDO 'flux-imp exec works under sudo' ' - ( export FLUX_IMP_CONFIG_PATTERN=sign-none.toml && - fake_imp_input foo | \ - $SUDO FLUX_IMP_CONFIG_PATTERN=sign-none.toml \ - $flux_imp exec id -u >id-sudo.out - ) && + imp_exec_sign_none id -u >id-sudo.out && test_debug "echo expecting uid=$(id -u), got $(cat id-sudo.out)" && id -u > id-sudo.expected && test_cmp id-sudo.expected id-sudo.out ' test_expect_success SUDO 'flux-imp exec passes more than one argument to shell' ' - ( export FLUX_IMP_CONFIG_PATTERN=sign-none.toml && - fake_imp_input foo | \ - $SUDO FLUX_IMP_CONFIG_PATTERN=sign-none.toml \ - $flux_imp exec id --zero --user >id-sudo2.out - ) && + imp_exec_sign_none id --zero --user >id-sudo2.out && test_debug "echo expecting uid=$(id -u), got $(cat -v id-sudo2.out)" && id --zero --user > id-sudo2.expected && test_cmp id-sudo2.expected id-sudo2.out ' test "$chain_lint" = "t" || test_set_prereq NO_CHAIN_LINT test_expect_success SUDO,NO_CHAIN_LINT 'flux-imp exec: setuid IMP lingers' ' - cat <<-EOF >sleeper.sh && - #!/bin/sh - printf "\$PPID\n" >$(pwd)/sleeper.pid - exec /bin/sleep "\$@" - EOF - chmod +x sleeper.sh && - ( export FLUX_IMP_CONFIG_PATTERN=sign-none.toml && \ - exec <&- 1>&- 2>&- && \ - fake_imp_input foo | \ - $SUDO FLUX_IMP_CONFIG_PATTERN=sign-none.toml \ - $flux_imp exec $(pwd)/sleeper.sh 15 >linger.out 2>&1 & \ - ) && - count=0 && - while ! test -f sleeper.pid; do - sleep 0.1 - count=$((count+1)) - test $count -gt 20 && break - test_debug "echo retrying count=${count}" - done && + imp_exec_sign_none $(pwd)/sleeper.sh 15 >linger.out 2>&1 & + imp_pid=$! && + test_when_finished "rm -f sleeper.pid" && + wait_for_file sleeper.pid && test_debug "cat sleeper.pid" test -f sleeper.pid && pid=$(cat sleeper.pid) && - test_debug "pstree -lup $pid" && test $(ps --no-header -o comm -p ${pid}) = "flux-imp" && kill -TERM $pid && - wait + test_expect_code 143 wait $imp_pid +' +test_expect_success SUDO,NO_CHAIN_LINT 'flux-imp exec: SIGUSR1 sends SIGKILL' ' + imp_exec_sign_none $(pwd)/sleeper.sh 15 >sigkill.out 2>&1 & + imp_pid=$! && + test_when_finished "rm -f sleeper.pid" && + wait_for_file sleeper.pid && + test -f sleeper.pid && + pid=$(cat sleeper.pid) && + kill -USR1 $pid && + test_expect_code 137 wait $imp_pid +' +# Notes about cgroup kill testing below: +# - systemd-run is used as a convenient way to execute the IMP inside a +# cgroup named with imp-shell prefix defined in RFC 15. +# - systemd-run is run with --collect so that a failure shouldn't leave +# a stale transient unit on the system +# - systemd-run is run under sudo instead of `systemd-run sudo flux-imp` +# because the IMP will send SIGKILL to all processes except itself in the +# cgroup.procs file. If run directly under sudo, then sudo is a parent of +# the IMP in the cgroup and gets SIGKILLed. +# +command -v systemd-run >/dev/null 2>&1 \ + && test_have_prereq SUDO \ + && $SUDO systemd-run --collect /bin/true >/dev/null 2>&1 \ + && test_set_prereq SYSTEMD_RUN + +test_expect_success SUDO,NO_CHAIN_LINT,SYSTEMD_RUN \ + 'flux-imp exec: SIGURS1 sends SIGKILL using cgroup kill' ' + FLUX_IMP_CONFIG_PATTERN=$(pwd)/sign-none.toml \ + fake_imp_input foo | \ + $SUDO systemd-run --collect --scope \ + --unit=imp-shell-$$ \ + -E FLUX_IMP_CONFIG_PATTERN=sign-none.toml \ + $flux_imp exec $(pwd)/sleeper.sh 5 & + pid=$! && + test_when_finished "rm -f sleeper.pid" && + wait_for_file sleeper.pid && + kill -USR1 $(cat sleeper.pid) && + test_expect_code 137 wait $pid ' + +CGROUP_PATH="$(awk '/^cgroup2/ {print $2}' /proc/self/mounts)/imp-shell.$$" +test_have_prereq SUDO && + $SUDO mkdir $CGROUP_PATH && + test_set_prereq CGROUPFS && + cleanup "$SUDO rmdir $CGROUP_PATH" + +cat <<'EOF' >run-in-cgroup.sh +#!/bin/sh +path=$1 +shift +test -d $path || mkdir -p $path +echo "moving pid $$ to $path" >&2 +echo $$ >${path}/cgroup.procs +echo "running $@ as $(id -u)" >&2 +exec "$@" +EOF +chmod +x run-in-cgroup.sh + +# Rewrite sleeper script so it results in a hierarchy of processes +cat <sleeper.sh +#!/bin/sh +printf "\$PPID\n" >$(pwd)/sleeper.pid +(/bin/sleep "\$@") +EOF +chmod +x sleeper.sh + +test_expect_success SUDO,CGROUPFS,NO_CHAIN_LINT \ + 'flux-imp exec: SIGUSR1 sends SIGKILL via cgroup kill' ' + fake_input_sign_none | \ + $SUDO FLUX_IMP_CONFIG_PATTERN=sign-none.toml \ + ./run-in-cgroup.sh "$CGROUP_PATH" \ + $flux_imp exec $(pwd)/sleeper.sh 15 & + imp_pid=$! && + test_when_finished "rm -f sleeper.pid" && + wait_for_file sleeper.pid && + kill -USR1 $(cat sleeper.pid) && + test_expect_code 137 wait $imp_pid && + test_must_be_empty ${CGROUP_PATH}/cgroup.procs +' + $flux_imp version | grep -q pam || test_set_prereq NO_PAM test_expect_success NO_PAM,SUDO 'flux-imp exec: fails if not built with PAM but pam-support=true' ' ( export FLUX_IMP_CONFIG_PATTERN=pam-test.toml &&