From 9dc340d42ca43b50d933b3aa8941d2226f35b0f5 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 31 Oct 2024 06:58:03 -0700 Subject: [PATCH 1/4] sdexec: drop obsolete comment Problem: the sdexec code references 'flux imp kill' in an expanatory comment, but we plan to remove the kill subcommand from the IMP and have it forward signals to the cgroup. Drop that comment. --- src/modules/sdexec/sdexec.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/modules/sdexec/sdexec.c b/src/modules/sdexec/sdexec.c index 1cd19461f9c8..28c17a9600fb 100644 --- a/src/modules/sdexec/sdexec.c +++ b/src/modules/sdexec/sdexec.c @@ -824,8 +824,6 @@ static void kill_continuation (flux_future_t *f, void *arg) * only the pids of units started with sdexec.exec since the sdexec module * was loaded. Since this sends an sdbus RPC, the response is handled in * kill_continuation() when the sdbus response is received. - * N.B. in a typical system instance, job-exec would remotely execute - * flux-imp kill and this would not be used. */ static void kill_cb (flux_t *h, flux_msg_handler_t *mh, From dc5e0925f8e6d6dc9733ca248651f1e913ac3202 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 31 Oct 2024 07:09:26 -0700 Subject: [PATCH 2/4] libsdexec: support SendSIGKILL boolean property Problem: sdexec_start_transient_unit() does not support the SendSIGKILL attribute, which may need to be set to "off" for Flux guest jobs. Add support for the SendSIGKILL atttribute. To use, set the subprocess command option "SDEXEC_PROP_SendSIGKILL" to "off" (or other valid systemd boolean config value). --- src/common/libsdexec/start.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/common/libsdexec/start.c b/src/common/libsdexec/start.c index 949d4f9c1ef7..d2af62b3f341 100644 --- a/src/common/libsdexec/start.c +++ b/src/common/libsdexec/start.c @@ -155,6 +155,26 @@ static int prop_add_bool (json_t *prop, const char *name, int val) return 0; } +// per systemd.syntax(7), boolean values are: 1|yes|true|on, 0|no|false|off +static bool is_true (const char *s) +{ + if (streq (s, "1") + || !strcasecmp (s, "yes") + || !strcasecmp (s, "true") + || !strcasecmp (s, "on")) + return true; + return false; +} +static bool is_false (const char *s) +{ + if (streq (s, "0") + || !strcasecmp (s, "no") + || !strcasecmp (s, "false") + || !strcasecmp (s, "off")) + return true; + return false; +} + static int prop_add_u32 (json_t *prop, const char *name, uint32_t val) { json_int_t i = val; @@ -254,6 +274,17 @@ static int prop_add (json_t *prop, const char *name, const char *val) } free (bitmap); } + else if (streq (name, "SendSIGKILL")) { + bool value; + if (is_false (val)) + value = false; + else if (is_true (val)) + value = true; + else + return -1; + if (prop_add_bool (prop, name, value) < 0) + return -1; + } else { if (prop_add_string (prop, name, val) < 0) return -1; From 0bb8962666d79a0be0173f797decce40843df259 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 31 Oct 2024 09:36:25 -0700 Subject: [PATCH 3/4] testsuite: cover sdexec KillMode, SendSIGKILL Problem: there is no test coverage that confirms sdexec can set KillMode and SendSIGKILL. Add some tests. --- t/t2409-sdexec.t | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/t/t2409-sdexec.t b/t/t2409-sdexec.t index 243d4dd7b112..ab1aec209a9b 100755 --- a/t/t2409-sdexec.t +++ b/t/t2409-sdexec.t @@ -164,6 +164,33 @@ test_expect_success 'sdexec can set unit Description property' ' t2409-desc.service >desc.out && test_cmp desc.exp desc.out ' +test_expect_success 'sdexec can set unit SendSIGKILL property to no' ' + cat >sigkill.exp <<-EOT && + SendSIGKILL=no + EOT + $sdexec -r 0 \ + --setopt=SDEXEC_NAME="t2409-sigkill.service" \ + --setopt=SDEXEC_PROP_SendSIGKILL=no \ + $systemctl --user show --property SendSIGKILL \ + t2409-sigkill.service >sigkill.out && + test_cmp sigkill.exp sigkill.out +' +test_expect_success 'setting SendSIGKILL to an invalid value fails' ' + test_must_fail $sdexec -r 0 --setopt=SDEXEC_PROP_SendSIGKILL=zzz \ + $true 2>sigkill_badval.err && + grep "error setting property" sigkill_badval.err +' +test_expect_success 'sdexec can set unit KillMode property to process' ' + cat >killmode.exp <<-EOT && + KillMode=process + EOT + $sdexec -r 0 \ + --setopt=SDEXEC_NAME="t2409-killmode.service" \ + --setopt=SDEXEC_PROP_KillMode=process \ + $systemctl --user show --property KillMode \ + t2409-killmode.service >killmode.out && + test_cmp killmode.exp killmode.out +' # Check that we can set resource control attributes on our transient units, # but expect resource control testing to occur elsewhere. # See also: From 2ea80ac8894f6308d51c619ec20965cb8482165a Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 31 Oct 2024 09:20:37 -0700 Subject: [PATCH 4/4] job-exec: set KillMode=process SendSIGKILL=no Problem: For multi-user jobs spawned via SDEXEC, the systemd user instance running as the flux user does not have permission to kill guest processes, yet it does try and in the process may kill off the only process that does have permission to continue cleanup efforts, the IMP. When the job is run by the IMP and sdexec, Set KillMode=process so that systemd only delivers signals to the IMP, which it should forward to the shell and/or cgroup per RFC 15. Also set SendSIGKILL to "off" so that SIGKILL is never deployed against the IMP. Fixes #6399 --- src/modules/job-exec/exec.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/modules/job-exec/exec.c b/src/modules/job-exec/exec.c index 9d67fb333b46..6d0f054f51a8 100644 --- a/src/modules/job-exec/exec.c +++ b/src/modules/job-exec/exec.c @@ -607,6 +607,20 @@ static int exec_init (struct jobinfo *job) flux_log_error (job->h, "exec_init: flux_cmd_setenvf"); goto err; } + /* The systemd user instance running as user flux is not privileged + * to signal guest processes, therefore only signal the IMP and + * never use SIGKILL. See flux-framework/flux-core#6399 + */ + if (streq (service, "sdexec")) { + if (flux_cmd_setopt (cmd, "SDEXEC_PROP_KillMode", "process") < 0 + || flux_cmd_setopt (cmd, + "SDEXEC_PROP_SendSIGKILL", + "off") < 0) { + flux_log_error (job->h, + "Unable to set multiuser sdexec options"); + return -1; + } + } if (flux_cmd_argv_append (cmd, config_get_imp_path ()) < 0 || flux_cmd_argv_append (cmd, "exec") < 0) { flux_log_error (job->h, "exec_init: flux_cmd_argv_append");