diff --git a/src/modules/job-exec/job-exec.c b/src/modules/job-exec/job-exec.c index e71a5f4393ff..646acd13ef4e 100644 --- a/src/modules/job-exec/job-exec.c +++ b/src/modules/job-exec/job-exec.c @@ -438,13 +438,21 @@ static void kill_shell_timer_cb (flux_reactor_t *r, { struct jobinfo *job = arg; struct idset *active_ranks; + int actual_kill_signal = kill_signal; + + /* RFC 15 states that the IMP handles SIGUSR1 by sending SIGKILL to + * the entire cgroup. Sending SIGKILL to the IMP is not productive. + */ + if (job->multiuser) + actual_kill_signal = SIGUSR1; flux_log (job->h, LOG_DEBUG, - "Sending %s to job shell for job %s", - sigutil_signame (kill_signal), + "Sending %s to %s for job %s", + sigutil_signame (actual_kill_signal), + job->multiuser ? "IMP" : "job shell", idf58 (job->id)); - (*job->impl->kill) (job, kill_signal); + (*job->impl->kill) (job, actual_kill_signal); job->kill_shell_count++; /* Since we've transitioned to killing the shell directly, stop the diff --git a/t/job-exec/imp-fail.sh b/t/job-exec/imp-fail.sh index 4ab0fc921aeb..c29dcbdfef57 100755 --- a/t/job-exec/imp-fail.sh +++ b/t/job-exec/imp-fail.sh @@ -14,16 +14,6 @@ case "$cmd" in printf "test-imp: Going to fail on rank 1\n" >&2 if test $(flux getattr rank) = 1; then exit 0; fi exec "$@" ;; - kill) - # Note: kill must be implemented in test since job-exec - # module will run `flux-imp kill PID`. - # - signal=$2; - pid=$3; - printf "test-imp: kill -$signal $pid\n" >&2 - shift 3; - printf "test-imp: Kill pid $pid signal $signal\n" >&2 - kill -$signal $pid ;; *) printf "test-imp: Fatal: Unknown cmd=$cmd\n" >&2; exit 1 ;; esac diff --git a/t/t2404-job-exec-multiuser.t b/t/t2404-job-exec-multiuser.t index 7a7ee1c1d524..aeea73dfb546 100755 --- a/t/t2404-job-exec-multiuser.t +++ b/t/t2404-job-exec-multiuser.t @@ -81,7 +81,7 @@ test_expect_success 'job-exec: reconfig and reload module' ' flux config reload && flux module reload -f job-exec ' -test_expect_success NO_ASAN 'job-exec: kill multiuser job uses the IMP' ' +test_expect_success NO_ASAN 'job-exec: kill multiuser job works' ' FAKE_USERID=42 && flux run --dry-run -n2 -N2 sleep 1000 | \ flux python ${SIGN_AS} ${FAKE_USERID} > sleep-job.signed && @@ -91,8 +91,7 @@ test_expect_success NO_ASAN 'job-exec: kill multiuser job uses the IMP' ' jq -e ".userid == 42" < ${id}.json && flux job wait-event -p exec -vt 30 ${id} shell.start && flux cancel ${id} && - test_expect_code 143 run_timeout 30 flux job status -v ${id} && - flux dmesg | grep "test-imp: Kill .*signal 15" + test_expect_code 143 run_timeout 30 flux job status -v ${id} ' # Configure failing IMP