From faed24aec8859d232fe3c0da415265e0de099ce4 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Sun, 15 Oct 2017 18:08:47 +0200 Subject: [PATCH] kill: New option --wait. Simply running "dss kill" during system shutdown to terminate the dss process does not work as expected because the kill subcommand exits as soon as the signal has been sent, which might be long before the targeted dss process terminates. For example, the dss main process might be running its exit hook to inform the system administrator about the fact that the dss service is going down when the shutdown procedure has already deactivated the network. Or the shutdown procedure kills the exit hook with SIGKILL during its normal "killing remaining processes" phase before file systems are unmounted. With the --wait option, the kill subcommand will not return until the dss process has died or the timeout expires. We hardcode the timeout in send_signal() for the time being. It can be made configurable if this turns out to be necessary. 
--- dss.c | 31 +++++++++++++++++++++++++------ dss.suite | 21 +++++++++++++++++++++ err.h | 1 + 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/dss.c b/dss.c index e9bd56a..60c0591 100644 --- a/dss.c +++ b/dss.c @@ -282,11 +282,13 @@ static char *get_config_file_name(void) return config_file; } -static int send_signal(int sig) +static int send_signal(int sig, bool wait) { pid_t pid; char *config_file = get_config_file_name(); int ret = get_dss_pid(config_file, &pid); + unsigned ms = 32; + struct timespec ts; free(config_file); if (ret < 0) @@ -298,7 +300,23 @@ static int send_signal(int sig) ret = kill(pid, sig); if (ret < 0) return -ERRNO_TO_DSS_ERROR(errno); - return 1; + if (!wait) + return 1; + while (ms < 5000) { + ts.tv_sec = ms / 1000; + ts.tv_nsec = (ms % 1000) * 1000 * 1000; + ret = nanosleep(&ts, NULL); + if (ret < 0) + return -ERRNO_TO_DSS_ERROR(errno); + ret = kill(pid, 0); + if (ret < 0) { + if (errno != ESRCH) + return -ERRNO_TO_DSS_ERROR(errno); + return 1; + } + ms *= 2; + } + return -E_KILL_TIMEOUT; } struct signal_info { @@ -357,6 +375,7 @@ static const struct signal_info signal_table[] = { static int com_kill(void) { + bool w_given = OPT_GIVEN(KILL, WAIT); const char *arg = OPT_STRING_VAL(KILL, SIGNAL); int ret, i; @@ -367,17 +386,17 @@ static int com_kill(void) return ret; if (val < 0 || val > SIGRTMAX) return -ERRNO_TO_DSS_ERROR(EINVAL); - return send_signal(val); + return send_signal(val, w_given); } if (strncasecmp(arg, "sig", 3) == 0) arg += 3; if (strcasecmp(arg, "CLD") == 0) - return send_signal(SIGCHLD); + return send_signal(SIGCHLD, w_given); if (strcasecmp(arg, "IOT") == 0) - return send_signal(SIGABRT); + return send_signal(SIGABRT, w_given); for (i = 0; i < SIGNAL_TABLE_SIZE; i++) if (strcasecmp(arg, signal_table[i].name) == 0) - return send_signal(signal_table[i].num); + return send_signal(signal_table[i].num, w_given); DSS_ERROR_LOG(("invalid sigspec: %s\n", arg)); return -ERRNO_TO_DSS_ERROR(EINVAL); } diff --git 
a/dss.suite b/dss.suite index e69cc28..d4926c3 100644 --- a/dss.suite +++ b/dss.suite @@ -475,6 +475,27 @@ caption = Subcommands Sending SIGHUP causes the running dss process to reload its config file. [/help] + [option wait] + short_opt = w + summary = wait until the signalled process has terminated + [help] + This option is handy for system shutdown scripts which would like + to terminate the dss daemon process. + + Without --wait the dss process which executes the kill subcommand + exits right after the kill(2) system call returns. At this point the + signalled process might still be alive (even if SIGKILL was sent). + If --wait is given, the process waits until the signalled process + has terminated or the timeout expires. + + If --wait is not given, the kill subcommand exits successfully if + and only if the signal was sent (i.e., if there exists another dss + process to receive the signal). With --wait it exits successfully + if, additionally, the signalled process has terminated before the + timeout expires. + + It only makes sense to use this option with signals which terminate dss. + [/help] [section copyright] Written by Andre Noll diff --git a/err.h b/err.h index 3d4e136..c3e207e 100644 --- a/err.h +++ b/err.h @@ -54,6 +54,7 @@ static inline char *dss_strerror(int num) DSS_ERROR(BAD_EXIT_CODE, "unexpected exit code"), \ DSS_ERROR(SIGNAL_SIG_ERR, "signal() returned SIG_ERR"), \ DSS_ERROR(SIGNAL, "caught terminating signal"), \ + DSS_ERROR(KILL_TIMEOUT, "signal timeout expired"), \ DSS_ERROR(BUG, "values of beta might cause dom!"), \ DSS_ERROR(NOT_RUNNING, "dss not running"), \ DSS_ERROR(ALREADY_RUNNING, "dss already running"), \ -- 2.39.5