From 7f08e8b0eb9570f1eb787dbb4e10d56585b36bbf Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 7 Aug 2017 21:41:00 +0200 Subject: [PATCH] server: Exit cleanly on SIGINT/SIGTERM. Currently signal_post_select() kills all child processes and then calls exit(3) if SIGNINT or SIGTERM was received. This leaves all file descriptors open and memory blocks allocated, which makes debugging memory leaks difficult because the valgrind output is hard to read. This patch changes the server to cleanly shutdown the scheduler and deallocate resources (close file descriptors, free memory, destroy locks and shared memory areas) before exit(3) is called. --- error.h | 1 + server.c | 36 +++++++++++++++++------------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/error.h b/error.h index 0c741268..02f42246 100644 --- a/error.h +++ b/error.h @@ -225,6 +225,7 @@ PARA_ERROR(TARGET_EXISTS, "requested target is already present"),\ PARA_ERROR(TARGET_NOT_FOUND, "requested target not found"), \ PARA_ERROR(TASK_STARTED, "task started"), \ + PARA_ERROR(DEADLY_SIGNAL, "termination request by signal"), \ PARA_ERROR(TOO_MANY_CLIENTS, "maximal number of stat clients exceeded"), \ PARA_ERROR(UCRED_PERM, "permission denied"), \ PARA_ERROR(UDP_OVERRUN, "output buffer overrun"), \ diff --git a/server.c b/server.c index dd0acdad..13c8c85f 100644 --- a/server.c +++ b/server.c @@ -325,27 +325,22 @@ static int signal_post_select(struct sched *s, __a_unused void *context) PARA_EMERG_LOG("terminating on signal %d\n", signum); kill(0, SIGTERM); /* - * We must wait for afs because afs catches SIGINT/SIGTERM. - * Before reacting to the signal, afs might want to use the + * We must wait for all of our children to die. For the afs + * process or a command handler might want to use the * shared memory area and the mmd mutex. If we destroy this * mutex too early and afs tries to lock the shared memory * area, the call to mutex_lock() will fail and terminate the * afs process. This leads to dirty osl tables. - * - * There's no such problem with the other children of the - * server process (the command handlers) as these reset their - * SIGINT/SIGTERM handlers to the default action, i.e. these - * processes get killed immediately by the above kill(). */ - PARA_INFO_LOG("waiting for afs (pid %d) to die\n", - (int)afs_pid); - waitpid(afs_pid, NULL, 0); + PARA_INFO_LOG("waiting for child processes to die\n"); + mutex_unlock(mmd_mutex); + while (wait(NULL) != -1 || errno != ECHILD) + ; /* still at least one child alive */ + mutex_lock(mmd_mutex); cleanup: free(mmd->afd.afhi.chunk_table); - close_listed_fds(); - mutex_destroy(mmd_mutex); - shm_detach(mmd); - exit(EXIT_FAILURE); + task_notify_all(s, E_DEADLY_SIGNAL); + return -E_DEADLY_SIGNAL; } return 0; } @@ -649,22 +644,25 @@ int main(int argc, char *argv[]) server_init(argc, argv, sct); mutex_lock(mmd_mutex); ret = schedule(&sched); + /* + * We hold the mmd lock: it was re-acquired in server_select() + * after the select call. + */ + mutex_unlock(mmd_mutex); sched_shutdown(&sched); signal_shutdown(signal_task); if (!process_is_command_handler()) { /* parent (server) */ + mutex_destroy(mmd_mutex); + shm_detach(mmd); if (ret < 0) PARA_EMERG_LOG("%s\n", para_strerror(-ret)); } else { - /* - * We hold the mmd lock: it was re-acquired in server_select() - * after the select call. - */ - mutex_unlock(mmd_mutex); alarm(ALARM_TIMEOUT); close_listed_fds(); ret = handle_connect(sct->child_fd); } vss_shutdown(); + shm_detach(mmd); lls_free_parse_result(server_lpr, CMD_PTR); if (server_lpr != cmdline_lpr) lls_free_parse_result(cmdline_lpr, CMD_PTR); -- 2.39.5