From a0b1604a44be6bb89c91ae760f1ccf00768eb09a Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Sun, 26 Apr 2009 19:45:59 +0200 Subject: [PATCH] Avoid dirty afs tables on shutdown. The server process must wait for afs because afs catches SIGINT/SIGTERM and may thus not respond immediately to these signals. Before reacting to the signal, afs might want to use the shared memory area and the mmd mutex. If the server process destroys this mutex too early and afs tries to lock the shared memory area, the call to mutex_lock() will fail and terminate the afs process. This leads to dirty osl tables. There's no such problem with the other children of the server process (the command handlers) as these reset their SIGINT/SIGTERM handlers to the default action, i.e. these processes get killed immediately by the server's kill(0, SIGTERM). --- server.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/server.c b/server.c index a4115eb6..da55b999 100644 --- a/server.c +++ b/server.c @@ -291,15 +291,32 @@ static void signal_post_select(struct sched *s, struct task *t) if (pid != mmd->afs_pid) continue; PARA_EMERG_LOG("fatal: afs died\n"); - goto genocide; + kill(0, SIGTERM); + goto cleanup; } break; /* die on sigint/sigterm. Kill all children too. */ case SIGINT: case SIGTERM: PARA_EMERG_LOG("terminating on signal %d\n", st->signum); -genocide: kill(0, SIGTERM); + /* + * We must wait for afs because afs catches SIGINT/SIGTERM. + * Before reacting to the signal, afs might want to use the + * shared memory area and the mmd mutex. If we destroy this + * mutex too early and afs tries to lock the shared memory + * area, the call to mutex_lock() will fail and terminate the + * afs process. This leads to dirty osl tables. + * + * There's no such problem with the other children of the + * server process (the command handlers) as these reset their + * SIGINT/SIGTERM handlers to the default action, i.e. these + * processes get killed immediately by the above kill().
+ */ + PARA_INFO_LOG("waiting for afs (pid %d) to die\n", + (int)mmd->afs_pid); + waitpid(mmd->afs_pid, NULL, 0); +cleanup: free(mmd->afd.afhi.chunk_table); free(mmd->afd.afhi.info_string); close_listed_fds(); -- 2.39.5