[PATCH v2 0/2] virnetdaemon: Wait for "daemon-stop" thread to finish before quitting

This is a v2 of: https://www.redhat.com/archives/libvir-list/2020-November/msg00639.html but it implements a different approach per Nikolay's suggestion. Michal Prívozník (2): DO NOT MERGE virnetdaemon: Wait for "daemon-stop" thread to finish before quitting src/libvirt_remote.syms | 1 + src/remote/remote_daemon.c | 73 +++++++++++++++++++++++--------------- src/rpc/virnetdaemon.c | 17 +++++++++ src/rpc/virnetdaemon.h | 3 ++ 4 files changed, 66 insertions(+), 28 deletions(-) -- 2.26.2

This is to help reproduce the race. Build and attach gdb and: handle SIGUSR1 nostop pass handle SIGINT nostop pass and then: kill -SIGUSR1 $(pgrep libvirtd); sleep 1; kill -SIGINT $(pgrep libvirtd) --- src/remote/remote_daemon.c | 64 +++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/src/remote/remote_daemon.c b/src/remote/remote_daemon.c index 7607da94be..5b68c9d2f9 100644 --- a/src/remote/remote_daemon.c +++ b/src/remote/remote_daemon.c @@ -483,6 +483,37 @@ static void daemonReloadHandler(virNetDaemonPtr dmn G_GNUC_UNUSED, } } + +static void daemonStopWorker(void *opaque) +{ + virNetDaemonPtr dmn = opaque; + + sleep(10); + + VIR_DEBUG("Begin stop dmn=%p", dmn); + + ignore_value(virStateStop()); + + VIR_DEBUG("Completed stop dmn=%p", dmn); + + /* Exit daemon cleanly */ + virNetDaemonQuit(dmn); +} + + +/* We do this in a thread to not block the main loop */ +static void daemonStop(virNetDaemonPtr dmn, + siginfo_t *sig G_GNUC_UNUSED, + void *opaque G_GNUC_UNUSED) +{ + virThread thr; + virObjectRef(dmn); + if (virThreadCreateFull(&thr, false, daemonStopWorker, + "daemon-stop", false, dmn) < 0) + virObjectUnref(dmn); +} + + static int daemonSetupSignals(virNetDaemonPtr dmn) { if (virNetDaemonAddSignalHandler(dmn, SIGINT, daemonShutdownHandler, NULL) < 0) @@ -493,6 +524,8 @@ static int daemonSetupSignals(virNetDaemonPtr dmn) return -1; if (virNetDaemonAddSignalHandler(dmn, SIGHUP, daemonReloadHandler, NULL) < 0) return -1; + if (virNetDaemonAddSignalHandler(dmn, SIGUSR1, daemonStop, NULL) < 0) + return -1; return 0; } @@ -511,32 +544,6 @@ static void daemonInhibitCallback(bool inhibit, void *opaque) static GDBusConnection *sessionBus; static GDBusConnection *systemBus; -static void daemonStopWorker(void *opaque) -{ - virNetDaemonPtr dmn = opaque; - - VIR_DEBUG("Begin stop dmn=%p", dmn); - - ignore_value(virStateStop()); - - VIR_DEBUG("Completed stop dmn=%p", dmn); - - /* Exit daemon cleanly */ - virNetDaemonQuit(dmn); 
-} - - -/* We do this in a thread to not block the main loop */ -static void daemonStop(virNetDaemonPtr dmn) -{ - virThread thr; - virObjectRef(dmn); - if (virThreadCreateFull(&thr, false, daemonStopWorker, - "daemon-stop", false, dmn) < 0) - virObjectUnref(dmn); -} - - static GDBusMessage * handleSessionMessageFunc(GDBusConnection *connection G_GNUC_UNUSED, GDBusMessage *message, @@ -550,7 +557,7 @@ handleSessionMessageFunc(GDBusConnection *connection G_GNUC_UNUSED, if (virGDBusMessageIsSignal(message, "org.freedesktop.DBus.Local", "Disconnected")) - daemonStop(dmn); + daemonStop(dmn, NULL, NULL); return message; } @@ -569,7 +576,7 @@ handleSystemMessageFunc(GDBusConnection *connection G_GNUC_UNUSED, VIR_DEBUG("dmn=%p", dmn); - daemonStop(dmn); + daemonStop(dmn, NULL, NULL); } @@ -1247,5 +1254,6 @@ int main(int argc, char **argv) { VIR_FREE(remote_config_file); daemonConfigFree(config); + sleep(10); return ret; } -- 2.26.2

When the host is shutting down then we get PrepareForShutdown signal on DBus to which we react by creating a thread which runs virStateStop() and thus qemuStateStop(). But if scheduling the thread is delayed just a but it may happen that we receive SIGTERM (sent by systemd) to which we respond by quitting our event loop and cleaning up everything (including drivers). And only after that the thread gets to run only to find qemu_driver being NULL. What we can do is to delay exiting event loop and join the thread that's executing virStateStop(). If the join doesn't happen in given timeout (currently 30 seconds) then libvirtd shuts down forcefully anyways (see virNetDaemonRun()). Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1895359 Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_remote.syms | 1 + src/remote/remote_daemon.c | 15 ++++++++++++--- src/rpc/virnetdaemon.c | 17 +++++++++++++++++ src/rpc/virnetdaemon.h | 3 +++ 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/libvirt_remote.syms b/src/libvirt_remote.syms index d398d20880..3cd84a0854 100644 --- a/src/libvirt_remote.syms +++ b/src/libvirt_remote.syms @@ -88,6 +88,7 @@ virNetDaemonQuit; virNetDaemonRemoveShutdownInhibition; virNetDaemonRun; virNetDaemonSetShutdownCallbacks; +virNetDaemonSetStateStopWorkerThread; virNetDaemonUpdateServices; diff --git a/src/remote/remote_daemon.c b/src/remote/remote_daemon.c index 5b68c9d2f9..75bb15a439 100644 --- a/src/remote/remote_daemon.c +++ b/src/remote/remote_daemon.c @@ -506,11 +506,20 @@ static void daemonStop(virNetDaemonPtr dmn, siginfo_t *sig G_GNUC_UNUSED, void *opaque G_GNUC_UNUSED) { - virThread thr; + virThreadPtr thr; virObjectRef(dmn); - if (virThreadCreateFull(&thr, false, daemonStopWorker, - "daemon-stop", false, dmn) < 0) + + thr = g_new0(virThread, 1); + + if (virThreadCreateFull(thr, true, + daemonStopWorker, + "daemon-stop", false, dmn) < 0) { virObjectUnref(dmn); + g_free(thr); + return; + } + + 
virNetDaemonSetStateStopWorkerThread(dmn, &thr); } diff --git a/src/rpc/virnetdaemon.c b/src/rpc/virnetdaemon.c index 5f0f078fac..e337ff1fde 100644 --- a/src/rpc/virnetdaemon.c +++ b/src/rpc/virnetdaemon.c @@ -71,6 +71,7 @@ struct _virNetDaemon { virNetDaemonShutdownCallback shutdownPrepareCb; virNetDaemonShutdownCallback shutdownWaitCb; + virThreadPtr stateStopThread; int finishTimer; bool quit; bool finished; @@ -108,6 +109,7 @@ virNetDaemonDispose(void *obj) #endif /* !WIN32 */ VIR_FORCE_CLOSE(dmn->autoShutdownInhibitFd); + VIR_FREE(dmn->stateStopThread); virHashFree(dmn->servers); @@ -773,6 +775,9 @@ daemonShutdownWait(void *opaque) if (dmn->shutdownWaitCb && dmn->shutdownWaitCb() < 0) goto finish; + if (dmn->stateStopThread) + virThreadJoin(dmn->stateStopThread); + graceful = true; finish: @@ -891,6 +896,18 @@ virNetDaemonRun(virNetDaemonPtr dmn) } +void +virNetDaemonSetStateStopWorkerThread(virNetDaemonPtr dmn, + virThreadPtr *thr) +{ + virObjectLock(dmn); + + VIR_DEBUG("Setting state stop worker thread on dmn=%p to thr=%p", dmn, thr); + dmn->stateStopThread = g_steal_pointer(thr); + virObjectUnlock(dmn); +} + + void virNetDaemonQuit(virNetDaemonPtr dmn) { diff --git a/src/rpc/virnetdaemon.h b/src/rpc/virnetdaemon.h index 6ae5305e53..fcc6e1fdff 100644 --- a/src/rpc/virnetdaemon.h +++ b/src/rpc/virnetdaemon.h @@ -69,6 +69,9 @@ int virNetDaemonAddSignalHandler(virNetDaemonPtr dmn, void virNetDaemonUpdateServices(virNetDaemonPtr dmn, bool enabled); +void virNetDaemonSetStateStopWorkerThread(virNetDaemonPtr dmn, + virThreadPtr *thr); + void virNetDaemonRun(virNetDaemonPtr dmn); void virNetDaemonQuit(virNetDaemonPtr dmn); -- 2.26.2

On a Friday in 2020, Michal Privoznik wrote:
When the host is shutting down, we get a PrepareForShutdown signal on DBus, to which we react by creating a thread which runs virStateStop() and thus qemuStateStop(). But if scheduling of the thread is delayed just a bit, it may happen that we receive SIGTERM (sent by systemd), to which we respond by quitting our event loop and cleaning up everything (including drivers). Only after that does the thread get to run, only to find qemu_driver being NULL.
What we can do is to delay exiting the event loop and join the thread that's executing virStateStop(). If the join doesn't happen within the given timeout (currently 30 seconds), then libvirtd shuts down forcefully anyway (see virNetDaemonRun()).
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1895359
Also, https://bugzilla.redhat.com/show_bug.cgi?id=1739564
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_remote.syms | 1 + src/remote/remote_daemon.c | 15 ++++++++++++--- src/rpc/virnetdaemon.c | 17 +++++++++++++++++ src/rpc/virnetdaemon.h | 3 +++ 4 files changed, 33 insertions(+), 3 deletions(-)
Reviewed-by: Ján Tomko <jtomko@redhat.com> Thanks for fixing this. Jano
participants (2)
-
Ján Tomko
-
Michal Privoznik