[PATCH 0/2] qemu: Check if driver is still available in qemuStateStop()

The second patch is the interesting one. Michal Prívozník (2): DO NOT MERGE qemu: Check if driver is still available in qemuStateStop() src/qemu/qemu_driver.c | 7 ++++- src/remote/remote_daemon.c | 64 +++++++++++++++++++++----------------- 2 files changed, 42 insertions(+), 29 deletions(-) -- 2.26.2

This is to help reproduce the race. Build and attach gdb and: handle SIGUSR1 nostop pass handle SIGINT nostop pass and then: kill -SIGUSR1 $(pgrep libvirtd); sleep 1; kill -SIGINT $(pgrep libvirtd) Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/remote/remote_daemon.c | 64 +++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/src/remote/remote_daemon.c b/src/remote/remote_daemon.c index 7607da94be..5b68c9d2f9 100644 --- a/src/remote/remote_daemon.c +++ b/src/remote/remote_daemon.c @@ -483,6 +483,37 @@ static void daemonReloadHandler(virNetDaemonPtr dmn G_GNUC_UNUSED, } } + +static void daemonStopWorker(void *opaque) +{ + virNetDaemonPtr dmn = opaque; + + sleep(10); + + VIR_DEBUG("Begin stop dmn=%p", dmn); + + ignore_value(virStateStop()); + + VIR_DEBUG("Completed stop dmn=%p", dmn); + + /* Exit daemon cleanly */ + virNetDaemonQuit(dmn); +} + + +/* We do this in a thread to not block the main loop */ +static void daemonStop(virNetDaemonPtr dmn, + siginfo_t *sig G_GNUC_UNUSED, + void *opaque G_GNUC_UNUSED) +{ + virThread thr; + virObjectRef(dmn); + if (virThreadCreateFull(&thr, false, daemonStopWorker, + "daemon-stop", false, dmn) < 0) + virObjectUnref(dmn); +} + + static int daemonSetupSignals(virNetDaemonPtr dmn) { if (virNetDaemonAddSignalHandler(dmn, SIGINT, daemonShutdownHandler, NULL) < 0) @@ -493,6 +524,8 @@ static int daemonSetupSignals(virNetDaemonPtr dmn) return -1; if (virNetDaemonAddSignalHandler(dmn, SIGHUP, daemonReloadHandler, NULL) < 0) return -1; + if (virNetDaemonAddSignalHandler(dmn, SIGUSR1, daemonStop, NULL) < 0) + return -1; return 0; } @@ -511,32 +544,6 @@ static void daemonInhibitCallback(bool inhibit, void *opaque) static GDBusConnection *sessionBus; static GDBusConnection *systemBus; -static void daemonStopWorker(void *opaque) -{ - virNetDaemonPtr dmn = opaque; - - VIR_DEBUG("Begin stop dmn=%p", dmn); - - ignore_value(virStateStop()); - - VIR_DEBUG("Completed stop dmn=%p", dmn); - 
- /* Exit daemon cleanly */ - virNetDaemonQuit(dmn); -} - - -/* We do this in a thread to not block the main loop */ -static void daemonStop(virNetDaemonPtr dmn) -{ - virThread thr; - virObjectRef(dmn); - if (virThreadCreateFull(&thr, false, daemonStopWorker, - "daemon-stop", false, dmn) < 0) - virObjectUnref(dmn); -} - - static GDBusMessage * handleSessionMessageFunc(GDBusConnection *connection G_GNUC_UNUSED, GDBusMessage *message, @@ -550,7 +557,7 @@ handleSessionMessageFunc(GDBusConnection *connection G_GNUC_UNUSED, if (virGDBusMessageIsSignal(message, "org.freedesktop.DBus.Local", "Disconnected")) - daemonStop(dmn); + daemonStop(dmn, NULL, NULL); return message; } @@ -569,7 +576,7 @@ handleSystemMessageFunc(GDBusConnection *connection G_GNUC_UNUSED, VIR_DEBUG("dmn=%p", dmn); - daemonStop(dmn); + daemonStop(dmn, NULL, NULL); } @@ -1247,5 +1254,6 @@ int main(int argc, char **argv) { VIR_FREE(remote_config_file); daemonConfigFree(config); + sleep(10); return ret; } -- 2.26.2

When the host is shutting down then we get PrepareForShutdown signal on DBus to which we react by creating a thread which runs virStateStop() and thus qemuStateStop(). But if scheduling the thread is delayed just a bit it may happen that we receive SIGTERM (sent by systemd) to which we respond by quitting our event loop and cleaning up everything (including drivers). And only after that the thread gets to run only to find qemu_driver being NULL. At this point there is nothing left to do anyways, the event loop is gone so no API call that qemuStateStop() does in attempt to save running domains can ever succeed. But to be fair, if there was a domain running we would have registered shutdown inhibitor so we would not get killed by signal. So there is nothing left to do for qemuStateStop() anyway. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1895359 Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 05f8eb2cb7..9aa0ce4ec8 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1046,7 +1046,12 @@ qemuStateStop(void) int state; virDomainPtr *domains = NULL; g_autofree unsigned int *flags = NULL; - g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(qemu_driver); + g_autoptr(virQEMUDriverConfig) cfg = NULL; + + if (!qemu_driver) + return -1; + + cfg = virQEMUDriverGetConfig(qemu_driver); if (!(conn = virConnectOpen(cfg->uri))) goto cleanup; -- 2.26.2

On 12.11.2020 21:45, Michal Privoznik wrote:
When the host is shutting down then we get PrepareForShutdown signal on DBus to which we react by creating a thread which runs virStateStop() and thus qemuStateStop(). But if scheduling the thread is delayed just a bit it may happen that we receive SIGTERM (sent by systemd) to which we respond by quitting our event loop and cleaning up everything (including drivers). And only after that the thread gets to run only to find qemu_driver being NULL. At this point there is nothing left to do anyways, the event loop is gone so no API call that qemuStateStop() does in attempt to save running domains can ever succeed.
But to be fair, if there was a domain running we would have registered shutdown inhibitor so we would not get killed by signal. So there is nothing left to do for qemuStateStop() anyway.
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1895359
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 05f8eb2cb7..9aa0ce4ec8 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1046,7 +1046,12 @@ qemuStateStop(void) int state; virDomainPtr *domains = NULL; g_autofree unsigned int *flags = NULL; - g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(qemu_driver); + g_autoptr(virQEMUDriverConfig) cfg = NULL; + + if (!qemu_driver) + return -1; + + cfg = virQEMUDriverGetConfig(qemu_driver);
if (!(conn = virConnectOpen(cfg->uri))) goto cleanup;
Hi, Michal. There is now machinery in libvirt to handle such issues: https://www.redhat.com/archives/libvir-list/2020-November/msg00667.html Nikolay
participants (2)
-
Michal Privoznik
-
Nikolay Shirokovskiy