Ever since we introduced fake reboot, we call qemuProcessKill as a
reaction to the SHUTDOWN event. Unfortunately, qemu doesn't guarantee
that it has flushed all internal buffers before sending SHUTDOWN, in
which case killing the process forcibly may result in (virtual) disk
corruption.
By sending just SIGTERM without SIGKILL we give qemu time to flush
all buffers and exit. Once qemu exits, we will see an EOF on monitor
connection and tear down the domain. In case qemu ignores SIGTERM or
just hangs there, the process stays running but that's not any different
from a possible hang anytime during the shutdown process so I think it's
just fine.
Also qemu (since 0.14 until it's fixed) has a bug in SIGTERM processing
which causes it not to exit but instead to send a new SHUTDOWN event and
keep waiting. I think the best we can do is to ignore duplicate SHUTDOWN
events to avoid a SHUTDOWN-SIGTERM loop and leave the domain in paused
state.
---
src/qemu/qemu_driver.c | 2 +-
src/qemu/qemu_process.c | 25 ++++++++++++++++++-------
src/qemu/qemu_process.h | 2 +-
3 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index d2626ff..9ff800f 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -1870,7 +1870,7 @@ qemuDomainDestroyFlags(virDomainPtr dom,
* can kill the process even if a job is active. Killing
* it now means the job will be released
*/
- qemuProcessKill(vm);
+ qemuProcessKill(vm, false);
if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0)
goto cleanup;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 24d1dc7..dbd697d 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -419,7 +419,7 @@ endjob:
cleanup:
if (vm) {
if (ret == -1)
- qemuProcessKill(vm);
+ qemuProcessKill(vm, false);
if (virDomainObjUnref(vm) > 0)
virDomainObjUnlock(vm);
}
@@ -437,6 +437,12 @@ qemuProcessHandleShutdown(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
VIR_DEBUG("vm=%p", vm);
virDomainObjLock(vm);
+ if (priv->gotShutdown) {
+ VIR_DEBUG("Ignoring repeated SHUTDOWN event from domain %s",
+ vm->def->name);
+ goto cleanup;
+ }
+
priv->gotShutdown = true;
if (priv->fakeReboot) {
virDomainObjRef(vm);
@@ -446,16 +452,17 @@ qemuProcessHandleShutdown(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
qemuProcessFakeReboot,
vm) < 0) {
VIR_ERROR(_("Failed to create reboot thread, killing domain"));
- qemuProcessKill(vm);
+ qemuProcessKill(vm, true);
if (virDomainObjUnref(vm) == 0)
vm = NULL;
}
} else {
- qemuProcessKill(vm);
+ qemuProcessKill(vm, true);
}
+
+cleanup:
if (vm)
virDomainObjUnlock(vm);
-
return 0;
}
@@ -3183,10 +3190,11 @@ cleanup:
}
-void qemuProcessKill(virDomainObjPtr vm)
+void qemuProcessKill(virDomainObjPtr vm, bool gracefully)
{
int i;
- VIR_DEBUG("vm=%s pid=%d", vm->def->name, vm->pid);
+ VIR_DEBUG("vm=%s pid=%d gracefully=%d",
+ vm->def->name, vm->pid, gracefully);
if (!virDomainObjIsActive(vm)) {
VIR_DEBUG("VM '%s' not active", vm->def->name);
@@ -3216,6 +3224,9 @@ void qemuProcessKill(virDomainObjPtr vm)
break;
}
+ if (i == 0 && gracefully)
+ break;
+
usleep(200 * 1000);
}
}
@@ -3300,7 +3311,7 @@ void qemuProcessStop(struct qemud_driver *driver,
}
/* shut it off for sure */
- qemuProcessKill(vm);
+ qemuProcessKill(vm, false);
/* Stop autodestroy in case guest is restarted */
qemuProcessAutoDestroyRemove(driver, vm);
diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h
index 96ba3f3..ef422c4 100644
--- a/src/qemu/qemu_process.h
+++ b/src/qemu/qemu_process.h
@@ -68,7 +68,7 @@ int qemuProcessAttach(virConnectPtr conn,
virDomainChrSourceDefPtr monConfig,
bool monJSON);
-void qemuProcessKill(virDomainObjPtr vm);
+void qemuProcessKill(virDomainObjPtr vm, bool gracefully);
int qemuProcessAutoDestroyInit(struct qemud_driver *driver);
void qemuProcessAutoDestroyRun(struct qemud_driver *driver,
--
1.7.6.1