This patch introduces support for domain destroying via 'quit' monitor
command which gives qemu time to flush caches and therefore prevent
disks corruption. However, qemu can be unresponsive and to prevent
waiting indefinitely, execute command in a separate thread and wait
reasonable time (QEMU_QUIT_WAIT_SECONDS) on a condition. If we hit
timeout, qemu is qemuProcessKill'ed which causes monitor close and
therefore also thread being terminable.
The synchronization between qemu driver and monitor-quit thread is done
through mutex and condition. However, this alone is not enough. If a
condition is signalized but without anybody listening signal is lost. To
prevent this a boolean variable is used that is set iff nobody is
listening but condition would be signalized, or iff driver is waiting on
given condition.
---
diff to v1:
- more debug printings
- don't test (!flags) but (!(flags & VIR_DOMAIN_DESTROY_FLUSH_CACHE))
include/libvirt/libvirt.h.in | 11 ++-
src/qemu/qemu_driver.c | 146 ++++++++++++++++++++++++++++++++++++++++--
src/qemu/qemu_monitor.c | 13 ++++
src/qemu/qemu_monitor.h | 2 +
src/qemu/qemu_monitor_json.c | 22 ++++++
src/qemu/qemu_monitor_json.h | 1 +
src/qemu/qemu_monitor_text.c | 20 ++++++
src/qemu/qemu_monitor_text.h | 1 +
tools/virsh.c | 9 ++-
9 files changed, 213 insertions(+), 12 deletions(-)
diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
index aa29fb6..0610b53 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -919,10 +919,13 @@ virConnectPtr virDomainGetConnect (virDomainPtr
domain);
* Domain creation and destruction
*/
-/*
- * typedef enum {
- * } virDomainDestroyFlagsValues;
- */
+
+typedef enum {
+ VIR_DOMAIN_DESTROY_FLUSH_CACHE = 1 << 0, /* In hypervisors supporting this,
+ try to get cache flushed prior
+ to destroying the domain */
+} virDomainDestroyFlagsValues;
+
virDomainPtr virDomainCreateXML (virConnectPtr conn,
const char *xmlDesc,
unsigned int flags);
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index c8dda73..44f4bbd 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -1564,6 +1564,47 @@ cleanup:
return ret;
}
+struct qemuDomainDestroyHelperData {
+ struct qemud_driver *driver;
+ virDomainObjPtr vm;
+ qemuDomainObjPrivatePtr priv;
+ virMutex lock;
+ virCond cond;
+ bool guard;
+};
+
+static void
+qemuDomainDestroyHelper(void *opaque)
+{
+ struct qemuDomainDestroyHelperData *data = opaque;
+ struct qemud_driver *driver = data->driver;
+ virDomainObjPtr vm = data->vm;
+ qemuDomainObjPrivatePtr priv = data->priv;
+
+ /* Job was already obtained by caller */
+ VIR_DEBUG("Destroy thread started, sending 'quit' command.");
+ qemuDomainObjEnterMonitorWithDriver(driver, vm);
+ qemuMonitorQuit(priv->mon);
+ qemuDomainObjExitMonitorWithDriver(driver, vm);
+ VIR_DEBUG("Command finished.");
+
+ /* To avoid loosing signal, we need to remember
+ * we tried to send one, but nobody was waiting
+ * for it.
+ */
+ virMutexLock(&data->lock);
+ if (data->guard) {
+ VIR_DEBUG("Driver is listening, signalize on condition");
+ virCondSignal(&data->cond);
+ } else {
+ VIR_DEBUG("Nobody's listening, setting guard variable");
+ data->guard = true;
+ }
+ virMutexUnlock(&data->lock);
+ VIR_DEBUG("Destroy thread exiting");
+}
+
+#define QEMU_QUIT_WAIT_SECONDS 5
static int
qemuDomainDestroyFlags(virDomainPtr dom,
@@ -1574,8 +1615,13 @@ qemuDomainDestroyFlags(virDomainPtr dom,
int ret = -1;
virDomainEventPtr event = NULL;
qemuDomainObjPrivatePtr priv;
+ bool use_thread = false;
+ bool use_kill = false;
+ virThread destroy_thread;
+ struct qemuDomainDestroyHelperData data;
+ unsigned long long then;
- virCheckFlags(0, -1);
+ virCheckFlags(VIR_DOMAIN_DESTROY_FLUSH_CACHE, -1);
qemuDriverLock(driver);
vm = virDomainFindByUUID(&driver->domains, dom->uuid);
@@ -1590,12 +1636,14 @@ qemuDomainDestroyFlags(virDomainPtr dom,
priv = vm->privateData;
priv->fakeReboot = false;
- /* Although qemuProcessStop does this already, there may
- * be an outstanding job active. We want to make sure we
- * can kill the process even if a job is active. Killing
- * it now means the job will be released
- */
- qemuProcessKill(vm);
+ if (!(flags & VIR_DOMAIN_DESTROY_FLUSH_CACHE)) {
+ /* Although qemuProcessStop does this already, there may
+ * be an outstanding job active. We want to make sure we
+ * can kill the process even if a job is active. Killing
+ * it now means the job will be released
+ */
+ qemuProcessKill(vm);
+ }
if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0)
goto cleanup;
@@ -1606,6 +1654,78 @@ qemuDomainDestroyFlags(virDomainPtr dom,
goto endjob;
}
+ if (flags & VIR_DOMAIN_DESTROY_FLUSH_CACHE) {
+ /* Try to shutdown domain gracefully.
+ * Send 'quit' to qemu. However, qemu can be unresponsive.
+ * Therefore create a separate thread in which we execute
+ * that monitor comand. Wait max QEMU_QUIT_WAIT_SECONDS.
+ */
+ VIR_DEBUG("Trying to gracefully shutdown %s", vm->def->name);
+ if (virMutexInit(&data.lock) < 0) {
+ virReportOOMError();
+ goto endjob;
+ }
+
+ if (virCondInit(&data.cond) < 0) {
+ qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("cannot initialize thread condition"));
+ virMutexDestroy(&data.lock);
+ goto endjob;
+ }
+
+ data.driver = driver;
+ data.vm = vm;
+ data.priv = priv;
+ data.guard = false;
+
+ VIR_DEBUG("Spawning thread to run 'quit' command");
+ if (virThreadCreate(&destroy_thread, true,
+ qemuDomainDestroyHelper, &data) < 0) {
+ virReportSystemError(errno, "%s",
+ _("unable to create destroy thread"));
+ ignore_value(virCondDestroy(&data.cond));
+ virMutexDestroy(&data.lock);
+ goto endjob;
+ }
+ use_thread = true;
+
+ if (virTimeMs(&then) < 0)
+ goto endjob;
+
+ then += QEMU_QUIT_WAIT_SECONDS * 1000;
+
+ /* How synchronization with destroy thread works:
+ * Basically wait on data.cond. But because conditions
+ * does not 'remember' that they have been signalized
+ * data.guard is used. Practically, data.guard says
+ * to destroy thread if we are waiting on condition
+ * and to us whether we should even try.
+ */
+ virMutexLock(&data.lock);
+ if (!data.guard) {
+ data.guard = true;
+ VIR_DEBUG("Waiting for thread to exit");
+ if (virCondWaitUntil(&data.cond, &data.lock, then) < 0) {
+ if (errno == ETIMEDOUT) {
+ VIR_DEBUG("Condition timeout");
+ use_kill = true;
+ data.guard = false;
+ } else {
+ virReportSystemError(errno, "%s", _("unable to wait
"
+ "on condition"));
+ virMutexUnlock(&data.lock);
+ goto endjob;
+ }
+ }
+ } else {
+ VIR_DEBUG("Guard variable set, thread exited");
+ }
+ virMutexUnlock(&data.lock);
+
+ if (use_kill)
+ qemuProcessKill(vm);
+ }
+
qemuProcessStop(driver, vm, 0, VIR_DOMAIN_SHUTOFF_DESTROYED);
event = virDomainEventNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
@@ -1626,6 +1746,18 @@ endjob:
vm = NULL;
cleanup:
+ if (use_thread) {
+ VIR_DEBUG("Thread used, clean it up");
+ virMutexLock(&data.lock);
+ if (!data.guard) {
+ data.guard = true;
+ ignore_value(virCondWait(&data.cond, &data.lock));
+ }
+ virMutexUnlock(&data.lock);
+ ignore_value(virCondDestroy(&data.cond));
+ virMutexDestroy(&data.lock);
+ virThreadJoin(&destroy_thread);
+ }
if (vm)
virDomainObjUnlock(vm);
if (event)
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index db6107c..41b9c5c 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -2475,3 +2475,16 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon,
ret = qemuMonitorTextBlockJob(mon, device, bandwidth, info, mode);
return ret;
}
+
+int qemuMonitorQuit(qemuMonitorPtr mon)
+{
+ int ret;
+
+ VIR_DEBUG("mon=%p", mon);
+
+ if (mon->json)
+ ret = qemuMonitorJSONQuit(mon);
+ else
+ ret = qemuMonitorTextQuit(mon);
+ return ret;
+}
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index f241c9e..3fe6bb9 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -475,6 +475,8 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon,
virDomainBlockJobInfoPtr info,
int mode);
+int qemuMonitorQuit(qemuMonitorPtr mon);
+
/**
* When running two dd process and using <> redirection, we need a
* shell that will not truncate files. These two strings serve that
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index 7adfb26..1f078cd 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -2956,3 +2956,25 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon,
virJSONValueFree(reply);
return ret;
}
+
+
+int qemuMonitorJSONQuit(qemuMonitorPtr mon)
+{
+ int ret = -1;
+ virJSONValuePtr cmd = NULL;
+ virJSONValuePtr reply = NULL;
+
+ cmd = qemuMonitorJSONMakeCommand("quit", NULL);
+
+ if (!cmd)
+ return -1;
+
+ ret =qemuMonitorJSONCommand(mon, cmd, &reply);
+
+ if (ret == 0)
+ ret = qemuMonitorJSONCheckError(cmd, reply);
+
+ virJSONValueFree(cmd);
+ virJSONValueFree(reply);
+ return ret;
+}
diff --git a/src/qemu/qemu_monitor_json.h b/src/qemu/qemu_monitor_json.h
index 9512793..2a7df76 100644
--- a/src/qemu/qemu_monitor_json.h
+++ b/src/qemu/qemu_monitor_json.h
@@ -231,4 +231,5 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon,
virDomainBlockJobInfoPtr info,
int mode);
+int qemuMonitorJSONQuit(qemuMonitorPtr mon);
#endif /* QEMU_MONITOR_JSON_H */
diff --git a/src/qemu/qemu_monitor_text.c b/src/qemu/qemu_monitor_text.c
index 52d924a..ca9c21b 100644
--- a/src/qemu/qemu_monitor_text.c
+++ b/src/qemu/qemu_monitor_text.c
@@ -3063,3 +3063,23 @@ cleanup:
VIR_FREE(reply);
return ret;
}
+
+
+int qemuMonitorTextQuit(qemuMonitorPtr mon)
+{
+ const char *cmd = "quit";
+ char *reply = NULL;
+ int ret = -1;
+
+ if (qemuMonitorHMPCommand(mon, cmd, &reply) < 0) {
+ qemuReportError(VIR_ERR_INTERNAL_ERROR,
+ "%s", _("cannot run monitor command"));
+ goto cleanup;
+ }
+
+ ret = 0;
+
+cleanup:
+ VIR_FREE(reply);
+ return ret;
+}
diff --git a/src/qemu/qemu_monitor_text.h b/src/qemu/qemu_monitor_text.h
index b250738..9ade938 100644
--- a/src/qemu/qemu_monitor_text.h
+++ b/src/qemu/qemu_monitor_text.h
@@ -224,4 +224,5 @@ int qemuMonitorTextBlockJob(qemuMonitorPtr mon,
virDomainBlockJobInfoPtr info,
int mode);
+int qemuMonitorTextQuit(qemuMonitorPtr mon);
#endif /* QEMU_MONITOR_TEXT_H */
diff --git a/tools/virsh.c b/tools/virsh.c
index 7d849ec..4c7962b 100644
--- a/tools/virsh.c
+++ b/tools/virsh.c
@@ -2567,6 +2567,8 @@ static const vshCmdInfo info_destroy[] = {
static const vshCmdOptDef opts_destroy[] = {
{"domain", VSH_OT_DATA, VSH_OFLAG_REQ, N_("domain name, id or
uuid")},
+ {"flush", VSH_OT_BOOL, 0, N_("try to flush hypervisor's cache
prior to "
+ "destroying the domain")},
{NULL, 0, 0, NULL}
};
@@ -2576,6 +2578,7 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd)
virDomainPtr dom;
bool ret = true;
const char *name;
+ int flags = 0;
if (!vshConnectionUsability(ctl, ctl->conn))
return false;
@@ -2583,7 +2586,11 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd)
if (!(dom = vshCommandOptDomain(ctl, cmd, &name)))
return false;
- if (virDomainDestroy(dom) == 0) {
+ if (vshCommandOptBool(cmd, "flush"))
+ flags |= VIR_DOMAIN_DESTROY_FLUSH_CACHE;
+
+ if ((!flags && virDomainDestroy(dom) == 0) ||
+ virDomainDestroyFlags(dom, flags) == 0) {
vshPrint(ctl, _("Domain %s destroyed\n"), name);
} else {
vshError(ctl, _("Failed to destroy domain %s"), name);
--
1.7.3.4