[libvirt] [PATCH] qemu: Allow graceful domain destroy

This patch introduces support for domain destroying via 'quit' monitor command which gives qemu time to flush caches and therefore prevent disks corruption. However, qemu can be unresponsive and to prevent waiting indefinitely, execute command in a separate thread and wait reasonable time (QEMU_QUIT_WAIT_SECONDS) on a condition. If we hit timeout, qemu is qemuProcessKill'ed which causes monitor close and therefore also thread being terminable. The synchronization between qemu driver and monitor-quit thread is done through mutex and condition. However, this alone is not enough. If a condition is signalized but without anybody listening signal is lost. To prevent this a boolean variable is used that is set iff nobody is listening but condition would be signalized, or iff driver is waiting on given condition. --- include/libvirt/libvirt.h.in | 11 ++- src/qemu/qemu_driver.c | 132 +++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_monitor.c | 13 ++++ src/qemu/qemu_monitor.h | 2 + src/qemu/qemu_monitor_json.c | 22 +++++++ src/qemu/qemu_monitor_json.h | 1 + src/qemu/qemu_monitor_text.c | 20 ++++++ src/qemu/qemu_monitor_text.h | 1 + tools/virsh.c | 8 ++- 9 files changed, 198 insertions(+), 12 deletions(-) diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index aa29fb6..fa98b8d 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -919,10 +919,13 @@ virConnectPtr virDomainGetConnect (virDomainPtr domain); * Domain creation and destruction */ -/* - * typedef enum { - * } virDomainDestroyFlagsValues; - */ + +typedef enum { + VIR_DOMAIN_DESTROY_MONITOR = 1 << 0, /* In hypervisors supporting this, + try to send 'quit' command prior + to killing hypervisor */ +} virDomainDestroyFlagsValues; + virDomainPtr virDomainCreateXML (virConnectPtr conn, const char *xmlDesc, unsigned int flags); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 421a98e..6585cb4 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1564,6 +1564,42 @@ cleanup: return ret; } +struct qemuDomainDestroyHelperData { + struct qemud_driver *driver; + virDomainObjPtr vm; + qemuDomainObjPrivatePtr priv; + virMutex lock; + virCond cond; + bool guard; +}; + +static void +qemuDomainDestroyHelper(void *opaque) +{ + struct qemuDomainDestroyHelperData *data = opaque; + struct qemud_driver *driver = data->driver; + virDomainObjPtr vm = data->vm; + qemuDomainObjPrivatePtr priv = data->priv; + + /* Job was already obtained by caller */ + qemuDomainObjEnterMonitorWithDriver(driver, vm); + qemuMonitorQuit(priv->mon); + qemuDomainObjExitMonitorWithDriver(driver, vm); + + /* To avoid loosing signal, we need to remember + * we tried to send one, but nobody was waiting + * for it. 
+ */ + virMutexLock(&data->lock); + if (data->guard) { + virCondSignal(&data->cond); + } else { + data->guard = true; + } + virMutexUnlock(&data->lock); +} + +#define QEMU_QUIT_WAIT_SECONDS 5 static int qemuDomainDestroyFlags(virDomainPtr dom, @@ -1574,8 +1610,13 @@ qemuDomainDestroyFlags(virDomainPtr dom, int ret = -1; virDomainEventPtr event = NULL; qemuDomainObjPrivatePtr priv; + bool use_thread = false; + bool use_kill = false; + virThread destroy_thread; + struct qemuDomainDestroyHelperData data; + unsigned long long then; - virCheckFlags(0, -1); + virCheckFlags(VIR_DOMAIN_DESTROY_MONITOR, -1); qemuDriverLock(driver); vm = virDomainFindByUUID(&driver->domains, dom->uuid); @@ -1590,12 +1631,14 @@ qemuDomainDestroyFlags(virDomainPtr dom, priv = vm->privateData; priv->fakeReboot = false; - /* Although qemuProcessStop does this already, there may - * be an outstanding job active. We want to make sure we - * can kill the process even if a job is active. Killing - * it now means the job will be released - */ - qemuProcessKill(vm); + if (!flags) { + /* Although qemuProcessStop does this already, there may + * be an outstanding job active. We want to make sure we + * can kill the process even if a job is active. Killing + * it now means the job will be released + */ + qemuProcessKill(vm); + } if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0) goto cleanup; @@ -1606,6 +1649,70 @@ qemuDomainDestroyFlags(virDomainPtr dom, goto endjob; } + if (flags & VIR_DOMAIN_DESTROY_MONITOR) { + /* Try to shutdown domain gracefully. + * Send 'quit' to qemu. However, qemu can be unresponsive. + * Therefore create a separate thread in which we execute + * that monitor comand. Wait max QEMU_QUIT_WAIT_SECONDS. + */ + if (virMutexInit(&data.lock) < 0) { + virReportOOMError(); + goto endjob; + } + + if (virCondInit(&data.cond) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot initialize thread condition")); + virMutexDestroy(&data.lock); + goto endjob; + } + + data.driver = driver; + data.vm = vm; + data.priv = priv; + data.guard = false; + + if (virThreadCreate(&destroy_thread, true, + qemuDomainDestroyHelper, &data) < 0) { + virReportSystemError(errno, "%s", + _("unable to create destroy thread")); + ignore_value(virCondDestroy(&data.cond)); + virMutexDestroy(&data.lock); + goto endjob; + } + use_thread = true; + + if (virTimeMs(&then) < 0) + goto endjob; + + then += QEMU_QUIT_WAIT_SECONDS * 1000; + + /* How synchronization with destroy thread works: + * Basically wait on data.cond. But because conditions + * does not 'remember' that they have been signalized + * data.guard is used. Practically, data.guard says + * to destroy thread if we are waiting on condition + * and to us whether we should even try. 
+ */ + virMutexLock(&data.lock); + if (!data.guard) { + data.guard = true; + if (virCondWaitUntil(&data.cond, &data.lock, then) < 0) { + if (errno == ETIMEDOUT) { + use_kill = true; + data.guard = false; + } else { + virMutexUnlock(&data.lock); + goto endjob; + } + } + } + virMutexUnlock(&data.lock); + + if (use_kill) + qemuProcessKill(vm); + } + qemuProcessStop(driver, vm, 0, VIR_DOMAIN_SHUTOFF_DESTROYED); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, @@ -1626,6 +1733,17 @@ endjob: vm = NULL; cleanup: + if (use_thread) { + virMutexLock(&data.lock); + if (!data.guard) { + data.guard = true; + ignore_value(virCondWait(&data.cond, &data.lock)); + } + virMutexUnlock(&data.lock); + ignore_value(virCondDestroy(&data.cond)); + virMutexDestroy(&data.lock); + virThreadJoin(&destroy_thread); + } if (vm) virDomainObjUnlock(vm); if (event) diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index db6107c..41b9c5c 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -2475,3 +2475,16 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon, ret = qemuMonitorTextBlockJob(mon, device, bandwidth, info, mode); return ret; } + +int qemuMonitorQuit(qemuMonitorPtr mon) +{ + int ret; + + VIR_DEBUG("mon=%p", mon); + + if (mon->json) + ret = qemuMonitorJSONQuit(mon); + else + ret = qemuMonitorTextQuit(mon); + return ret; +} diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h index f241c9e..3fe6bb9 100644 --- a/src/qemu/qemu_monitor.h +++ b/src/qemu/qemu_monitor.h @@ -475,6 +475,8 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon, virDomainBlockJobInfoPtr info, int mode); +int qemuMonitorQuit(qemuMonitorPtr mon); + /** * When running two dd process and using <> redirection, we need a * shell that will not truncate files. These two strings serve that diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 7adfb26..1f078cd 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -2956,3 +2956,25 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon, virJSONValueFree(reply); return ret; } + + +int qemuMonitorJSONQuit(qemuMonitorPtr mon) +{ + int ret = -1; + virJSONValuePtr cmd = NULL; + virJSONValuePtr reply = NULL; + + cmd = qemuMonitorJSONMakeCommand("quit", NULL); + + if (!cmd) + return -1; + + ret =qemuMonitorJSONCommand(mon, cmd, &reply); + + if (ret == 0) + ret = qemuMonitorJSONCheckError(cmd, reply); + + virJSONValueFree(cmd); + virJSONValueFree(reply); + return ret; +} diff --git a/src/qemu/qemu_monitor_json.h b/src/qemu/qemu_monitor_json.h index 9512793..2a7df76 100644 --- a/src/qemu/qemu_monitor_json.h +++ b/src/qemu/qemu_monitor_json.h @@ -231,4 +231,5 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon, virDomainBlockJobInfoPtr info, int mode); +int qemuMonitorJSONQuit(qemuMonitorPtr mon); #endif /* QEMU_MONITOR_JSON_H */ diff --git a/src/qemu/qemu_monitor_text.c b/src/qemu/qemu_monitor_text.c index 335e39e..19c2690 100644 --- a/src/qemu/qemu_monitor_text.c +++ b/src/qemu/qemu_monitor_text.c @@ -3046,3 +3046,23 @@ cleanup: VIR_FREE(reply); return ret; } + + +int qemuMonitorTextQuit(qemuMonitorPtr mon) +{ + const char *cmd = "quit"; + char *reply = NULL; + int ret = -1; + + if (qemuMonitorHMPCommand(mon, cmd, &reply) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("cannot run monitor command")); + goto cleanup; + } + + ret = 0; + +cleanup: + VIR_FREE(reply); + return ret; +} diff --git a/src/qemu/qemu_monitor_text.h b/src/qemu/qemu_monitor_text.h index b250738..9ade938 100644 --- a/src/qemu/qemu_monitor_text.h +++ 
b/src/qemu/qemu_monitor_text.h @@ -224,4 +224,5 @@ int qemuMonitorTextBlockJob(qemuMonitorPtr mon, virDomainBlockJobInfoPtr info, int mode); +int qemuMonitorTextQuit(qemuMonitorPtr mon); #endif /* QEMU_MONITOR_TEXT_H */ diff --git a/tools/virsh.c b/tools/virsh.c index f1eb4ca..0c18d8b 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -2565,6 +2565,7 @@ static const vshCmdInfo info_destroy[] = { static const vshCmdOptDef opts_destroy[] = { {"domain", VSH_OT_DATA, VSH_OFLAG_REQ, N_("domain name, id or uuid")}, + {"monitor", VSH_OT_BOOL, 0, N_("try graceful destroy via monitor first")}, {NULL, 0, 0, NULL} }; @@ -2574,6 +2575,7 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd) virDomainPtr dom; bool ret = true; const char *name; + int flags = 0; if (!vshConnectionUsability(ctl, ctl->conn)) return false; @@ -2581,7 +2583,11 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain(ctl, cmd, &name))) return false; - if (virDomainDestroy(dom) == 0) { + if (vshCommandOptBool(cmd, "monitor")) + flags |= VIR_DOMAIN_DESTROY_MONITOR; + + if ((!flags && virDomainDestroy(dom) == 0) || + virDomainDestroyFlags(dom, flags) == 0) { vshPrint(ctl, _("Domain %s destroyed\n"), name); } else { vshError(ctl, _("Failed to destroy domain %s"), name); -- 1.7.3.4

On Sat, Aug 20, 2011 at 01:06:10PM +0200, Michal Privoznik wrote:
This patch introduces support for destroying a domain via the 'quit' monitor command, which gives qemu time to flush its caches and therefore prevents disk corruption. However, qemu can be unresponsive, and to avoid waiting indefinitely the command is executed in a separate thread while we wait a reasonable time (QEMU_QUIT_WAIT_SECONDS) on a condition. If we hit the timeout, qemu is qemuProcessKill'ed, which closes the monitor and therefore also lets the thread terminate.
The synchronization between the qemu driver and the monitor-quit thread is done through a mutex and a condition variable. However, this alone is not enough: if the condition is signalled while nobody is listening, the signal is lost. To prevent this, a boolean variable is used which is set if the condition would be signalled but nobody is listening, or if the driver is waiting on the condition.
---
 include/libvirt/libvirt.h.in |  11 ++-
 src/qemu/qemu_driver.c       | 132 +++++++++++++++++++++++++++++++++++++++--
 src/qemu/qemu_monitor.c      |  13 ++++
 src/qemu/qemu_monitor.h      |   2 +
 src/qemu/qemu_monitor_json.c |  22 +++++++
 src/qemu/qemu_monitor_json.h |   1 +
 src/qemu/qemu_monitor_text.c |  20 ++++++
 src/qemu/qemu_monitor_text.h |   1 +
 tools/virsh.c                |   8 ++-
 9 files changed, 198 insertions(+), 12 deletions(-)
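To make the guard scheme above easier to follow, here is a minimal self-contained sketch of the same pattern in plain pthreads (the patch itself uses libvirt's virMutex/virCond wrappers and a timed wait; all names below are illustrative only):

#include <pthread.h>
#include <stdbool.h>

/* Minimal sketch of the "guard" pattern described above, using plain
 * pthreads instead of libvirt's virMutex/virCond wrappers.  Whichever
 * side takes the lock first sets 'guard'; the second side then either
 * signals a real waiter or sees that the event already happened, so a
 * wake-up sent while nobody waits is never lost.  Initialise 'lock' and
 * 'cond' with pthread_mutex_init()/pthread_cond_init() before use. */
struct guarded_event {
    pthread_mutex_t lock;
    pthread_cond_t cond;
    bool guard;   /* "a waiter is blocked" or "the event already happened" */
};

/* worker side: announce completion (cf. qemuDomainDestroyHelper) */
static void
guarded_event_complete(struct guarded_event *ev)
{
    pthread_mutex_lock(&ev->lock);
    if (ev->guard)                        /* a waiter is already blocked */
        pthread_cond_signal(&ev->cond);
    else                                  /* nobody listening yet: remember it */
        ev->guard = true;
    pthread_mutex_unlock(&ev->lock);
}

/* waiter side: block until the worker has completed (cf. the driver) */
static void
guarded_event_wait(struct guarded_event *ev)
{
    pthread_mutex_lock(&ev->lock);
    if (!ev->guard) {                     /* event not seen yet: wait for it */
        ev->guard = true;                 /* tell the worker we are waiting */
        pthread_cond_wait(&ev->cond, &ev->lock);
    }
    pthread_mutex_unlock(&ev->lock);
}

Like the patch, this sketch does not guard against spurious wake-ups; a production version would re-check a completion flag in a loop around the wait.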
diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
index aa29fb6..fa98b8d 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -919,10 +919,13 @@ virConnectPtr virDomainGetConnect (virDomainPtr domain);
  * Domain creation and destruction
  */
-/*
- * typedef enum {
- * } virDomainDestroyFlagsValues;
- */
+
+typedef enum {
+    VIR_DOMAIN_DESTROY_MONITOR = 1 << 0, /* In hypervisors supporting this,
+                                             try to send 'quit' command prior
+                                             to killing hypervisor */
+} virDomainDestroyFlagsValues;
Rather than describing the flag based on the QEMU command, I would rather describe its intended effects, i.e.:

  VIR_DOMAIN_DESTROY_FLUSH_CACHE = 1 << 0, /* In hypervisors supporting this,
                                               try to get cache flushed prior
                                               to destroying the domain */
virDomainPtr virDomainCreateXML (virConnectPtr conn, const char *xmlDesc, unsigned int flags); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 421a98e..6585cb4 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1564,6 +1564,42 @@ cleanup: return ret; }
+struct qemuDomainDestroyHelperData { + struct qemud_driver *driver; + virDomainObjPtr vm; + qemuDomainObjPrivatePtr priv; + virMutex lock; + virCond cond; + bool guard; +}; + +static void +qemuDomainDestroyHelper(void *opaque) +{ + struct qemuDomainDestroyHelperData *data = opaque; + struct qemud_driver *driver = data->driver; + virDomainObjPtr vm = data->vm; + qemuDomainObjPrivatePtr priv = data->priv; + + /* Job was already obtained by caller */ + qemuDomainObjEnterMonitorWithDriver(driver, vm); + qemuMonitorQuit(priv->mon); + qemuDomainObjExitMonitorWithDriver(driver, vm); + + /* To avoid loosing signal, we need to remember + * we tried to send one, but nobody was waiting + * for it. + */ + virMutexLock(&data->lock); + if (data->guard) { + virCondSignal(&data->cond); + } else { + data->guard = true; + } + virMutexUnlock(&data->lock); +} + +#define QEMU_QUIT_WAIT_SECONDS 5
static int qemuDomainDestroyFlags(virDomainPtr dom, @@ -1574,8 +1610,13 @@ qemuDomainDestroyFlags(virDomainPtr dom, int ret = -1; virDomainEventPtr event = NULL; qemuDomainObjPrivatePtr priv; + bool use_thread = false; + bool use_kill = false; + virThread destroy_thread; + struct qemuDomainDestroyHelperData data; + unsigned long long then;
-    virCheckFlags(0, -1);
+    virCheckFlags(VIR_DOMAIN_DESTROY_MONITOR, -1);
     qemuDriverLock(driver);
     vm = virDomainFindByUUID(&driver->domains, dom->uuid);
@@ -1590,12 +1631,14 @@ qemuDomainDestroyFlags(virDomainPtr dom,
     priv = vm->privateData;
     priv->fakeReboot = false;
-    /* Although qemuProcessStop does this already, there may
-     * be an outstanding job active. We want to make sure we
-     * can kill the process even if a job is active. Killing
-     * it now means the job will be released
-     */
-    qemuProcessKill(vm);
+    if (!flags) {
please test the given flag here instead of assuming the bit we know about now.
+        /* Although qemuProcessStop does this already, there may
+         * be an outstanding job active. We want to make sure we
+         * can kill the process even if a job is active. Killing
+         * it now means the job will be released
+         */
+        qemuProcessKill(vm);
+    }
if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0) goto cleanup; @@ -1606,6 +1649,70 @@ qemuDomainDestroyFlags(virDomainPtr dom, goto endjob; }
+ if (flags & VIR_DOMAIN_DESTROY_MONITOR) { + /* Try to shutdown domain gracefully. + * Send 'quit' to qemu. However, qemu can be unresponsive. + * Therefore create a separate thread in which we execute + * that monitor comand. Wait max QEMU_QUIT_WAIT_SECONDS. + */ + if (virMutexInit(&data.lock) < 0) { + virReportOOMError(); + goto endjob; + } + + if (virCondInit(&data.cond) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot initialize thread condition")); + virMutexDestroy(&data.lock); + goto endjob; + } + + data.driver = driver; + data.vm = vm; + data.priv = priv; + data.guard = false; + + if (virThreadCreate(&destroy_thread, true, + qemuDomainDestroyHelper, &data) < 0) { + virReportSystemError(errno, "%s", + _("unable to create destroy thread")); + ignore_value(virCondDestroy(&data.cond)); + virMutexDestroy(&data.lock); + goto endjob; + } + use_thread = true; + + if (virTimeMs(&then) < 0) + goto endjob; + + then += QEMU_QUIT_WAIT_SECONDS * 1000; + + /* How synchronization with destroy thread works: + * Basically wait on data.cond. But because conditions + * does not 'remember' that they have been signalized + * data.guard is used. Practically, data.guard says + * to destroy thread if we are waiting on condition + * and to us whether we should even try. + */ + virMutexLock(&data.lock); + if (!data.guard) { + data.guard = true; + if (virCondWaitUntil(&data.cond, &data.lock, then) < 0) { + if (errno == ETIMEDOUT) { + use_kill = true; + data.guard = false; + } else { + virMutexUnlock(&data.lock); + goto endjob; + } + } + } + virMutexUnlock(&data.lock); + + if (use_kill) + qemuProcessKill(vm); + } + qemuProcessStop(driver, vm, 0, VIR_DOMAIN_SHUTOFF_DESTROYED); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, @@ -1626,6 +1733,17 @@ endjob: vm = NULL;
cleanup: + if (use_thread) { + virMutexLock(&data.lock); + if (!data.guard) { + data.guard = true; + ignore_value(virCondWait(&data.cond, &data.lock)); + } + virMutexUnlock(&data.lock); + ignore_value(virCondDestroy(&data.cond)); + virMutexDestroy(&data.lock); + virThreadJoin(&destroy_thread); + } if (vm) virDomainObjUnlock(vm); if (event) diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index db6107c..41b9c5c 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -2475,3 +2475,16 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon, ret = qemuMonitorTextBlockJob(mon, device, bandwidth, info, mode); return ret; } + +int qemuMonitorQuit(qemuMonitorPtr mon) +{ + int ret; + + VIR_DEBUG("mon=%p", mon); + + if (mon->json) + ret = qemuMonitorJSONQuit(mon); + else + ret = qemuMonitorTextQuit(mon); + return ret; +} diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h index f241c9e..3fe6bb9 100644 --- a/src/qemu/qemu_monitor.h +++ b/src/qemu/qemu_monitor.h @@ -475,6 +475,8 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon, virDomainBlockJobInfoPtr info, int mode);
+int qemuMonitorQuit(qemuMonitorPtr mon); + /** * When running two dd process and using <> redirection, we need a * shell that will not truncate files. These two strings serve that diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 7adfb26..1f078cd 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -2956,3 +2956,25 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon, virJSONValueFree(reply); return ret; } + + +int qemuMonitorJSONQuit(qemuMonitorPtr mon) +{ + int ret = -1; + virJSONValuePtr cmd = NULL; + virJSONValuePtr reply = NULL; + + cmd = qemuMonitorJSONMakeCommand("quit", NULL); + + if (!cmd) + return -1; + + ret =qemuMonitorJSONCommand(mon, cmd, &reply); + + if (ret == 0) + ret = qemuMonitorJSONCheckError(cmd, reply); + + virJSONValueFree(cmd); + virJSONValueFree(reply); + return ret; +} diff --git a/src/qemu/qemu_monitor_json.h b/src/qemu/qemu_monitor_json.h index 9512793..2a7df76 100644 --- a/src/qemu/qemu_monitor_json.h +++ b/src/qemu/qemu_monitor_json.h @@ -231,4 +231,5 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon, virDomainBlockJobInfoPtr info, int mode);
+int qemuMonitorJSONQuit(qemuMonitorPtr mon); #endif /* QEMU_MONITOR_JSON_H */ diff --git a/src/qemu/qemu_monitor_text.c b/src/qemu/qemu_monitor_text.c index 335e39e..19c2690 100644 --- a/src/qemu/qemu_monitor_text.c +++ b/src/qemu/qemu_monitor_text.c @@ -3046,3 +3046,23 @@ cleanup: VIR_FREE(reply); return ret; } + + +int qemuMonitorTextQuit(qemuMonitorPtr mon) +{ + const char *cmd = "quit"; + char *reply = NULL; + int ret = -1; + + if (qemuMonitorHMPCommand(mon, cmd, &reply) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("cannot run monitor command")); + goto cleanup; + } + + ret = 0; + +cleanup: + VIR_FREE(reply); + return ret; +} diff --git a/src/qemu/qemu_monitor_text.h b/src/qemu/qemu_monitor_text.h index b250738..9ade938 100644 --- a/src/qemu/qemu_monitor_text.h +++ b/src/qemu/qemu_monitor_text.h @@ -224,4 +224,5 @@ int qemuMonitorTextBlockJob(qemuMonitorPtr mon, virDomainBlockJobInfoPtr info, int mode);
+int qemuMonitorTextQuit(qemuMonitorPtr mon); #endif /* QEMU_MONITOR_TEXT_H */ diff --git a/tools/virsh.c b/tools/virsh.c index f1eb4ca..0c18d8b 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -2565,6 +2565,7 @@ static const vshCmdInfo info_destroy[] = {
static const vshCmdOptDef opts_destroy[] = { {"domain", VSH_OT_DATA, VSH_OFLAG_REQ, N_("domain name, id or uuid")}, + {"monitor", VSH_OT_BOOL, 0, N_("try graceful destroy via monitor first")}, {NULL, 0, 0, NULL} };
@@ -2574,6 +2575,7 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd) virDomainPtr dom; bool ret = true; const char *name; + int flags = 0;
if (!vshConnectionUsability(ctl, ctl->conn)) return false; @@ -2581,7 +2583,11 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain(ctl, cmd, &name))) return false;
- if (virDomainDestroy(dom) == 0) { + if (vshCommandOptBool(cmd, "monitor")) + flags |= VIR_DOMAIN_DESTROY_MONITOR; + + if ((!flags && virDomainDestroy(dom) == 0) || + virDomainDestroyFlags(dom, flags) == 0) { vshPrint(ctl, _("Domain %s destroyed\n"), name); } else { vshError(ctl, _("Failed to destroy domain %s"), name); -- 1.7.3.4
This sounds right, but the devil lies in the details. I would add far more debugging in the thread and around the synchronization, as this is information we would need if something goes really wrong.

Daniel

--
Daniel Veillard      | libxml Gnome XML XSLT toolkit  http://xmlsoft.org/
daniel@veillard.com  | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library  http://libvirt.org/

On Sat, Aug 20, 2011 at 01:06:10PM +0200, Michal Privoznik wrote:
This patch introduces support for domain destroying via 'quit' monitor command which gives qemu time to flush caches and therefore prevent disks corruption. However, qemu can be unresponsive and to prevent waiting indefinitely, execute command in a separate thread and wait reasonable time (QEMU_QUIT_WAIT_SECONDS) on a condition. If we hit timeout, qemu is qemuProcessKill'ed which causes monitor close and therefore also thread being terminable.
The synchronization between qemu driver and monitor-quit thread is done through mutex and condition. However, this alone is not enough. If a condition is signalized but without anybody listening signal is lost. To prevent this a boolean variable is used that is set iff nobody is listening but condition would be signalized, or iff driver is waiting on given condition.
If we want to talk to the monitor, then we can't do that in the virDomainDestroy API call.

Your previous patches add a separate high-priority queue to libvirtd for dispatching RPC calls which do *not* use the monitor and which we need to always process immediately. virDomainDestroy is one of those high-priority calls. We can't route the RPC call based on flag values for the API; therefore, we must *never* use the monitor from virDomainDestroy.

Regards,
Daniel

On 22.08.2011 12:27, Daniel P. Berrange wrote:
On Sat, Aug 20, 2011 at 01:06:10PM +0200, Michal Privoznik wrote:
This patch introduces support for domain destroying via 'quit' monitor command which gives qemu time to flush caches and therefore prevent disks corruption. However, qemu can be unresponsive and to prevent waiting indefinitely, execute command in a separate thread and wait reasonable time (QEMU_QUIT_WAIT_SECONDS) on a condition. If we hit timeout, qemu is qemuProcessKill'ed which causes monitor close and therefore also thread being terminable.
The synchronization between qemu driver and monitor-quit thread is done through mutex and condition. However, this alone is not enough. If a condition is signalized but without anybody listening signal is lost. To prevent this a boolean variable is used that is set iff nobody is listening but condition would be signalized, or iff driver is waiting on given condition.
If we want to talk to the monitor, then we can't do that in the virDomainDestroy API call.
Your previous patches add a separate high priority queue to libvirtd, for dispatch of RPC calls which do *not* use the monitor which we need to always be processed immediately. virDomainDestroy is one of those high priority calls. We can't do routing of the RPC call based on flag values for the API, therefore, we must *never* use the monitor from virDomainDestroy.
Regards, Daniel
I don't think that's a problem. High-priority calls must be guaranteed to end in a reasonably short time. And although we talk to the monitor here, we are guaranteed to end. Therefore there is no need to change my previous patch.

'Accessing the monitor' is a bit of an unfortunate formulation, because 99% of calls which do access the monitor can block indefinitely. And this is the real problem: a stuck API. It doesn't really matter whether it is because of the monitor, NFS, etc. I just wanted to provide a guide for developers if they are in doubt about whether to mark an API as high or low priority.

Michal

On 08/22/2011 04:49 AM, Michal Privoznik wrote:
I don't think that's problem. High priority calls must be guaranteed to end in reasonably short time. And although we talk to monitor here, we are guaranteed to end. Therefore no need to change my previous patch.
No, we are not guaranteed to end in a reasonably short time. Monitor calls are, by nature, indefinite-time commands, because they require a response from an external process.

Rather, we should add virDomainShutdownFlags() (which would be low priority, to match virDomainShutdown() being low priority), and make this flag an option to virDomainShutdownFlags(). That is, you get a choice between shutdown via ACPI signal, or shutdown via monitor command, but both choices involve interaction with the monitor, and are therefore inherently liable to blocking (although shutdown via the monitor is much more likely to succeed in a short amount of time if the monitor is available, because it does not depend on the guest's reaction to ACPI).
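For concreteness, the entry point being proposed here might look roughly as follows; this is only a sketch of the idea from this thread, and the flag name is hypothetical rather than an existing libvirt constant:

#include <libvirt/libvirt.h>

/* Hypothetical sketch of the proposed API; names are illustrative only.
 * The flag selects a flush-and-quit via the monitor instead of the
 * default ACPI-style shutdown request. */
typedef enum {
    VIR_DOMAIN_SHUTDOWN_FLAGS_DEFAULT = 0,      /* hypervisor default (ACPI for qemu) */
    VIR_DOMAIN_SHUTDOWN_FLUSH_CACHE   = 1 << 0, /* flush caches and quit via monitor */
} virDomainShutdownFlagsValues;

int virDomainShutdownFlags(virDomainPtr domain, unsigned int flags);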
'Accessing monitor' is a bit unfortunate formulation, because 99% calls which do access monitor can block indefinitely. And this is the real problem. Stuck API. It doesn't really matter if it is because of monitor, NFS, etc. I just wanted to provide guide for developers if they are in doubt whether to mark API as high or low priority.
The point is that virDomainDestroy() can't issue a monitor command, or it will get stuck. I don't think it makes sense to give virDomainDestroyFlags() a flag that it can only honor in cases where the monitor is not stuck, but which gets skipped otherwise. I think that if we support a flag, then the use of the monitor has to be unconditional with that flag, which in turn implies that we need virDomainShutdownFlags().

--
Eric Blake   eblake@redhat.com    +1-801-349-2682
Libvirt virtualization library http://libvirt.org

On 08/22/2011 07:22 AM, Eric Blake wrote:
On 08/22/2011 04:49 AM, Michal Privoznik wrote:
I don't think that's problem. High priority calls must be guaranteed to end in reasonably short time. And although we talk to monitor here, we are guaranteed to end. Therefore no need to change my previous patch.
No, we are not guaranteed to end in a reasonably short time. Monitor calls are, by nature, indefinite time commands, because they require response from an external process.
Rather, we should add virDomainShutdownFlags() (which would be low priority, to match virDomainShutdown() being low priority), and make this a flag an option to virDomainShutdownFlags(). That is, you get a choice between shutdown via ACPI signal, or shutdown via monitor command, but both choices involve interaction with the monitor, and are therefore inherently liable to blocking (although shutdown via the monitor is much more likely to succeed in a short amount of time if the monitor is available, because it does not depend on the guest's reaction to ACPI).
'Accessing monitor' is a bit unfortunate formulation, because 99% calls which do access monitor can block indefinitely. And this is the real problem. Stuck API. It doesn't really matter if it is because of monitor, NFS, etc. I just wanted to provide guide for developers if they are in doubt whether to mark API as high or low priority.
The point is that virDomainDestroy() can't make a monitor command, or it will get stuck. I don't think it makes sense to give virDomainDestroyFlags() a flag that it can only honor in cases where the monitor is not stuck, but which gets skipped otherwise. I think that if we support a flag, then the use of the monitor has to be unconditional with that flag, which in turn implies that we need virDomainShutdownFlags().
I wrote the above before even glancing at your patch. But now, looking at the patch, it looks like you tried to take this into consideration - you are trying to add condition variables to guarantee that the monitor command will be attempted if possible, but that the command will give up on an unresponsive monitor and resort to the more drastic process kill, so that the command will always succeed even in the face of a stuck monitor.

Maybe your approach has merit after all. But in that case, I think we need two ways to expose the monitor command:

virDomainDestroyFlags(, VIR_DOMAIN_DESTROY_FLUSH_CACHE) - best effort: try to flush the cache, but data may still be lost, because the destroy is guaranteed to happen after x seconds even if the monitor could not be obtained

virDomainShutdownFlags(, VIR_DOMAIN_SHUTDOWN_FLUSH_CACHE) - guarantee that the cache flush happens, or that the command fails due to a timeout in obtaining the monitor

--
Eric Blake   eblake@redhat.com    +1-801-349-2682
Libvirt virtualization library http://libvirt.org

On 22.08.2011 15:30, Eric Blake wrote:
On 08/22/2011 07:22 AM, Eric Blake wrote:
On 08/22/2011 04:49 AM, Michal Privoznik wrote:
I don't think that's problem. High priority calls must be guaranteed to end in reasonably short time. And although we talk to monitor here, we are guaranteed to end. Therefore no need to change my previous patch.
No, we are not guaranteed to end in a reasonably short time. Monitor calls are, by nature, indefinite time commands, because they require response from an external process.
Rather, we should add virDomainShutdownFlags() (which would be low priority, to match virDomainShutdown() being low priority), and make this a flag an option to virDomainShutdownFlags(). That is, you get a choice between shutdown via ACPI signal, or shutdown via monitor command, but both choices involve interaction with the monitor, and are therefore inherently liable to blocking (although shutdown via the monitor is much more likely to succeed in a short amount of time if the monitor is available, because it does not depend on the guest's reaction to ACPI).
'Accessing monitor' is a bit unfortunate formulation, because 99% calls which do access monitor can block indefinitely. And this is the real problem. Stuck API. It doesn't really matter if it is because of monitor, NFS, etc. I just wanted to provide guide for developers if they are in doubt whether to mark API as high or low priority.
The point is that virDomainDestroy() can't make a monitor command, or it will get stuck. I don't think it makes sense to give virDomainDestroyFlags() a flag that it can only honor in cases where the monitor is not stuck, but which gets skipped otherwise. I think that if we support a flag, then the use of the monitor has to be unconditional with that flag, which in turn implies that we need virDomainShutdownFlags().
I wrote the above before even glancing at your patch. But now, looking at the patch, it looks like you tried to take this into consideration - you are trying to add condition variables to guarantee that the monitor command will be attempted if possible, but that the command will give up on an unresponsive monitor and resort to the more drastic process kill, so that the command will always succeed even in the face of a stuck monitor.
Maybe your approach has merit after all. But in that case, I think we need two ways to expose the monitor command:
virDomainDestroyFlags(, VIR_DOMAIN_DESTROY_FLUSH_CACHE) - best effort try to flush cache, but may still lose data because the destroy is guaranteed to happen after x seconds even if monitor could not be obtained
virDomainShutdownFlags(, VIR_DOMAIN_SHUTDOWN_FLUSH_CACHE) - guarantee that the flush cache happens, or that the command fails due to a timeout in obtaining the monitor
Agreed. I've sent a v2 for destroy (DV suggested some corrections). And for shutdown I will send a follow-up patch.

On Mon, Aug 22, 2011 at 12:49:48PM +0200, Michal Privoznik wrote:
On 22.08.2011 12:27, Daniel P. Berrange wrote:
On Sat, Aug 20, 2011 at 01:06:10PM +0200, Michal Privoznik wrote:
This patch introduces support for domain destroying via 'quit' monitor command which gives qemu time to flush caches and therefore prevent disks corruption. However, qemu can be unresponsive and to prevent waiting indefinitely, execute command in a separate thread and wait reasonable time (QEMU_QUIT_WAIT_SECONDS) on a condition. If we hit timeout, qemu is qemuProcessKill'ed which causes monitor close and therefore also thread being terminable.
The synchronization between qemu driver and monitor-quit thread is done through mutex and condition. However, this alone is not enough. If a condition is signalized but without anybody listening signal is lost. To prevent this a boolean variable is used that is set iff nobody is listening but condition would be signalized, or iff driver is waiting on given condition.
If we want to talk to the monitor, then we can't do that in the virDomainDestroy API call.
Your previous patches add a separate high priority queue to libvirtd, for dispatch of RPC calls which do *not* use the monitor which we need to always be processed immediately. virDomainDestroy is one of those high priority calls. We can't do routing of the RPC call based on flag values for the API, therefore, we must *never* use the monitor from virDomainDestroy.
Regards, Daniel
I don't think that's problem. High priority calls must be guaranteed to end in reasonably short time. And although we talk to monitor here, we are guaranteed to end. Therefore no need to change my previous patch.
In order to guarantee that it ends, though, this defines another arbitrary timeout for the monitor commands, this time at 5 seconds. What if the management app wants to wait more than 5 seconds before falling back to kill(TERM) for QEMU?

If we had a separate API for sending 'quit' on the monitor, then the mgmt app can decide how long to wait for the graceful shutdown of QEMU before resorting to the hard virDomainDestroy command. If the app knows that there is high I/O load, then it might want to wait longer than normal for 'quit' to complete, to allow enough time for the I/O flush.

Regards,
Daniel

On 08/22/2011 09:21 AM, Daniel P. Berrange wrote:
If we had a separate API for sending 'quit' on the monitor, then the mgmt app can decide how long to wait for the graceful shutdown of QEMU before resorting to the hard virDomainDestroy command. If the app knows that there is high I/O load, then it might want to wait for 'quit' to complete longer than normal to allow enough time for I/O flush.
Indeed - that is exactly what I was envisioning with a virDomainShutdownFlags() call with a flag requesting the quit monitor command instead of the default ACPI injection. The virDomainShutdownFlags() would have no timeout (it blocks until successful, or returns failure with no 'quit' command attempted), and the caller can inject their own unconditional virDomainDestroy() at whatever timeout they think is appropriate.

--
Eric Blake   eblake@redhat.com    +1-801-349-2682
Libvirt virtualization library http://libvirt.org
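The caller-side pattern being described might look roughly like the sketch below, assuming such a flag existed; VIR_DOMAIN_SHUTDOWN_FLUSH_CACHE and the grace period are hypothetical and chosen by the application, not by libvirt:

#include <libvirt/libvirt.h>
#include <unistd.h>

/* Hypothetical management-app logic: try the proposed graceful
 * flush-and-quit first, wait as long as the app likes, then fall back
 * to the unconditional hard destroy.  VIR_DOMAIN_SHUTDOWN_FLUSH_CACHE
 * is not a real libvirt flag; it stands in for the proposal above. */
static int
appStopDomain(virDomainPtr dom, unsigned int graceSeconds)
{
    unsigned int i;

    if (virDomainShutdownFlags(dom, VIR_DOMAIN_SHUTDOWN_FLUSH_CACHE) == 0) {
        for (i = 0; i < graceSeconds; i++) {
            if (virDomainIsActive(dom) != 1)
                return 0;               /* qemu exited gracefully */
            sleep(1);
        }
    }

    return virDomainDestroy(dom);       /* hard SIGTERM+SIGKILL fallback */
}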

On Mon, Aug 22, 2011 at 09:29:56AM -0600, Eric Blake wrote:
On 08/22/2011 09:21 AM, Daniel P. Berrange wrote:
If we had a separate API for sending 'quit' on the monitor, then the mgmt app can decide how long to wait for the graceful shutdown of QEMU before resorting to the hard virDomainDestroy command. If the app knows that there is high I/O load, then it might want to wait for 'quit' to complete longer than normal to allow enough time for I/O flush.
Indeed - that is exactly what I was envisioning with a virDomainShutdownFlags() call with a flag to request to use the quit monitor command instead of the default ACPI injection. The virDomainShutdownFlags() would have no timeout (it blocks until successful, or returns failure with no 'quit' command attempted), and the caller can inject their own unconditional virDomainDestroy() at whatever timeout they think is appropriate.
The virDomainShutdown API is really about guest-initiated graceful shutdown. Sending the 'quit' command to QEMU is still *ungraceful* as far as the guest OS is concerned, so I think it is best not to leverage the Shutdown API for 'quit'.

I think this probably calls for a virDomainQuit API.

Daniel
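For reference, the suggested entry point would presumably be declared along these lines; this is purely a sketch of the naming idea, not an API that existed in libvirt at the time:

#include <libvirt/libvirt.h>

/* Hypothetical declaration matching the suggestion above; the flags
 * argument is a placeholder for future extension. */
int virDomainQuit(virDomainPtr domain, unsigned int flags);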

On Mon, Aug 22, 2011 at 05:33:12PM +0100, Daniel P. Berrange wrote:
On Mon, Aug 22, 2011 at 09:29:56AM -0600, Eric Blake wrote:
On 08/22/2011 09:21 AM, Daniel P. Berrange wrote:
If we had a separate API for sending 'quit' on the monitor, then the mgmt app can decide how long to wait for the graceful shutdown of QEMU before resorting to the hard virDomainDestroy command. If the app knows that there is high I/O load, then it might want to wait for 'quit' to complete longer than normal to allow enough time for I/O flush.
Indeed - that is exactly what I was envisioning with a virDomainShutdownFlags() call with a flag to request to use the quit monitor command instead of the default ACPI injection. The virDomainShutdownFlags() would have no timeout (it blocks until successful, or returns failure with no 'quit' command attempted), and the caller can inject their own unconditional virDomainDestroy() at whatever timeout they think is appropriate.
The virDomainShutdown API is really about guest initiated graceful shutdown. Sending the 'quit' command to QEMU is still *ungraceful* as far as the guest OS is concerned, so I think it is best not to leverage the Shutdown API for 'quit'.
I think this probably calls for a virDomainQuit API.
Actually this entire thread is on the wrong path.

Both the monitor 'quit' command and SIGTERM in QEMU do exactly the same thing: an immediate stop of the guest, but with a graceful shutdown of the QEMU process[1].

In theory there is a difference in that sending a signal is asynchronous while 'quit' is a synchronous command, but in practice this is not relevant, since while execution of the 'quit' command is synchronous, the command only makes the *request* to exit. QEMU won't actually exit until the event loop processes the request later.

There is thus no point in us even bothering with sending 'quit' to the QEMU monitor.

The virDomainDestroy method calls qemuProcessKill, which sends SIGTERM, waits a short while, then sends SIGKILL. It then calls qemuProcessStop to reap the process. This also happens to call qemuProcessKill again with SIGTERM, then SIGKILL.

We need to make this more controllable by apps, by making it possible to send just the SIGTERM and not the SIGKILL. Then we can add a new flag to virDomainDestroy to request this SIGTERM-only behaviour. If the guest does not actually die, the mgmt app can then just reinvoke virDomainDestroy without the flag, to get the full SIGTERM+SIGKILL behaviour we have today.

So there's no need for the QEMU monitor to be involved anywhere in this.

Regards,
Daniel

[1] The SIGTERM handler and 'quit' command handler both end up just calling qemu_system_shutdown_request().
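A simplified sketch of the escalation being described, with the proposed "stop after SIGTERM" switch; this is illustrative only and not libvirt's actual qemuProcessKill implementation (the grace period and helper name are assumptions):

#include <errno.h>
#include <signal.h>
#include <stdbool.h>
#include <sys/types.h>
#include <unistd.h>

/* Send SIGTERM, give the process a short grace period, then optionally
 * escalate to SIGKILL.  With gracefulOnly=true this is the proposed
 * SIGTERM-only behaviour; the caller can retry later without it. */
static int
demoProcessKill(pid_t pid, bool gracefulOnly)
{
    int i;

    if (kill(pid, SIGTERM) < 0 && errno != ESRCH)
        return -1;

    for (i = 0; i < 15; i++) {                  /* ~1.5s grace period (assumed) */
        usleep(100 * 1000);
        if (kill(pid, 0) < 0 && errno == ESRCH)
            return 0;                           /* process is already gone */
    }

    if (gracefulOnly)
        return 0;                               /* leave SIGKILL to a later call */

    return kill(pid, SIGKILL);
}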

On 22.08.2011 20:31, Daniel P. Berrange wrote:
On Mon, Aug 22, 2011 at 05:33:12PM +0100, Daniel P. Berrange wrote:
On Mon, Aug 22, 2011 at 09:29:56AM -0600, Eric Blake wrote:
On 08/22/2011 09:21 AM, Daniel P. Berrange wrote:
If we had a separate API for sending 'quit' on the monitor, then the mgmt app can decide how long to wait for the graceful shutdown of QEMU before resorting to the hard virDomainDestroy command. If the app knows that there is high I/O load, then it might want to wait for 'quit' to complete longer than normal to allow enough time for I/O flush.
Indeed - that is exactly what I was envisioning with a virDomainShutdownFlags() call with a flag to request to use the quit monitor command instead of the default ACPI injection. The virDomainShutdownFlags() would have no timeout (it blocks until successful, or returns failure with no 'quit' command attempted), and the caller can inject their own unconditional virDomainDestroy() at whatever timeout they think is appropriate.
The virDomainShutdown API is really about guest initiated graceful shutdown. Sending the 'quit' command to QEMU is still *ungraceful* as far as the guest OS is concerned, so I think it is best not to leverage the Shutdown API for 'quit'.
I think this probably calls for a virDomainQuit API.
Actually this entire thread is on the wrong path.
Both the monitor 'quit' command and 'SIGTERM' in QEMU do exactly the same thing. An immediate stop of the guest, but with a graceful shutdown of the QEMU process[1].
In theory there is a difference that sending a signal is asynchronous and 'quit' is a synchronous command, but in practice this is not relevant, since while execution of the 'quit' command is synchronous, this command only makes the *request* to exit. QEMU won't actually exit until the event loop processes the request later.
There is thus no point in us even bothering with sending 'quit' to the QEMU monitor.
The virDomainDestroy method calls qemuProcessKill which sends SIGTERM, waits a short while, then sends SIGKILL. It then calls qemuProcessStop to reap the process. This also happens to call qemuProcessKill again with SIGTERM, then SIGKILL.
We need to make this more controllable by apps, by making it possible to send just the SIGTERM and not the SIGKILL. Then we can add a new flag to virDomainDestroy to request this SIGTERM only behaviour. If the guest does not actually die, the mgmt app can then just reinvoke virDomainDestroy without the flag, to get the full SIGTERM+SIGKILL behaviour we have today.
Sending a signal to the qemu process is just one part of destroying a domain. What about the cleanup code (emitting the event, audit logging, removing a transient domain, ...)? Can I rely on the monitor EOF handling code? What should the return value be in the case where only SIGTERM is sent?
So there's no need for the QEMU monitor to be involved anywhere in this.
Regards, Daniel
[1] The SIGTERM handler and 'quit' command handler both end up just calling qemu_system_shutdown_request().

On Tue, Aug 23, 2011 at 02:36:02PM +0200, Michal Privoznik wrote:
On 22.08.2011 20:31, Daniel P. Berrange wrote:
On Mon, Aug 22, 2011 at 05:33:12PM +0100, Daniel P. Berrange wrote:
On Mon, Aug 22, 2011 at 09:29:56AM -0600, Eric Blake wrote:
On 08/22/2011 09:21 AM, Daniel P. Berrange wrote:
If we had a separate API for sending 'quit' on the monitor, then the mgmt app can decide how long to wait for the graceful shutdown of QEMU before resorting to the hard virDomainDestroy command. If the app knows that there is high I/O load, then it might want to wait for 'quit' to complete longer than normal to allow enough time for I/O flush.
Indeed - that is exactly what I was envisioning with a virDomainShutdownFlags() call with a flag to request to use the quit monitor command instead of the default ACPI injection. The virDomainShutdownFlags() would have no timeout (it blocks until successful, or returns failure with no 'quit' command attempted), and the caller can inject their own unconditional virDomainDestroy() at whatever timeout they think is appropriate.
The virDomainShutdown API is really about guest initiated graceful shutdown. Sending the 'quit' command to QEMU is still *ungraceful* as far as the guest OS is concerned, so I think it is best not to leverage the Shutdown API for 'quit'.
I think this probably calls for a virDomainQuit API.
Actually this entire thread is on the wrong path.
Both the monitor 'quit' command and 'SIGTERM' in QEMU do exactly the same thing. An immediate stop of the guest, but with a graceful shutdown of the QEMU process[1].
In theory there is a difference that sending a signal is asynchronous and 'quit' is a synchronous command, but in practice this is not relevant, since while execution of the 'quit' command is synchronous, this command only makes the *request* to exit. QEMU won't actually exit until the event loop processes the request later.
There is thus no point in us even bothering with sending 'quit' to the QEMU monitor.
The virDomainDestroy method calls qemuProcessKill which sends SIGTERM, waits a short while, then sends SIGKILL. It then calls qemuProcessStop to reap the process. This also happens to call qemuProcessKill again with SIGTERM, then SIGKILL.
We need to make this more controllable by apps, by making it possible to send just the SIGTERM and not the SIGKILL. Then we can add a new flag to virDomainDestroy to request this SIGTERM only behaviour. If the guest does not actually die, the mgmt app can then just reinvoke virDomainDestroy without the flag, to get the full SIGTERM+SIGKILL behaviour we have today.
Sending signal to qemu process is just a part of domain destroying. What about cleanup code (emitting event, audit log, removing transient domain, ...)? Can I rely on monitor EOF handling code? What should be the return value for this case when only SIGTERM is sent?
QEMU will send an event on the monitor when it shuts down cleanly via 'SIGQUIT' - we already handle that.

Daniel

On Tue, Aug 23, 2011 at 14:31:29 +0100, Daniel P. Berrange wrote:
On Tue, Aug 23, 2011 at 02:36:02PM +0200, Michal Privoznik wrote:
On 22.08.2011 20:31, Daniel P. Berrange wrote:
We need to make this more controllable by apps, by making it possible to send just the SIGTERM and not the SIGKILL. Then we can add a new flag to virDomainDestroy to request this SIGTERM only behaviour. If the guest does not actually die, the mgmt app can then just reinvoke virDomainDestroy without the flag, to get the full SIGTERM+SIGKILL behaviour we have today.
Sending signal to qemu process is just a part of domain destroying. What about cleanup code (emitting event, audit log, removing transient domain, ...)? Can I rely on monitor EOF handling code? What should be the return value for this case when only SIGTERM is sent?
QEMU will send an event on the monitor when it shuts down cleanly via 'SIGQUIT' - we already handle that.
Yes, but that will confuse libvirt and apps because we won't be able to distinguish between a normal shutdown and a destroy with flushed caches. But that should probably be solved in qemu by sending different events in these two cases.

Jirka

On Wed, Aug 24, 2011 at 09:53:49AM +0200, Jiri Denemark wrote:
On Tue, Aug 23, 2011 at 14:31:29 +0100, Daniel P. Berrange wrote:
On Tue, Aug 23, 2011 at 02:36:02PM +0200, Michal Privoznik wrote:
On 22.08.2011 20:31, Daniel P. Berrange wrote:
We need to make this more controllable by apps, by making it possible to send just the SIGTERM and not the SIGKILL. Then we can add a new flag to virDomainDestroy to request this SIGTERM only behaviour. If the guest does not actually die, the mgmt app can then just reinvoke virDomainDestroy without the flag, to get the full SIGTERM+SIGKILL behaviour we have today.
Sending signal to qemu process is just a part of domain destroying. What about cleanup code (emitting event, audit log, removing transient domain, ...)? Can I rely on monitor EOF handling code? What should be the return value for this case when only SIGTERM is sent?
QEMU will send an event on the monitor when it shuts down cleanly via 'SIGQUIT' - we already handle that.
Yes, but that will confuse libvirt and apps because we won't be able to distinguish between normal shutdown and destroy with flushed caches. But that should probably be solved in qemu by sending different events in this two cases.
Well if that is the case, then we already have that problem, because libvirt is already sending SIGQUIT to destroy QEMU.

Regards,
Daniel
participants (5): Daniel P. Berrange, Daniel Veillard, Eric Blake, Jiri Denemark, Michal Privoznik