[libvirt] [PATCH 1/1] qemu: host NUMA hugepage policy without guest NUMA

At the moment, guests that are backed by hugepages in the host are only able to use policy to control the placement of those hugepages on a per-(guest-)CPU basis. Policy applied globally is ignored.

Such guests would use <memoryBacking><hugepages/></memoryBacking> and a <numatune> block with <memory mode=... nodeset=.../> but no <memnode .../> elements.

This patch corrects this by, in this specific case, changing the QEMU command line from "-mem-prealloc -mem-path=..." (which cannot specify NUMA policy) to "-object memory-backend-file ..." (which can).

Note: This is not visible to the guest and does not appear to create a migration incompatibility.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
---
There was some discussion leading up to this patch, here:

https://www.redhat.com/archives/libvir-list/2016-October/msg00033.html

During that discussion it seemed that fixing this issue would cause migration incompatibility but after some careful testing, it appears that it only does when a memory-backend object is attached to a guest NUMA node and that is not the case here. If only one is created, and used globally (not attached via mem=<id>), the migration data does not seem to be changed and so it seems reasonable to try something like this patch.

This patch does work for my test cases but I don't claim a deep understanding of the libvirt code so this is at least partly a RFC. Comments welcome :-)

Cheers,
Sam.

 src/qemu/qemu_command.c | 55 ++++++++++++++++++++++++++++++++++++++++---------
 src/qemu/qemu_command.h |  2 +-
 2 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 0804133..c28c8f2 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3143,7 +3143,7 @@ qemuBuildMemoryBackendStr(unsigned long long size,
                           int guestNode,
                           virBitmapPtr userNodeset,
                           virBitmapPtr autoNodeset,
-                          virDomainDefPtr def,
+                          const virDomainDefPtr def,
                           virQEMUCapsPtr qemuCaps,
                           virQEMUDriverConfigPtr cfg,
                           const char **backendType,
@@ -7129,12 +7129,18 @@ qemuBuildSmpCommandLine(virCommandPtr cmd,
 
 static int
 qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
-                    const virDomainDef *def,
+                    const virDomainDefPtr def,
                     virQEMUCapsPtr qemuCaps,
-                    virCommandPtr cmd)
+                    virCommandPtr cmd,
+                    virBitmapPtr auto_nodeset)
 {
     const long system_page_size = virGetSystemPageSizeKB();
     char *mem_path = NULL;
+    virBitmapPtr nodemask = NULL;
+    const char *backendType = NULL;
+    char *backendStr = NULL;
+    virJSONValuePtr props;
+    int rv = -1;
 
     /*
      * No-op if hugepages were not requested.
@@ -7159,18 +7165,47 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
     if (qemuGetHupageMemPath(cfg, def->mem.hugepages[0].size, &mem_path) < 0)
         return -1;
 
-    virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path, NULL);
+    if (virDomainNumatuneMaybeGetNodeset(def->numa, auto_nodeset,
+                                         &nodemask, -1) < 0)
+        return -1;
+    if (nodemask && virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) {
+        props = virJSONValueNewObject();
+        if (qemuBuildMemoryBackendStr(virDomainDefGetMemoryInitial(def),
+                                      0, -1, NULL, auto_nodeset,
+                                      def, qemuCaps, cfg, &backendType,
+                                      &props, false) < 0)
+            goto cleanup;
+        if (!(backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType,
+                                                                 "mem",
+                                                                 props)))
+            goto cleanup;
+        virCommandAddArgList(cmd, "-object", backendStr, NULL);
+        rv = 0;
+cleanup:
+        VIR_FREE(backendStr);
+        VIR_FREE(props);
+    }
+    else {
+        if (nodemask || 1)
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("Memory file backend objects are "
+                             "unsupported by QEMU binary. Global NUMA "
+                             "hugepage policy will be ignored."));
+        virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path, NULL);
+        rv = 0;
+    }
 
     VIR_FREE(mem_path);
-    return 0;
+    return rv;
 }
 
 
 static int
 qemuBuildMemCommandLine(virCommandPtr cmd,
                         virQEMUDriverConfigPtr cfg,
-                        const virDomainDef *def,
-                        virQEMUCapsPtr qemuCaps)
+                        const virDomainDefPtr def,
+                        virQEMUCapsPtr qemuCaps,
+                        virBitmapPtr auto_nodeset)
 {
     if (qemuDomainDefValidateMemoryHotplug(def, qemuCaps, NULL) < 0)
         return -1;
@@ -7194,7 +7229,7 @@ qemuBuildMemCommandLine(virCommandPtr cmd,
      * there is no numa node specified.
      */
     if (!virDomainNumaGetNodeCount(def->numa) &&
-        qemuBuildMemPathStr(cfg, def, qemuCaps, cmd) < 0)
+        qemuBuildMemPathStr(cfg, def, qemuCaps, cmd, auto_nodeset) < 0)
         return -1;
 
     if (def->mem.locked && !virQEMUCapsGet(qemuCaps, QEMU_CAPS_REALTIME_MLOCK)) {
@@ -7331,7 +7366,7 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
     }
 
     if (!needBackend &&
-        qemuBuildMemPathStr(cfg, def, qemuCaps, cmd) < 0)
+        qemuBuildMemPathStr(cfg, def, qemuCaps, cmd, auto_nodeset) < 0)
         goto cleanup;
 
     for (i = 0; i < ncells; i++) {
@@ -9381,7 +9416,7 @@ qemuBuildCommandLine(virQEMUDriverPtr driver,
     if (!migrateURI && !snapshot && qemuDomainAlignMemorySizes(def) < 0)
         goto error;
 
-    if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps) < 0)
+    if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps, nodeset) < 0)
         goto error;
 
     if (qemuBuildSmpCommandLine(cmd, def) < 0)
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
index 2f2a6ff..1e97d76 100644
--- a/src/qemu/qemu_command.h
+++ b/src/qemu/qemu_command.h
@@ -126,7 +126,7 @@ int qemuBuildMemoryBackendStr(unsigned long long size,
                               int guestNode,
                               virBitmapPtr userNodeset,
                               virBitmapPtr autoNodeset,
-                              virDomainDefPtr def,
+                              const virDomainDefPtr def,
                               virQEMUCapsPtr qemuCaps,
                               virQEMUDriverConfigPtr cfg,
                               const char **backendType,
-- 
2.10.0.297.gf6727b0
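For reference, a rough sketch of the two command-line shapes being compared in this thread (the object id, size, path and host node below are illustrative, not taken from the patch):

    # current behaviour: hugepage backing, no way to attach NUMA policy
    qemu-system-ppc64 ... -mem-prealloc -mem-path /dev/hugepages/libvirt/qemu ...

    # proposed behaviour when a global <numatune> nodeset is set and the QEMU
    # binary supports memory-backend-file: the policy rides on the backend object
    qemu-system-ppc64 ... -object memory-backend-file,id=mem,size=2048M,mem-path=/dev/hugepages/libvirt/qemu,prealloc=yes,host-nodes=0,policy=bind ...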

On Wed, Oct 12, 2016 at 15:04:53 +1100, Sam Bobroff wrote:
At the moment, guests that are backed by hugepages in the host are only able to use policy to control the placement of those hugepages on a per-(guest-)CPU basis. Policy applied globally is ignored.
Such guests would use <memoryBacking><hugepages/></memoryBacking> and a <numatune> block with <memory mode=... nodeset=.../> but no <memnode .../> elements.
This patch corrects this by, in this specific case, changing the QEMU command line from "-mem-prealloc -mem-path=..." (which cannot specify NUMA policy) to "-object memory-backend-file ..." (which can).
Note: This is not visible to the guest and does not appear to create a migration incompatibility.
Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com> --- There was some discussion leading up to this patch, here:
https://www.redhat.com/archives/libvir-list/2016-October/msg00033.html
During that discussion it seemed that fixing this issue would cause migration incompatibility but after some careful testing, it appears that it only does when a memory-backend object is attached to a guest NUMA node and that is not the case here. If only one is created, and used globally (not attached via mem=<id>), the migration data does not seem to be changed and so it seems reasonable to try something like this patch.
This patch does work for my test cases but I don't claim a deep understanding of the libvirt code so this is at least partly a RFC. Comments welcome :-)
Cheers, Sam.
src/qemu/qemu_command.c | 55 ++++++++++++++++++++++++++++++++++++++++--------- src/qemu/qemu_command.h | 2 +- 2 files changed, 46 insertions(+), 11 deletions(-)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 0804133..c28c8f2 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3143,7 +3143,7 @@ qemuBuildMemoryBackendStr(unsigned long long size, int guestNode, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, - virDomainDefPtr def, + const virDomainDefPtr def,
This does not work as expected. Using 'const' with types that hide the pointer makes the pointer const and not the structure that the pointer points to.
virQEMUCapsPtr qemuCaps, virQEMUDriverConfigPtr cfg, const char **backendType, @@ -7129,12 +7129,18 @@ qemuBuildSmpCommandLine(virCommandPtr cmd,
static int qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, - const virDomainDef *def, + const virDomainDefPtr def,
Same as above. This change is wrong.
virQEMUCapsPtr qemuCaps, - virCommandPtr cmd) + virCommandPtr cmd, + virBitmapPtr auto_nodeset) { const long system_page_size = virGetSystemPageSizeKB(); char *mem_path = NULL; + virBitmapPtr nodemask = NULL; + const char *backendType = NULL; + char *backendStr = NULL; + virJSONValuePtr props; + int rv = -1;
/* * No-op if hugepages were not requested. @@ -7159,18 +7165,47 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, if (qemuGetHupageMemPath(cfg, def->mem.hugepages[0].size, &mem_path) < 0) return -1;
- virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path, NULL); + if (virDomainNumatuneMaybeGetNodeset(def->numa, auto_nodeset, + &nodemask, -1) < 0) + return -1; + if (nodemask && virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) { + props = virJSONValueNewObject();
This is leaked, qemuBuildMemoryBackendStr allocates one itself.
+ if (qemuBuildMemoryBackendStr(virDomainDefGetMemoryInitial(def), + 0, -1, NULL, auto_nodeset, + def, qemuCaps, cfg, &backendType, + &props, false) < 0) + goto cleanup; + if (!(backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, + "mem", + props))) + goto cleanup;
So you create a memory object that does not get used ...
+ virCommandAddArgList(cmd, "-object", backendStr, NULL);
So does qemu magically use memory objects that are not referenced anywhere as its memory? That seems odd. Could you please elaborate on this part and how it's supposed to work? Without a proper elaboration of how this is supposed to work with qemu, this patch is not acceptable.
+ rv = 0; +cleanup:
This won't pass syntax check. Labels have to be spaced out.
+ VIR_FREE(backendStr); + VIR_FREE(props);
This is a JSON value object, you need to free it with the proper function.
+ } + else {
This breaks coding style.
+ if (nodemask || 1)
Erm, a tautology?
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Memory file backend objects are " + "unsupported by QEMU binary. Global NUMA " + "hugepage policy will be ignored."));
Reporting an error but not terminating the startup is not acceptable. Errors should not be reported unless something fails.
+ virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path, NULL); + rv = 0; + } VIR_FREE(mem_path);
- return 0; + return rv; }
Missing a change to the test files. Please make sure that you run 'make check' and 'make syntax-check' and send only patches that pass those. Peter

On Wed, Oct 12, 2016 at 09:48:07AM +0200, Peter Krempa wrote:
On Wed, Oct 12, 2016 at 15:04:53 +1100, Sam Bobroff wrote:
At the moment, guests that are backed by hugepages in the host are only able to use policy to control the placement of those hugepages on a per-(guest-)CPU basis. Policy applied globally is ignored.
Such guests would use <memoryBacking><hugepages/></memoryBacking> and a <numatune> block with <memory mode=... nodeset=.../> but no <memnode .../> elements.
This patch corrects this by, in this specific case, changing the QEMU command line from "-mem-prealloc -mem-path=..." (which cannot specify NUMA policy) to "-object memory-backend-file ..." (which can).
Note: This is not visible to the guest and does not appear to create a migration incompatibility.
Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com> --- There was some discussion leading up to this patch, here:
https://www.redhat.com/archives/libvir-list/2016-October/msg00033.html
During that discussion it seemed that fixing this issue would cause migration incompatibility but after some careful testing, it appears that it only does when a memory-backend object is attached to a guest NUMA node and that is not the case here. If only one is created, and used globally (not attached via mem=<id>), the migration data does not seem to be changed and so it seems reasonable to try something like this patch.
This patch does work for my test cases but I don't claim a deep understanding of the libvirt code so this is at least partly a RFC. Comments welcome :-)
Cheers, Sam.
src/qemu/qemu_command.c | 55 ++++++++++++++++++++++++++++++++++++++++--------- src/qemu/qemu_command.h | 2 +- 2 files changed, 46 insertions(+), 11 deletions(-)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 0804133..c28c8f2 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3143,7 +3143,7 @@ qemuBuildMemoryBackendStr(unsigned long long size, int guestNode, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, - virDomainDefPtr def, + const virDomainDefPtr def,
This does not work as expected. Using 'const' with types that hide the pointer makes the pointer const and not the structure that the pointer points to.
Ah, of course you're right.
virQEMUCapsPtr qemuCaps, virQEMUDriverConfigPtr cfg, const char **backendType, @@ -7129,12 +7129,18 @@ qemuBuildSmpCommandLine(virCommandPtr cmd,
static int qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, - const virDomainDef *def, + const virDomainDefPtr def,
Same as above. This change is wrong.
OK.
virQEMUCapsPtr qemuCaps, - virCommandPtr cmd) + virCommandPtr cmd, + virBitmapPtr auto_nodeset) { const long system_page_size = virGetSystemPageSizeKB(); char *mem_path = NULL; + virBitmapPtr nodemask = NULL; + const char *backendType = NULL; + char *backendStr = NULL; + virJSONValuePtr props; + int rv = -1;
/* * No-op if hugepages were not requested. @@ -7159,18 +7165,47 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, if (qemuGetHupageMemPath(cfg, def->mem.hugepages[0].size, &mem_path) < 0) return -1;
- virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path, NULL); + if (virDomainNumatuneMaybeGetNodeset(def->numa, auto_nodeset, + &nodemask, -1) < 0) + return -1; + if (nodemask && virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) { + props = virJSONValueNewObject();
This is leaked, qemuBuildMemoryBackendStr allocates one itself.
Ah of course, sorry. I think I can just remove that line.
+ if (qemuBuildMemoryBackendStr(virDomainDefGetMemoryInitial(def), + 0, -1, NULL, auto_nodeset, + def, qemuCaps, cfg, &backendType, + &props, false) < 0) + goto cleanup; + if (!(backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, + "mem", + props))) + goto cleanup;
So you create a memory object that does not get used ...
Yes, that is correct.
+ virCommandAddArgList(cmd, "-object", backendStr, NULL);
So does qemu magically use memory objects that are not referenced anywhere as its memory? That seems odd. Could you please elaborate on this part and how it's supposed to work? Without a proper elaboration of how this is supposed to work with qemu, this patch is not acceptable.
Yes, it did seem odd to me, but I didn't know if it was odd to anyone else! ;-) I'll elaborate properly in the commit message if I end up doing a version 2 (and if the fix ends up using that method).

But briefly, QEMU seems to treat a single memory backend object that is not linked to any NUMA node just as it would "-mem-path" and "-mem-prealloc", except that NUMA policy can also be set. It seems to "know" about this usage because it is careful to avoid changing the "memory region" name to the ID in this case, and it's that quirk that allows migrations to be unaffected.
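To make that distinction concrete, a rough sketch of the two cases (ids, sizes and paths are made up; the behaviour described is as reported in this thread, not independently verified):

    # backend attached to a guest NUMA node via memdev= -- this renames the
    # memory region after the object ID and so changes the migration data
    -object memory-backend-file,id=mem0,size=2048M,mem-path=/dev/hugepages/libvirt/qemu,host-nodes=0,policy=bind \
    -numa node,nodeid=0,cpus=0-1,memdev=mem0

    # single global backend, not referenced by any -numa node -- treated like
    # -mem-path/-mem-prealloc plus policy, with the migration data unchanged
    -object memory-backend-file,id=mem,size=2048M,mem-path=/dev/hugepages/libvirt/qemu,prealloc=yes,host-nodes=0,policy=bind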
+ rv = 0; +cleanup:
This won't pass syntax check. Labels have to be spaced out.
Thanks, I'll use syntax check as noted below!
+ VIR_FREE(backendStr); + VIR_FREE(props);
This is a JSON value object, you need to free it with the proper function.
Ah, virJSONValueFree(). Thanks.
+ } + else {
This breaks coding style.
OK.
+ if (nodemask || 1)
Erm, a tautology?
Yes, an embarrassing bit of debug from the last round of testing :-P (Testing the warning.)
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Memory file backend objects are " + "unsupported by QEMU binary. Global NUMA " + "hugepage policy will be ignored."));
Reporting an error but not terminating the startup is not acceptable. Errors should not be reported unless something fails.
Right, if I do another version I'll change this to a warning.
+ virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path, NULL); + rv = 0; + } VIR_FREE(mem_path);
- return 0; + return rv; }
Missing a change to the test files. Please make sure that you run 'make check' and 'make syntax-check' and send only patches that pass those.
Ah, thanks. I will always use them in future.
Peter
Thanks for your review! Cheers, Sam.

On Wed, Oct 12, 2016 at 03:04:53PM +1100, Sam Bobroff wrote:
At the moment, guests that are backed by hugepages in the host are only able to use policy to control the placement of those hugepages on a per-(guest-)CPU basis. Policy applied globally is ignored.
Such guests would use <memoryBacking><hugepages/></memoryBacking> and a <numatune> block with <memory mode=... nodeset=.../> but no <memnode .../> elements.
This patch corrects this by, in this specific case, changing the QEMU command line from "-mem-prealloc -mem-path=..." (which cannot specify NUMA policy) to "-object memory-backend-file ..." (which can).
Note: This is not visible to the guest and does not appear to create a migration incompatibility.
It could make sense, I haven't tried yet, though. However, I still don't see the point in using memory-backend-file. Is it that when you don't have cpuset cgroup the allocation doesn't work well? Because it certainly does work for me.
Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com> --- There was some discussion leading up to this patch, here:
https://www.redhat.com/archives/libvir-list/2016-October/msg00033.html
During that discussion it seemed that fixing this issue would cause migration incompatibility but after some careful testing, it appears that it only does when a memory-backend object is attached to a guest NUMA node and that is not the case here. If only one is created, and used globally (not attached via mem=<id>), the migration data does not seem to be changed and so it seems reasonable to try something like this patch.
This patch does work for my test cases but I don't claim a deep understanding of the libvirt code so this is at least partly a RFC. Comments welcome :-)
Cheers, Sam.
src/qemu/qemu_command.c | 55 ++++++++++++++++++++++++++++++++++++++++--------- src/qemu/qemu_command.h | 2 +- 2 files changed, 46 insertions(+), 11 deletions(-)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 0804133..c28c8f2 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3143,7 +3143,7 @@ qemuBuildMemoryBackendStr(unsigned long long size, int guestNode, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, - virDomainDefPtr def, + const virDomainDefPtr def, virQEMUCapsPtr qemuCaps, virQEMUDriverConfigPtr cfg, const char **backendType, @@ -7129,12 +7129,18 @@ qemuBuildSmpCommandLine(virCommandPtr cmd,
static int qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, - const virDomainDef *def, + const virDomainDefPtr def,
These two hunks have nothing to do with the change. If you want to do them for some reason, explain the reasoning and put it into separate patch, please.
virQEMUCapsPtr qemuCaps, - virCommandPtr cmd) + virCommandPtr cmd, + virBitmapPtr auto_nodeset) { const long system_page_size = virGetSystemPageSizeKB(); char *mem_path = NULL; + virBitmapPtr nodemask = NULL; + const char *backendType = NULL; + char *backendStr = NULL; + virJSONValuePtr props; + int rv = -1;
/* * No-op if hugepages were not requested. @@ -7159,18 +7165,47 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, if (qemuGetHupageMemPath(cfg, def->mem.hugepages[0].size, &mem_path) < 0) return -1;
- virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path, NULL); + if (virDomainNumatuneMaybeGetNodeset(def->numa, auto_nodeset, + &nodemask, -1) < 0) + return -1; + if (nodemask && virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) { + props = virJSONValueNewObject(); + if (qemuBuildMemoryBackendStr(virDomainDefGetMemoryInitial(def), + 0, -1, NULL, auto_nodeset, + def, qemuCaps, cfg, &backendType, + &props, false) < 0) + goto cleanup; + if (!(backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, + "mem", + props))) + goto cleanup; + virCommandAddArgList(cmd, "-object", backendStr, NULL);
So now a function that's called BuildMemPathStr does more than that. It is also called when the memory is added in a different way in which case this function would just add even more memory which is clearly wrong.
+ rv = 0; +cleanup: + VIR_FREE(backendStr); + VIR_FREE(props); + } + else { + if (nodemask || 1)
" || 1" ? Leftover from debugging?
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Memory file backend objects are " + "unsupported by QEMU binary. Global NUMA " + "hugepage policy will be ignored."));
Reporting error without erroring out is wrong. VIR_WARN is probably what you wanted to do here. As I said, I don't feel the need for the change. If there is a need for it, it should clearly be done differently, so NACK to this particular patch. Martin

On Wed, Oct 12, 2016 at 10:27:50AM +0200, Martin Kletzander wrote:
On Wed, Oct 12, 2016 at 03:04:53PM +1100, Sam Bobroff wrote:
At the moment, guests that are backed by hugepages in the host are only able to use policy to control the placement of those hugepages on a per-(guest-)CPU basis. Policy applied globally is ignored.
Such guests would use <memoryBacking><hugepages/></memoryBacking> and a <numatune> block with <memory mode=... nodeset=.../> but no <memnode .../> elements.
This patch corrects this by, in this specific case, changing the QEMU command line from "-mem-prealloc -mem-path=..." (which cannot specify NUMA policy) to "-object memory-backend-file ..." (which can).
Note: This is not visible to the guest and does not appear to create a migration incompatibility.
It could make sense, I haven't tried yet, though. However, I still don't see the point in using memory-backend-file. Is it that when you don't have cpuset cgroup the allocation doesn't work well? Because it certainly does work for me.
Thanks for taking a look at this :-)

The point of using a memory-backend-file is that with it, the NUMA policy can be specified to QEMU, but with -mem-path it can't. It seems to be a way to tell QEMU to apply NUMA policy in the right place. It does seem odd to me to use memory-backend-file without attaching the backend to a guest NUMA node, but it seems to do the right thing in this case. (If there are guest NUMA nodes, or if hugepages aren't being used, policy is correctly applied.)

I'll describe my test case in detail, perhaps there's something I don't understand happening.

* I set up a machine with two (fake) NUMA nodes (0 and 1), with 2G of hugepages on node 1, and none on node 0.
* I create a 2G guest using virt-install:

  virt-install --name ppc --memory=2048 --disk ~/tmp/tmp.qcow2 --cdrom ~/tmp/ubuntu-16.04-server-ppc64el.iso --wait 0 --virt-type qemu --memorybacking hugepages=on --graphics vnc --arch ppc64le

* I "virsh destroy" and then "virsh edit" to add this block to the guest XML:

  <numatune>
    <memory mode='strict' nodeset='0'/>
  </numatune>

* "virsh start", and the machine starts (I believe it should fail due to insufficient memory satisfying the policy).
* "numastat -p $(pidof qemu-system-ppc64)" shows something like this:

  Per-node process memory usage (in MBs) for PID 8048 (qemu-system-ppc)
                            Node 0          Node 1           Total
                   --------------- --------------- ---------------
  Huge                        0.00         2048.00         2048.00
  Heap                        8.12            0.00            8.12
  Stack                       0.03            0.00            0.03
  Private                    35.80            6.10           41.90
  ----------------  --------------- --------------- ---------------
  Total                      43.95         2054.10         2098.05

So it looks like it's allocated hugepages from node 1, isn't this violating the policy I set via numatune?
Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com> --- There was some discussion leading up to this patch, here:
https://www.redhat.com/archives/libvir-list/2016-October/msg00033.html
During that discussion it seemed that fixing this issue would cause migration incompatibility but after some careful testing, it appears that it only does when a memory-backend object is attached to a guest NUMA node and that is not the case here. If only one is created, and used globally (not attached via mem=<id>), the migration data does not seem to be changed and so it seems reasonable to try something like this patch.
This patch does work for my test cases but I don't claim a deep understanding of the libvirt code so this is at least partly a RFC. Comments welcome :-)
Cheers, Sam.
src/qemu/qemu_command.c | 55 ++++++++++++++++++++++++++++++++++++++++--------- src/qemu/qemu_command.h | 2 +- 2 files changed, 46 insertions(+), 11 deletions(-)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 0804133..c28c8f2 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3143,7 +3143,7 @@ qemuBuildMemoryBackendStr(unsigned long long size, int guestNode, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, - virDomainDefPtr def, + const virDomainDefPtr def, virQEMUCapsPtr qemuCaps, virQEMUDriverConfigPtr cfg, const char **backendType, @@ -7129,12 +7129,18 @@ qemuBuildSmpCommandLine(virCommandPtr cmd,
static int qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, - const virDomainDef *def, + const virDomainDefPtr def,
These two hunks have nothing to do with the change. If you want to do them for some reason, explain the reasoning and put it into separate patch, please.
OK.
virQEMUCapsPtr qemuCaps, - virCommandPtr cmd) + virCommandPtr cmd, + virBitmapPtr auto_nodeset) { const long system_page_size = virGetSystemPageSizeKB(); char *mem_path = NULL; + virBitmapPtr nodemask = NULL; + const char *backendType = NULL; + char *backendStr = NULL; + virJSONValuePtr props; + int rv = -1;
/* * No-op if hugepages were not requested. @@ -7159,18 +7165,47 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, if (qemuGetHupageMemPath(cfg, def->mem.hugepages[0].size, &mem_path) < 0) return -1;
- virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path, NULL); + if (virDomainNumatuneMaybeGetNodeset(def->numa, auto_nodeset, + &nodemask, -1) < 0) + return -1; + if (nodemask && virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) { + props = virJSONValueNewObject(); + if (qemuBuildMemoryBackendStr(virDomainDefGetMemoryInitial(def), + 0, -1, NULL, auto_nodeset, + def, qemuCaps, cfg, &backendType, + &props, false) < 0) + goto cleanup; + if (!(backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, + "mem", + props))) + goto cleanup; + virCommandAddArgList(cmd, "-object", backendStr, NULL);
So now a function that's called BuildMemPathStr does more than that. It is also called when the memory is added in a different way in which case this function would just add even more memory which is clearly wrong.
I'm not sure if I understand this; this function used to add "-mem-path" and it would now add either "-mem-path" or a memory backend (with the same path), both doing the same kind of configuration (is that not the case?). I did see that it must not be called twice (which was already the case, I think), but I convinced myself that it would only be called once: There are only two callers of qemuBuildMemPathStr(). One is in qemuBuildMemCommandLine() and it is protected by "if (!virDomainNumaGetNodeCount(def->numa)", and the other is in qemuBuildNumaArgStr() and the (only) call to that is protected by "if (virDomainNumaGetNodeCount(def->numa)". Is that safe enough?
+ rv = 0; +cleanup: + VIR_FREE(backendStr); + VIR_FREE(props); + } + else { + if (nodemask || 1)
" || 1" ? Leftover from debugging?
Oops, yes.
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Memory file backend objects are " + "unsupported by QEMU binary. Global NUMA " + "hugepage policy will be ignored."));
Reporting error without erroring out is wrong. VIR_WARN is probably what you wanted to do here.
OK.
As I said, I don't feel the need for the change. If there is a need for it, it should clearly be done differently, so NACK to this particular patch.
Martin
I understand. I'm interested to see what you think of the test case :-) Cheers, Sam.

On Thu, Oct 13, 2016 at 11:34:16AM +1100, Sam Bobroff wrote:
On Wed, Oct 12, 2016 at 10:27:50AM +0200, Martin Kletzander wrote:
On Wed, Oct 12, 2016 at 03:04:53PM +1100, Sam Bobroff wrote:
At the moment, guests that are backed by hugepages in the host are only able to use policy to control the placement of those hugepages on a per-(guest-)CPU basis. Policy applied globally is ignored.
Such guests would use <memoryBacking><hugepages/></memoryBacking> and a <numatune> block with <memory mode=... nodeset=.../> but no <memnode .../> elements.
This patch corrects this by, in this specific case, changing the QEMU command line from "-mem-prealloc -mem-path=..." (which cannot specify NUMA policy) to "-object memory-backend-file ..." (which can).
Note: This is not visible to the guest and does not appear to create a migration incompatibility.
It could make sense, I haven't tried yet, though. However, I still don't see the point in using memory-backend-file. Is it that when you don't have cpuset cgroup the allocation doesn't work well? Because it certainly does work for me.
Thanks for taking a look at this :-)
The point of using a memory-backend-file is that with it, the NUMA policy can be specified to QEMU, but with -mem-path it can't. It seems to be a way to tell QEMU to apply NUMA policy in the right place. It does seem odd to me to use memory-backend-file without attaching the backend to a guest NUMA node, but it seems to do the right thing in this case. (If there are guest NUMA nodes, or if hugepages aren't being used, policy is correctly applied.)
I'll describe my test case in detail, perhaps there's something I don't understand happening.
* I set up a machine with two (fake) NUMA nodes (0 and 1), with 2G of hugepages on node 1, and none on node 0.
* I create a 2G guest using virt-install:
virt-install --name ppc --memory=2048 --disk ~/tmp/tmp.qcow2 --cdrom ~/tmp/ubuntu-16.04-server-ppc64el.iso --wait 0 --virt-type qemu --memorybacking hugepages=on --graphics vnc --arch ppc64le
* I "virsh destroy" and then "virsh edit" to add this block to the guest XML:
<numatune> <memory mode='strict' nodeset='0'/> </numatune>
* "virsh start", and the machine starts (I believe it should fail due to insufficient memory satasfying the policy). * "numastat -p $(pidof qemu-system-ppc64)" shows something like this:
Per-node process memory usage (in MBs) for PID 8048 (qemu-system-ppc) Node 0 Node 1 Total --------------- --------------- --------------- Huge 0.00 2048.00 2048.00 Heap 8.12 0.00 8.12 Stack 0.03 0.00 0.03 Private 35.80 6.10 41.90 ---------------- --------------- --------------- --------------- Total 43.95 2054.10 2098.05
So it looks like it's allocated hugepages from node 1, isn't this violating the policy I set via numatune?
Oh, now I get it. We are doing our best to apply that policy to qemu even when we don't have this option. However, using this works even better (which is probably* what we want). And that's the reasoning behind this.

* I'm saying probably because when I was adding numactl binding to be used together with cgroups, I was told that we couldn't change the binding afterwards and that it's bad. I feel like we could do something with that and it would help us in the future, but there needs to be a discussion, I guess. Because I might be one of the few =)

So to recapitulate, there are three options for how to affect the allocation of qemu's memory:

1) numactl (libnuma): it works as expected, but cannot be changed later.

2) cgroups: so strict it has to be applied after qemu has started; due to that it doesn't work right, especially for stuff that gets all pre-allocated (like hugepages). It can be changed later, but it won't always mean the memory will migrate, so upon change there is no guarantee. If it's unavailable, we fall back to (1) anyway.

3) memory-backend-file's host-nodes=: this works as expected, but cannot be used with older QEMUs, cannot be changed later, and in some cases (not your particular one) it might screw up migration if it wasn't used before.

Selecting the best option from these, plus making the code work with every possibility (erroring out when you want to change the memory node and we had to use (1), for example), is a pain. We should really think about that and reorganize these things for the better of the future. Otherwise we're going to overwhelm ourselves.

Cc'ing Peter to get his thoughts as well, as he worked on some parts of this.

Martin

On Thu, Oct 13, 2016 at 11:34:43AM +0200, Martin Kletzander wrote:
On Thu, Oct 13, 2016 at 11:34:16AM +1100, Sam Bobroff wrote:
On Wed, Oct 12, 2016 at 10:27:50AM +0200, Martin Kletzander wrote:
On Wed, Oct 12, 2016 at 03:04:53PM +1100, Sam Bobroff wrote:
At the moment, guests that are backed by hugepages in the host are only able to use policy to control the placement of those hugepages on a per-(guest-)CPU basis. Policy applied globally is ignored.
Such guests would use <memoryBacking><hugepages/></memoryBacking> and a <numatune> block with <memory mode=... nodeset=.../> but no <memnode .../> elements.
This patch corrects this by, in this specific case, changing the QEMU command line from "-mem-prealloc -mem-path=..." (which cannot specify NUMA policy) to "-object memory-backend-file ..." (which can).
Note: This is not visible to the guest and does not appear to create a migration incompatibility.
It could make sense, I haven't tried yet, though. However, I still don't see the point in using memory-backend-file. Is it that when you don't have cpuset cgroup the allocation doesn't work well? Because it certainly does work for me.
Thanks for taking a look at this :-)
The point of using a memory-backend-file is that with it, the NUMA policy can be specified to QEMU, but with -mem-path it can't. It seems to be a way to tell QEMU to apply NUMA policy in the right place. It does seem odd to me to use memory-backend-file without attaching the backend to a guest NUMA node, but it seems to do the right thing in this case. (If there are guest NUMA nodes, or if hugepages aren't being used, policy is correctly applied.)
I'll describe my test case in detail, perhaps there's something I don't understand happening.
* I set up a machine with two (fake) NUMA nodes (0 and 1), with 2G of hugepages on node 1, and none on node 0.
* I create a 2G guest using virt-install:
virt-install --name ppc --memory=2048 --disk ~/tmp/tmp.qcow2 --cdrom ~/tmp/ubuntu-16.04-server-ppc64el.iso --wait 0 --virt-type qemu --memorybacking hugepages=on --graphics vnc --arch ppc64le
* I "virsh destroy" and then "virsh edit" to add this block to the guest XML:
<numatune> <memory mode='strict' nodeset='0'/> </numatune>
* "virsh start", and the machine starts (I believe it should fail due to insufficient memory satasfying the policy). * "numastat -p $(pidof qemu-system-ppc64)" shows something like this:
Per-node process memory usage (in MBs) for PID 8048 (qemu-system-ppc) Node 0 Node 1 Total --------------- --------------- --------------- Huge 0.00 2048.00 2048.00 Heap 8.12 0.00 8.12 Stack 0.03 0.00 0.03 Private 35.80 6.10 41.90 ---------------- --------------- --------------- --------------- Total 43.95 2054.10 2098.05
So it looks like it's allocated hugepages from node 1, isn't this violating the policy I set via numatune?
Oh, now I get it. We are doing our best to apply that policy to qemu even when we don't have this option. However, using this works even better (which is probably* what we want). And that's the reasoning behind this.
* I'm saying probably because when I was adding numactl binding to be used together with cgroups, I was told that we couldn't change the binding afterwards and it's bad. I feel like we could do something with that and it would help us in the future, but there needs to be a discussion, I guess. Because I might be one of the few =)
So to recapitulate that, there are three options how to affect the allocation of qemu's memory:
1) numactl (libnuma): it works as expected, but cannot be changed later
2) cgroups: so strict it has to be applied after qemu has started; due to that it doesn't work right, especially for stuff that gets all pre-allocated (like hugepages). It can be changed later, but it won't always mean the memory will migrate, so upon change there is no guarantee. If it's unavailable, we fall back to (1) anyway.
3) memory-backend-file's host-nodes=: this works as expected, but cannot be used with older QEMUs, cannot be changed later, and in some cases (not your particular one) it might screw up migration if it wasn't used before.
Selecting the best option from these, plus making the code work with every possibility (erroring out when you want to change the memory node and we had to use (1), for example), is a pain. We should really think about that and reorganize these things for the better of the future. Otherwise we're going to overwhelm ourselves. Cc'ing Peter to get his thoughts as well, as he worked on some parts of this.
Martin
Thanks for the explanation, and I agree (I'm already a bit overwhelmed!) :-)

What do you mean by "changed later"? Do you mean, if the domain XML is changed while the machine is running?

I did look at the libnuma and cgroups approaches, but I was concerned they wouldn't work in this case, because of the way QEMU allocates memory when mem-prealloc is used: the memory is allocated in the main process, before the CPU threads are created. (This is based only on a bit of hacking and debugging in QEMU, but it does seem to explain the behaviour I've seen so far.)

If this is the case, it would seem to be a significant problem: if policy is set on the main thread, it will affect all allocations, not just the VCPU memory, and if it's set on the VCPU threads it won't catch the pre-allocation at all. (Is this what you were referring to by "it doesn't work right"?)

That was my reasoning for trying to use the backend object in this case; it was the only method that worked and did not require changes to QEMU. I'd prefer the other approaches if they could be made to work.

I think QEMU could be altered to move the preallocations into the VCPU threads, but it didn't seem trivial and I suspected the QEMU community would point out that there was already a way to do it using backend objects. Another option would be to add a -host-nodes parameter to QEMU so that the policy could be given without adding a memory backend object. (That seems like a more reasonable change to QEMU.)

Cheers,
Sam.

On Fri, Oct 14, 2016 at 11:52:22AM +1100, Sam Bobroff wrote:
On Thu, Oct 13, 2016 at 11:34:43AM +0200, Martin Kletzander wrote:
On Thu, Oct 13, 2016 at 11:34:16AM +1100, Sam Bobroff wrote:
On Wed, Oct 12, 2016 at 10:27:50AM +0200, Martin Kletzander wrote:
On Wed, Oct 12, 2016 at 03:04:53PM +1100, Sam Bobroff wrote:
At the moment, guests that are backed by hugepages in the host are only able to use policy to control the placement of those hugepages on a per-(guest-)CPU basis. Policy applied globally is ignored.
Such guests would use <memoryBacking><hugepages/></memoryBacking> and a <numatune> block with <memory mode=... nodeset=.../> but no <memnode .../> elements.
This patch corrects this by, in this specific case, changing the QEMU command line from "-mem-prealloc -mem-path=..." (which cannot specify NUMA policy) to "-object memory-backend-file ..." (which can).
Note: This is not visible to the guest and does not appear to create a migration incompatibility.
It could make sense, I haven't tried yet, though. However, I still don't see the point in using memory-backend-file. Is it that when you don't have cpuset cgroup the allocation doesn't work well? Because it certainly does work for me.
Thanks for taking a look at this :-)
The point of using a memory-backend-file is that with it, the NUMA policy can be specified to QEMU, but with -mem-path it can't. It seems to be a way to tell QEMU to apply NUMA policy in the right place. It does seem odd to me to use memory-backend-file without attaching the backend to a guest NUMA node, but it seems to do the right thing in this case. (If there are guest NUMA nodes, or if hugepages aren't being used, policy is correctly applied.)
I'll describe my test case in detail, perhaps there's something I don't understand happening.
* I set up a machine with two (fake) NUMA nodes (0 and 1), with 2G of hugepages on node 1, and none on node 0.
* I create a 2G guest using virt-install:
virt-install --name ppc --memory=2048 --disk ~/tmp/tmp.qcow2 --cdrom ~/tmp/ubuntu-16.04-server-ppc64el.iso --wait 0 --virt-type qemu --memorybacking hugepages=on --graphics vnc --arch ppc64le
* I "virsh destroy" and then "virsh edit" to add this block to the guest XML:
<numatune> <memory mode='strict' nodeset='0'/> </numatune>
* "virsh start", and the machine starts (I believe it should fail due to insufficient memory satasfying the policy). * "numastat -p $(pidof qemu-system-ppc64)" shows something like this:
Per-node process memory usage (in MBs) for PID 8048 (qemu-system-ppc) Node 0 Node 1 Total --------------- --------------- --------------- Huge 0.00 2048.00 2048.00 Heap 8.12 0.00 8.12 Stack 0.03 0.00 0.03 Private 35.80 6.10 41.90 ---------------- --------------- --------------- --------------- Total 43.95 2054.10 2098.05
So it looks like it's allocated hugepages from node 1, isn't this violating the policy I set via numatune?
Oh, now I get it. We are doing our best to apply that policy to qemu even when we don't have this option. However, using this works even better (which is probably* what we want). And that's the reasoning behind this.
* I'm saying probably because when I was adding numactl binding to be used together with cgroups, I was told that we couldn't change the binding afterwards and it's bad. I feel like we could do something with that and it would help us in the future, but there needs to be a discussion, I guess. Because I might be one of the few =)
So to recapitulate that, there are three options how to affect the allocation of qemu's memory:
1) numactl (libnuma): it works as expected, but cannot be changed later
2) cgroups: so strict it has to be applied after qemu has started; due to that it doesn't work right, especially for stuff that gets all pre-allocated (like hugepages). It can be changed later, but it won't always mean the memory will migrate, so upon change there is no guarantee. If it's unavailable, we fall back to (1) anyway.
3) memory-backend-file's host-nodes=: this works as expected, but cannot be used with older QEMUs, cannot be changed later, and in some cases (not your particular one) it might screw up migration if it wasn't used before.
Selecting the best option from these, plus making the code work with every possibility (erroring out when you want to change the memory node and we had to use (1), for example), is a pain. We should really think about that and reorganize these things for the better of the future. Otherwise we're going to overwhelm ourselves. Cc'ing Peter to get his thoughts as well, as he worked on some parts of this.
Martin
Thanks for the explanation, and I agree (I'm already a bit overwhelmed!) :-)
What do you mean by "changed later"? Do you mean, if the domain XML is changed while the machine is running?
E.g. by 'virsh numatune domain 1-2'
I did look at the libnuma and cgroups approaches, but I was concerned they wouldn't work in this case, because of the way QEMU allocates memory when mem-prealloc is used: the memory is allocated in the main process, before the CPU threads are created. (This is based only on a bit of hacking and debugging in QEMU, but it does seem explain the behaviour I've seen so far.)
But we use numactl before QEMU is exec()'d.
If this is the case, it would seem to be a significant problem: if policy is set on the main thread, it will affect all allocations not just the VCPU memory and if it's set on the VCPU threads it won't catch the pre-allocation at all. (Is this what you were referring to by "it doesn't work right"?)
Kind of, yes.
That was my reasoning for trying to use the backend object in this case; it was the only method that worked and did not require changes to QEMU. I'd prefer the other approaches if they could be made to work.
There is a workaround, you can disable the cpuset cgroup in libvirtd.conf, but that's not what you want, I guess.
I think QEMU could be altered to move the preallocations into the VCPU threads but it didn't seem trivial and I suspected the QEMU community would point out that there was already a way to do it using backend objects. Another option would be to add a -host-nodes parameter to QEMU so that the policy can be given without adding a memory backend object. (That seems like a more reasonable change to QEMU.)
I think upstream won't like that, mostly because there is already a way. And that is using memory-backend object. I think we could just use that and disable changing it live. But upstream will probably want that to be configurable or something.
Cheers, Sam.

On Fri, Oct 14, 2016 at 10:19:42AM +0200, Martin Kletzander wrote:
On Fri, Oct 14, 2016 at 11:52:22AM +1100, Sam Bobroff wrote:
On Thu, Oct 13, 2016 at 11:34:43AM +0200, Martin Kletzander wrote:
On Thu, Oct 13, 2016 at 11:34:16AM +1100, Sam Bobroff wrote:
On Wed, Oct 12, 2016 at 10:27:50AM +0200, Martin Kletzander wrote:
On Wed, Oct 12, 2016 at 03:04:53PM +1100, Sam Bobroff wrote:
At the moment, guests that are backed by hugepages in the host are only able to use policy to control the placement of those hugepages on a per-(guest-)CPU basis. Policy applied globally is ignored.
Such guests would use <memoryBacking><hugepages/></memoryBacking> and a <numatune> block with <memory mode=... nodeset=.../> but no <memnode .../> elements.
This patch corrects this by, in this specific case, changing the QEMU command line from "-mem-prealloc -mem-path=..." (which cannot specify NUMA policy) to "-object memory-backend-file ..." (which can).
Note: This is not visible to the guest and does not appear to create a migration incompatibility.
It could make sense, I haven't tried yet, though. However, I still don't see the point in using memory-backend-file. Is it that when you don't have cpuset cgroup the allocation doesn't work well? Because it certainly does work for me.
Thanks for taking a look at this :-)
The point of using a memory-backend-file is that with it, the NUMA policy can be specified to QEMU, but with -mem-path it can't. It seems to be a way to tell QEMU to apply NUMA policy in the right place. It does seem odd to me to use memory-backend-file without attaching the backend to a guest NUMA node, but it seems to do the right thing in this case. (If there are guest NUMA nodes, or if hugepages aren't being used, policy is correctly applied.)
I'll describe my test case in detail, perhaps there's something I don't understand happening.
* I set up a machine with two (fake) NUMA nodes (0 and 1), with 2G of hugepages on node 1, and none on node 0.
* I create a 2G guest using virt-install:
virt-install --name ppc --memory=2048 --disk ~/tmp/tmp.qcow2 --cdrom ~/tmp/ubuntu-16.04-server-ppc64el.iso --wait 0 --virt-type qemu --memorybacking hugepages=on --graphics vnc --arch ppc64le
* I "virsh destroy" and then "virsh edit" to add this block to the guest XML:
<numatune> <memory mode='strict' nodeset='0'/> </numatune>
* "virsh start", and the machine starts (I believe it should fail due to insufficient memory satasfying the policy). * "numastat -p $(pidof qemu-system-ppc64)" shows something like this:
Per-node process memory usage (in MBs) for PID 8048 (qemu-system-ppc) Node 0 Node 1 Total --------------- --------------- --------------- Huge 0.00 2048.00 2048.00 Heap 8.12 0.00 8.12 Stack 0.03 0.00 0.03 Private 35.80 6.10 41.90 ---------------- --------------- --------------- --------------- Total 43.95 2054.10 2098.05
So it looks like it's allocated hugepages from node 1, isn't this violating the policy I set via numatune?
Oh, now I get it. We are doing our best to apply that policy to qemu even when we don't have this option. However, using this works even better (which is probably* what we want). And that's the reasoning behind this.
* I'm saying probably because when I was adding numactl binding to be used together with cgroups, I was told that we couldn't change the binding afterwards and it's bad. I feel like we could do something with that and it would help us in the future, but there needs to be a discussion, I guess. Because I might be one of the few =)
So to recapitulate that, there are three options how to affect the allocation of qemu's memory:
1) numactl (libnuma): it works as expected, but cannot be changed later
2) cgroups: so strict it has to be applied after qemu has started; due to that it doesn't work right, especially for stuff that gets all pre-allocated (like hugepages). It can be changed later, but it won't always mean the memory will migrate, so upon change there is no guarantee. If it's unavailable, we fall back to (1) anyway.
3) memory-backend-file's host-nodes=: this works as expected, but cannot be used with older QEMUs, cannot be changed later, and in some cases (not your particular one) it might screw up migration if it wasn't used before.
Selecting the best option from these, plus making the code work with every possibility (erroring out when you want to change the memory node and we had to use (1), for example), is a pain. We should really think about that and reorganize these things for the better of the future. Otherwise we're going to overwhelm ourselves. Cc'ing Peter to get his thoughts as well, as he worked on some parts of this.
Martin
Thanks for the explanation, and I agree (I'm already a bit overwhelmed!) :-)
What do you mean by "changed later"? Do you mean, if the domain XML is changed while the machine is running?
E.g. by 'virsh numatune domain 1-2'
Ah, thanks!
I did look at the libnuma and cgroups approaches, but I was concerned they wouldn't work in this case, because of the way QEMU allocates memory when mem-prealloc is used: the memory is allocated in the main process, before the CPU threads are created. (This is based only on a bit of hacking and debugging in QEMU, but it does seem explain the behaviour I've seen so far.)
But we use numactl before QEMU is exec()'d.
Sorry, I jumped ahead a bit. I'll try to explain what I mean:

I think the problem with using this method would be that the NUMA policy is applied to all allocations by QEMU, not just the ones related to the memory backing. I'm not sure if that would cause a serious problem, but it seems untidy, and it doesn't happen in other situations (i.e. with separate memory backend objects, QEMU sets up the policy specifically for each one and other allocations aren't affected, AFAIK). Presumably, if memory were very restricted, it could prevent the guest from starting.
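A rough way to picture that difference (paths and node numbers are illustrative): a process-wide policy applied before exec() covers everything QEMU allocates, while a backend object scopes the policy to the guest RAM it backs:

    # process-wide: every allocation qemu makes (heap, stack, internal buffers)
    # is restricted to host node 0
    numactl --membind=0 qemu-system-ppc64 -mem-prealloc -mem-path /dev/hugepages/libvirt/qemu ...

    # per-backend: only the memory backed by this object is bound to host node 0
    qemu-system-ppc64 -object memory-backend-file,id=mem,size=2048M,mem-path=/dev/hugepages/libvirt/qemu,prealloc=yes,host-nodes=0,policy=bind ...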
If this is the case, it would seem to be a significant problem: if policy is set on the main thread, it will affect all allocations not just the VCPU memory and if it's set on the VCPU threads it won't catch the pre-allocation at all. (Is this what you were referring to by "it doesn't work right"?)
Kind of, yes.
That was my reasoning for trying to use the backend object in this case; it was the only method that worked and did not require changes to QEMU. I'd prefer the other approaches if they could be made to work.
There is a workaround, you can disable the cpuset cgroup in libvirtd.conf, but that's not what you want, I guess.
Thanks, but no it doesn't seem to be what I want due to the above issue.
I think QEMU could be altered to move the preallocations into the VCPU threads but it didn't seem trivial and I suspected the QEMU community would point out that there was already a way to do it using backend objects. Another option would be to add a -host-nodes parameter to QEMU so that the policy can be given without adding a memory backend object. (That seems like a more reasonable change to QEMU.)
I think upstream won't like that, mostly because there is already a way. And that is using memory-backend object. I think we could just use that and disable changing it live. But upstream will probably want that to be configurable or something.
Right, but isn't this already an issue in the cases where libvirt is already using memory backend objects and NUMA policy? (Or does libvirt already disable changing it live in those situations?)
Cheers, Sam.

On Mon, Oct 17, 2016 at 03:45:09PM +1100, Sam Bobroff wrote:
On Fri, Oct 14, 2016 at 10:19:42AM +0200, Martin Kletzander wrote:
On Fri, Oct 14, 2016 at 11:52:22AM +1100, Sam Bobroff wrote:
I did look at the libnuma and cgroups approaches, but I was concerned they wouldn't work in this case, because of the way QEMU allocates memory when mem-prealloc is used: the memory is allocated in the main process, before the CPU threads are created. (This is based only on a bit of hacking and debugging in QEMU, but it does seem explain the behaviour I've seen so far.)
But we use numactl before QEMU is exec()'d.
Sorry, I jumped ahead a bit. I'll try to explain what I mean:
I think the problem with using this method would be that the NUMA policy is applied to all allocations by QEMU, not just ones related to the memory backing. I'm not sure if that would cause a serious problem but it seems untidy, and it doesn't happen in other situations (i.e. with separate memory backend objects, QEMU sets up the policy specifically for each one and other allocations aren't affected, AFAIK). Presumably, if memory were very restricted it could prevent the guest from starting.
Yes, it is, that's what <numatune><memory/> does if you don't have any other (<memnode/>) specifics set.
I think QEMU could be altered to move the preallocations into the VCPU threads but it didn't seem trivial and I suspected the QEMU community would point out that there was already a way to do it using backend objects. Another option would be to add a -host-nodes parameter to QEMU so that the policy can be given without adding a memory backend object. (That seems like a more reasonable change to QEMU.)
I think upstream won't like that, mostly because there is already a way. And that is using memory-backend object. I think we could just use that and disable changing it live. But upstream will probably want that to be configurable or something.
Right, but isn't this already an issue in the cases where libvirt is already using memory backend objects and NUMA policy? (Or does libvirt already disable changing it live in those situations?)
It is. I'm not trying to say libvirt is perfect. There are bugs, e.g. like this one. The problem is that we tried to do *everything*, but it's not currently possible. I'm trying to explain how stuff works now. It definitely needs some fixing, though.

On Tue, Oct 18, 2016 at 10:43:31PM +0200, Martin Kletzander wrote:
On Mon, Oct 17, 2016 at 03:45:09PM +1100, Sam Bobroff wrote:
On Fri, Oct 14, 2016 at 10:19:42AM +0200, Martin Kletzander wrote:
On Fri, Oct 14, 2016 at 11:52:22AM +1100, Sam Bobroff wrote:
I did look at the libnuma and cgroups approaches, but I was concerned they wouldn't work in this case, because of the way QEMU allocates memory when mem-prealloc is used: the memory is allocated in the main process, before the CPU threads are created. (This is based only on a bit of hacking and debugging in QEMU, but it does seem explain the behaviour I've seen so far.)
But we use numactl before QEMU is exec()'d.
Sorry, I jumped ahead a bit. I'll try to explain what I mean:
I think the problem with using this method would be that the NUMA policy is applied to all allocations by QEMU, not just ones related to the memory backing. I'm not sure if that would cause a serious problem but it seems untidy, and it doesn't happen in other situations (i.e. with separate memory backend objects, QEMU sets up the policy specifically for each one and other allocations aren't affected, AFAIK). Presumably, if memory were very restricted it could prevent the guest from starting.
Yes, it is, that's what <numatune><memory/> does if you don't have any other (<memnode/>) specifics set.
I think QEMU could be altered to move the preallocations into the VCPU threads but it didn't seem trivial and I suspected the QEMU community would point out that there was already a way to do it using backend objects. Another option would be to add a -host-nodes parameter to QEMU so that the policy can be given without adding a memory backend object. (That seems like a more reasonable change to QEMU.)
I think upstream won't like that, mostly because there is already a way. And that is using memory-backend object. I think we could just use that and disable changing it live. But upstream will probably want that to be configurable or something.
Right, but isn't this already an issue in the cases where libvirt is already using memory backend objects and NUMA policy? (Or does libvirt already disable changing it live in those situations?)
It is. I'm not trying to say libvirt is perfect. There are bugs, e.g. like this one. The problem is that we tried to do *everything*, but it's not currently possible. I'm trying to explain how stuff works now. It definitely needs some fixing, though.
OK :-) Well, given our discussion, do you think it's worth a v2 of my original patch or would it be better to drop it in favour of some broader change? Cheers, Sam.

On Tue, Oct 25, 2016 at 01:10:23PM +1100, Sam Bobroff wrote:
On Tue, Oct 18, 2016 at 10:43:31PM +0200, Martin Kletzander wrote:
On Mon, Oct 17, 2016 at 03:45:09PM +1100, Sam Bobroff wrote:
On Fri, Oct 14, 2016 at 10:19:42AM +0200, Martin Kletzander wrote:
On Fri, Oct 14, 2016 at 11:52:22AM +1100, Sam Bobroff wrote:
I did look at the libnuma and cgroups approaches, but I was concerned they wouldn't work in this case, because of the way QEMU allocates memory when mem-prealloc is used: the memory is allocated in the main process, before the CPU threads are created. (This is based only on a bit of hacking and debugging in QEMU, but it does seem explain the behaviour I've seen so far.)
But we use numactl before QEMU is exec()'d.
Sorry, I jumped ahead a bit. I'll try to explain what I mean:
I think the problem with using this method would be that the NUMA policy is applied to all allocations by QEMU, not just ones related to the memory backing. I'm not sure if that would cause a serious problem but it seems untidy, and it doesn't happen in other situations (i.e. with separate memory backend objects, QEMU sets up the policy specifically for each one and other allocations aren't affected, AFAIK). Presumably, if memory were very restricted it could prevent the guest from starting.
Yes, it is, that's what <numatune><memory/> does if you don't have any other (<memnode/>) specifics set.
I think QEMU could be altered to move the preallocations into the VCPU threads but it didn't seem trivial and I suspected the QEMU community would point out that there was already a way to do it using backend objects. Another option would be to add a -host-nodes parameter to QEMU so that the policy can be given without adding a memory backend object. (That seems like a more reasonable change to QEMU.)
I think upstream won't like that, mostly because there is already a way. And that is using memory-backend object. I think we could just use that and disable changing it live. But upstream will probably want that to be configurable or something.
Right, but isn't this already an issue in the cases where libvirt is already using memory backend objects and NUMA policy? (Or does libvirt already disable changing it live in those situations?)
It is. I'm not trying to say libvirt is perfect. There are bugs, e.g. like this one. The problem is that we tried to do *everything*, but it's not currently possible. I'm trying to explain how stuff works now. It definitely needs some fixing, though.
OK :-)
Well, given our discussion, do you think it's worth a v2 of my original patch or would it be better to drop it in favour of some broader change?
Honestly, I thought about the approaches so much I'm now not sure I'll make a good decision. RFC could do. If I were to pick, I would go with a new setting that would control whether we want the binding to be changeable throughout the domain's lifetime or not so that we can make better decisions (and don't feel bad about the bad ones).
Cheers, Sam.

On Tue, Oct 25, 2016 at 02:13:07PM +0200, Martin Kletzander wrote:
On Tue, Oct 25, 2016 at 01:10:23PM +1100, Sam Bobroff wrote:
On Tue, Oct 18, 2016 at 10:43:31PM +0200, Martin Kletzander wrote:
On Mon, Oct 17, 2016 at 03:45:09PM +1100, Sam Bobroff wrote:
On Fri, Oct 14, 2016 at 10:19:42AM +0200, Martin Kletzander wrote:
On Fri, Oct 14, 2016 at 11:52:22AM +1100, Sam Bobroff wrote:
I did look at the libnuma and cgroups approaches, but I was concerned they wouldn't work in this case, because of the way QEMU allocates memory when mem-prealloc is used: the memory is allocated in the main process, before the CPU threads are created. (This is based only on a bit of hacking and debugging in QEMU, but it does seem explain the behaviour I've seen so far.)
But we use numactl before QEMU is exec()'d.
Sorry, I jumped ahead a bit. I'll try to explain what I mean:
I think the problem with using this method would be that the NUMA policy is applied to all allocations by QEMU, not just ones related to the memory backing. I'm not sure if that would cause a serious problem but it seems untidy, and it doesn't happen in other situations (i.e. with separate memory backend objects, QEMU sets up the policy specifically for each one and other allocations aren't affected, AFAIK). Presumably, if memory were very restricted it could prevent the guest from starting.
Yes, it is, that's what <numatune><memory/> does if you don't have any other (<memnode/>) specifics set.
I think QEMU could be altered to move the preallocations into the VCPU threads but it didn't seem trivial and I suspected the QEMU community would point out that there was already a way to do it using backend objects. Another option would be to add a -host-nodes parameter to QEMU so that the policy can be given without adding a memory backend object. (That seems like a more reasonable change to QEMU.)
I think upstream won't like that, mostly because there is already a way. And that is using memory-backend object. I think we could just use that and disable changing it live. But upstream will probably want that to be configurable or something.
Right, but isn't this already an issue in the cases where libvirt is already using memory backend objects and NUMA policy? (Or does libvirt already disable changing it live in those situations?)
It is. I'm not trying to say libvirt is perfect. There are bugs, e.g. like this one. The problem is that we tried to do *everything*, but it's not currently possible. I'm trying to explain how stuff works now. It definitely needs some fixing, though.
OK :-)
Well, given our discussion, do you think it's worth a v2 of my original patch or would it be better to drop it in favour of some broader change?
Honestly, I thought about the approaches so much I'm now not sure I'll make a good decision. RFC could do. If I were to pick, I would go with a new setting that would control whether we want the binding to be changeable throughout the domain's lifetime or not so that we can make better decisions (and don't feel bad about the bad ones).
I feel the same way. OK, I'll try an RFC patch with a lot of description.

I'm specifically trying to address the issue I originally raised, which isn't quite the same thing as the changeability of the bindings, but I'll keep that in mind. I think your point about changing the bindings will apply in the same way whenever QEMU's memory-backend objects are used with their "host-nodes" attribute (since they are what causes QEMU to apply policy), so I don't think I'm suggesting any significant change there.

If you want to add the new setting you mention above, I'd be happy to base my patch on top of that work. ;-)

Cheers,
Sam.