At the moment, guests that are backed by hugepages in the host are
only able to use policy to control the placement of those hugepages
on a per-(guest-)CPU basis. Policy applied globally is ignored.
Such guests would use <memoryBacking><hugepages/></memoryBacking> and
a <numatune> block with <memory mode=... nodeset=.../> but no <memnode
.../> elements.
This patch corrects this by, in this specific case, changing the QEMU
command line from "-mem-prealloc -mem-path=..." (which cannot
specify NUMA policy) to "-object memory-backend-file ..." (which can).
Note: This is not visible to the guest and does not appear to create
a migration incompatibility.
Signed-off-by: Sam Bobroff <sam.bobroff(a)au1.ibm.com>
---
There was some discussion leading up to this patch, here:
https://www.redhat.com/archives/libvir-list/2016-October/msg00033.html
During that discussion it seemed that fixing this issue would cause
migration incompatibility but after some careful testing, it appears
that it only does when a memory-backend object is attached to a guest
NUMA node and that is not the case here. If only one is created, and
used globally (not attached via mem=<id>), the migration data does not
seem to be changed and so it seems reasonable to try something like
this patch.
This patch does work for my test cases but I don't claim a deep
understanding of the libvirt code so this is at least partly a RFC.
Comments welcome :-)
Cheers,
Sam.
src/qemu/qemu_command.c | 55 ++++++++++++++++++++++++++++++++++++++++---------
src/qemu/qemu_command.h | 2 +-
2 files changed, 46 insertions(+), 11 deletions(-)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 0804133..c28c8f2 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3143,7 +3143,7 @@ qemuBuildMemoryBackendStr(unsigned long long size,
int guestNode,
virBitmapPtr userNodeset,
virBitmapPtr autoNodeset,
- virDomainDefPtr def,
+ const virDomainDefPtr def,
virQEMUCapsPtr qemuCaps,
virQEMUDriverConfigPtr cfg,
const char **backendType,
@@ -7129,12 +7129,18 @@ qemuBuildSmpCommandLine(virCommandPtr cmd,
static int
qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
- const virDomainDef *def,
+ const virDomainDefPtr def,
virQEMUCapsPtr qemuCaps,
- virCommandPtr cmd)
+ virCommandPtr cmd,
+ virBitmapPtr auto_nodeset)
{
const long system_page_size = virGetSystemPageSizeKB();
char *mem_path = NULL;
+ virBitmapPtr nodemask = NULL;
+ const char *backendType = NULL;
+ char *backendStr = NULL;
+ virJSONValuePtr props;
+ int rv = -1;
/*
* No-op if hugepages were not requested.
@@ -7159,18 +7165,47 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
if (qemuGetHupageMemPath(cfg, def->mem.hugepages[0].size, &mem_path) < 0)
return -1;
- virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path,
NULL);
+ if (virDomainNumatuneMaybeGetNodeset(def->numa, auto_nodeset,
+ &nodemask, -1) < 0)
+ return -1;
+ if (nodemask && virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) {
+ props = virJSONValueNewObject();
+ if (qemuBuildMemoryBackendStr(virDomainDefGetMemoryInitial(def),
+ 0, -1, NULL, auto_nodeset,
+ def, qemuCaps, cfg, &backendType,
+ &props, false) < 0)
+ goto cleanup;
+ if (!(backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType,
+ "mem",
+ props)))
+ goto cleanup;
+ virCommandAddArgList(cmd, "-object", backendStr, NULL);
+ rv = 0;
+cleanup:
+ VIR_FREE(backendStr);
+ VIR_FREE(props);
+ }
+ else {
+ if (nodemask || 1)
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Memory file backend objects are "
+ "unsupported by QEMU binary. Global NUMA "
+ "hugepage policy will be ignored."));
+ virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path",
mem_path, NULL);
+ rv = 0;
+ }
VIR_FREE(mem_path);
- return 0;
+ return rv;
}
static int
qemuBuildMemCommandLine(virCommandPtr cmd,
virQEMUDriverConfigPtr cfg,
- const virDomainDef *def,
- virQEMUCapsPtr qemuCaps)
+ const virDomainDefPtr def,
+ virQEMUCapsPtr qemuCaps,
+ virBitmapPtr auto_nodeset)
{
if (qemuDomainDefValidateMemoryHotplug(def, qemuCaps, NULL) < 0)
return -1;
@@ -7194,7 +7229,7 @@ qemuBuildMemCommandLine(virCommandPtr cmd,
* there is no numa node specified.
*/
if (!virDomainNumaGetNodeCount(def->numa) &&
- qemuBuildMemPathStr(cfg, def, qemuCaps, cmd) < 0)
+ qemuBuildMemPathStr(cfg, def, qemuCaps, cmd, auto_nodeset) < 0)
return -1;
if (def->mem.locked && !virQEMUCapsGet(qemuCaps,
QEMU_CAPS_REALTIME_MLOCK)) {
@@ -7331,7 +7366,7 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
}
if (!needBackend &&
- qemuBuildMemPathStr(cfg, def, qemuCaps, cmd) < 0)
+ qemuBuildMemPathStr(cfg, def, qemuCaps, cmd, auto_nodeset) < 0)
goto cleanup;
for (i = 0; i < ncells; i++) {
@@ -9381,7 +9416,7 @@ qemuBuildCommandLine(virQEMUDriverPtr driver,
if (!migrateURI && !snapshot && qemuDomainAlignMemorySizes(def) <
0)
goto error;
- if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps) < 0)
+ if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps, nodeset) < 0)
goto error;
if (qemuBuildSmpCommandLine(cmd, def) < 0)
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
index 2f2a6ff..1e97d76 100644
--- a/src/qemu/qemu_command.h
+++ b/src/qemu/qemu_command.h
@@ -126,7 +126,7 @@ int qemuBuildMemoryBackendStr(unsigned long long size,
int guestNode,
virBitmapPtr userNodeset,
virBitmapPtr autoNodeset,
- virDomainDefPtr def,
+ const virDomainDefPtr def,
virQEMUCapsPtr qemuCaps,
virQEMUDriverConfigPtr cfg,
const char **backendType,
--
2.10.0.297.gf6727b0