[libvirt] [PATCH v2 0/2] qemu: Don't use -mem-prealloc among with .prealloc=yes

v2 of: https://www.redhat.com/archives/libvir-list/2018-November/msg00159.html diff to v1: - Patch 01/02 is completely new - Patch 02/02 has reworded commit message Michal Prívozník (2): qemuBuildMemoryBackendProps: Pass @priv instead of its individual members qemu: Don't use -mem-prealloc among with .prealloc=yes src/qemu/qemu_command.c | 56 ++++++++++--------- src/qemu/qemu_command.h | 3 +- src/qemu/qemu_domain.c | 7 +++ src/qemu/qemu_domain.h | 3 + src/qemu/qemu_hotplug.c | 2 +- .../hugepages-numa-default-dimm.args | 2 +- 6 files changed, 44 insertions(+), 29 deletions(-) -- 2.19.2

So far we have two arguments that we are passing to qemuBuildMemoryBackendProps() and that are taken from domain private data: @qemuCaps and @autoNodeset. In the next commit I will use one more item from there. Therefore, instead of having it as yet another argument to the function, pass pointer to the private data object. There is one change in qemuDomainAttachMemory() where previously @autoNodeset was NULL but now is priv->autoNodeset (which may be set). This is safe to do as @autoNodeset is advisory only. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 30 ++++++++++++++---------------- src/qemu/qemu_command.h | 3 +-- src/qemu/qemu_hotplug.c | 2 +- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 32ed83f277..01a3141134 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3201,22 +3201,21 @@ qemuBuildMemoryBackendPropsShare(virJSONValuePtr props, * @backendProps: [out] constructed object * @alias: alias of the device * @cfg: qemu driver config object - * @qemuCaps: qemu capabilities object + * @priv: pointer to domain private object * @def: domain definition object * @mem: memory definition object - * @autoNodeset: fallback nodeset in case of automatic NUMA placement * @force: forcibly use one of the backends * * Creates a configuration object that represents memory backend of given guest - * NUMA node (domain @def and @mem). Use @autoNodeset to fine tune the + * NUMA node (domain @def and @mem). Use @priv->autoNodeset to fine tune the * placement of the memory on the host NUMA nodes. * * By default, if no memory-backend-* object is necessary to fulfil the guest * configuration value of 1 is returned. This behaviour can be suppressed by * setting @force to true in which case 0 would be returned. * - * Then, if one of the three memory-backend-* should be used, the @qemuCaps is - * consulted to check if qemu does support it. + * Then, if one of the three memory-backend-* should be used, the @priv->qemuCaps + * is consulted to check if qemu does support it. * * Returns: 0 on success, * 1 on success and if there's no need to use memory-backend-* @@ -3226,10 +3225,9 @@ int qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, const char *alias, virQEMUDriverConfigPtr cfg, - virQEMUCapsPtr qemuCaps, + qemuDomainObjPrivatePtr priv, virDomainDefPtr def, virDomainMemoryDefPtr mem, - virBitmapPtr autoNodeset, bool force) { const char *backendType = "memory-backend-file"; @@ -3379,7 +3377,7 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, if (!mem->nvdimmPath && discard == VIR_TRISTATE_BOOL_YES) { - if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE_DISCARD)) { + if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE_DISCARD)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("this QEMU doesn't support memory discard")); goto cleanup; @@ -3403,7 +3401,7 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, if (mem->sourceNodes) { nodemask = mem->sourceNodes; } else { - if (virDomainNumatuneMaybeGetNodeset(def->numa, autoNodeset, + if (virDomainNumatuneMaybeGetNodeset(def->numa, priv->autoNodeset, &nodemask, mem->targetNode) < 0) goto cleanup; } @@ -3431,19 +3429,19 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, } else { /* otherwise check the required capability */ if (STREQ(backendType, "memory-backend-file") && - !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) { + !virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("this qemu doesn't support the " "memory-backend-file object")); goto cleanup; } else if (STREQ(backendType, "memory-backend-ram") && - !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM)) { + !virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("this qemu doesn't support the " "memory-backend-ram object")); goto cleanup; } else if (STREQ(backendType, "memory-backend-memory") && - !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD)) { + !virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("this qemu doesn't support the " "memory-backend-memfd object")); @@ -3486,8 +3484,8 @@ qemuBuildMemoryCellBackendStr(virDomainDefPtr def, mem.targetNode = cell; mem.info.alias = alias; - if ((rc = qemuBuildMemoryBackendProps(&props, alias, cfg, priv->qemuCaps, - def, &mem, priv->autoNodeset, false)) < 0) + if ((rc = qemuBuildMemoryBackendProps(&props, alias, cfg, + priv, def, &mem, false)) < 0) goto cleanup; if (virQEMUBuildObjectCommandlineFromJSON(buf, props) < 0) @@ -3523,8 +3521,8 @@ qemuBuildMemoryDimmBackendStr(virBufferPtr buf, if (virAsprintf(&alias, "mem%s", mem->info.alias) < 0) goto cleanup; - if (qemuBuildMemoryBackendProps(&props, alias, cfg, priv->qemuCaps, - def, mem, priv->autoNodeset, true) < 0) + if (qemuBuildMemoryBackendProps(&props, alias, cfg, + priv, def, mem, true) < 0) goto cleanup; if (virQEMUBuildObjectCommandlineFromJSON(buf, props) < 0) diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index d382cd592a..5cd744b8bf 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -125,10 +125,9 @@ int qemuBuildControllerDevStr(const virDomainDef *domainDef, int qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, const char *alias, virQEMUDriverConfigPtr cfg, - virQEMUCapsPtr qemuCaps, + qemuDomainObjPrivatePtr priv, virDomainDefPtr def, virDomainMemoryDefPtr mem, - virBitmapPtr autoNodeset, bool force); char *qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index c455baeab6..4e795c7859 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2568,7 +2568,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, goto cleanup; if (qemuBuildMemoryBackendProps(&props, objalias, cfg, - priv->qemuCaps, vm->def, mem, NULL, true) < 0) + priv, vm->def, mem, true) < 0) goto cleanup; if (qemuProcessBuildDestroyMemoryPaths(driver, vm, mem, true) < 0) -- 2.19.2

https://bugzilla.redhat.com/show_bug.cgi?id=1624223 There are two ways to request memory preallocation on cmd line: -mem-prealloc and .prealloc attribute for a memory-backend-file. However, as it turns out it's not safe to use both at the same time. If -mem-prealloc is used then qemu will fullly allocate the memory (this is done by actually touching every page that has been allocated). Then, if .prealloc=yes is specified, mbind(flags = MPOL_MF_STRICT | MPOL_MF_MOVE) is called which: a) has to (possibly) move the memory to a different NUMA node, b) can have no effect when hugepages are in play (thus ignoring user request to place memory on desired NUMA nodes). Prefer -mem-prealloc as it is more backward compatible compared to switching to "-numa node,memdev= + -object memory-backend-file". Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 26 ++++++++++++------- src/qemu/qemu_domain.c | 7 +++++ src/qemu/qemu_domain.h | 3 +++ .../hugepages-numa-default-dimm.args | 2 +- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 01a3141134..1a1cb9cbbd 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3357,11 +3357,13 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps, if (mem->nvdimmPath) { if (VIR_STRDUP(memPath, mem->nvdimmPath) < 0) goto cleanup; - prealloc = true; + if (!priv->memPrealloc) + prealloc = true; } else if (useHugepage) { if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &memPath) < 0) goto cleanup; - prealloc = true; + if (!priv->memPrealloc) + prealloc = true; } else { /* We can have both pagesize and mem source. If that's the case, * prefer hugepages as those are more specific. */ @@ -7576,7 +7578,8 @@ qemuBuildSmpCommandLine(virCommandPtr cmd, static int qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, const virDomainDef *def, - virCommandPtr cmd) + virCommandPtr cmd, + qemuDomainObjPrivatePtr priv) { const long system_page_size = virGetSystemPageSizeKB(); char *mem_path = NULL; @@ -7598,8 +7601,10 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg, return 0; } - if (def->mem.allocation != VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) + if (def->mem.allocation != VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) { virCommandAddArgList(cmd, "-mem-prealloc", NULL); + priv->memPrealloc = true; + } virCommandAddArgList(cmd, "-mem-path", mem_path, NULL); VIR_FREE(mem_path); @@ -7612,7 +7617,8 @@ static int qemuBuildMemCommandLine(virCommandPtr cmd, virQEMUDriverConfigPtr cfg, const virDomainDef *def, - virQEMUCapsPtr qemuCaps) + virQEMUCapsPtr qemuCaps, + qemuDomainObjPrivatePtr priv) { if (qemuDomainDefValidateMemoryHotplug(def, qemuCaps, NULL) < 0) return -1; @@ -7631,15 +7637,17 @@ qemuBuildMemCommandLine(virCommandPtr cmd, virDomainDefGetMemoryInitial(def) / 1024); } - if (def->mem.allocation == VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) + if (def->mem.allocation == VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) { virCommandAddArgList(cmd, "-mem-prealloc", NULL); + priv->memPrealloc = true; + } /* * Add '-mem-path' (and '-mem-prealloc') parameter here if * the hugepages and no numa node is specified. */ if (!virDomainNumaGetNodeCount(def->numa) && - qemuBuildMemPathStr(cfg, def, cmd) < 0) + qemuBuildMemPathStr(cfg, def, cmd, priv) < 0) return -1; if (def->mem.locked && !virQEMUCapsGet(qemuCaps, QEMU_CAPS_REALTIME_MLOCK)) { @@ -7748,7 +7756,7 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg, } if (!needBackend && - qemuBuildMemPathStr(cfg, def, cmd) < 0) + qemuBuildMemPathStr(cfg, def, cmd, priv) < 0) goto cleanup; for (i = 0; i < ncells; i++) { @@ -10441,7 +10449,7 @@ qemuBuildCommandLine(virQEMUDriverPtr driver, if (!migrateURI && !snapshot && qemuDomainAlignMemorySizes(def) < 0) goto error; - if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps) < 0) + if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps, priv) < 0) goto error; if (qemuBuildSmpCommandLine(cmd, def) < 0) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 509da6bfea..039b887d8e 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1944,6 +1944,8 @@ qemuDomainObjPrivateDataClear(qemuDomainObjPrivatePtr priv) VIR_FREE(priv->libDir); VIR_FREE(priv->channelTargetDir); + priv->memPrealloc = false; + /* remove automatic pinning data */ virBitmapFree(priv->autoNodeset); priv->autoNodeset = NULL; @@ -2489,6 +2491,9 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf, if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV)) virBufferAsprintf(buf, "<nodename index='%llu'/>\n", priv->nodenameindex); + if (priv->memPrealloc) + virBufferAddLit(buf, "<memPrealloc/>\n"); + if (qemuDomainObjPrivateXMLFormatBlockjobs(buf, vm) < 0) return -1; @@ -2993,6 +2998,8 @@ qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, goto error; } + priv->memPrealloc = virXPathBoolean("boolean(./memPrealloc)", ctxt) == 1; + return 0; error: diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 53b5ea1678..b7347c72ce 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -372,6 +372,9 @@ struct _qemuDomainObjPrivate { /* true if libvirt remembers the original owner for files */ bool rememberOwner; + + /* true if global -mem-prealloc appears on cmd line */ + bool memPrealloc; }; # define QEMU_DOMAIN_PRIVATE(vm) \ diff --git a/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args b/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args index 143d8b041f..df90f7aad9 100644 --- a/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args +++ b/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args @@ -13,7 +13,7 @@ QEMU_AUDIO_DRV=none \ -mem-prealloc \ -mem-path /dev/hugepages2M/libvirt/qemu/-1-fedora \ -numa node,nodeid=0,cpus=0-1,mem=1024 \ --object memory-backend-file,id=memdimm0,prealloc=yes,\ +-object memory-backend-file,id=memdimm0,\ mem-path=/dev/hugepages1G/libvirt/qemu/-1-fedora,size=1073741824,\ host-nodes=1-3,policy=bind \ -device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0 \ -- 2.19.2

On 12/12/18 7:58 AM, Michal Privoznik wrote:
v2 of:
https://www.redhat.com/archives/libvir-list/2018-November/msg00159.html
diff to v1: - Patch 01/02 is completely new - Patch 02/02 has reworded commit message
Michal Prívozník (2): qemuBuildMemoryBackendProps: Pass @priv instead of its individual members qemu: Don't use -mem-prealloc among with .prealloc=yes
src/qemu/qemu_command.c | 56 ++++++++++--------- src/qemu/qemu_command.h | 3 +- src/qemu/qemu_domain.c | 7 +++ src/qemu/qemu_domain.h | 3 + src/qemu/qemu_hotplug.c | 2 +- .../hugepages-numa-default-dimm.args | 2 +- 6 files changed, 44 insertions(+), 29 deletions(-)
Adjust commit message for patch2 to spell "fully" instead of "fullly"... Reviewed-by: John Ferlan <jferlan@redhat.com> (series) John
participants (2)
-
John Ferlan
-
Michal Privoznik