Hi
On Thu, Aug 16, 2018 at 4:35 AM, John Ferlan <jferlan(a)redhat.com> wrote:
On 07/13/2018 09:28 AM, marcandre.lureau(a)redhat.com wrote:
> From: Marc-André Lureau <marcandre.lureau(a)redhat.com>
>
> When a domain is configured with 'shared' memory backing:
>
> <memoryBacking>
> <access mode='shared'/>
> </memoryBacking>
>
> But no explicit NUMA configuration, let's configure a shared memory
> backend associated with default -numa.
>
> Signed-off-by: Marc-André Lureau <marcandre.lureau(a)redhat.com>
> ---
> src/qemu/qemu_command.c | 100 ++++++++++++------
> .../fd-memory-no-numa-topology.args | 4 +
> 2 files changed, 73 insertions(+), 31 deletions(-)
>
NUMA, memory backends, and hugepages - not in my wheelhouse of
knowledge. Hopefully Michal and/or Pavel will take a look!
Is it possible someone may not want this type of thing to happen? Is
I assume someone that sets 'shared' memory mode may consider this as a bug fix.
there an upside or downside to this? What happens "today"
when not
You get non-shared memory
generated? And of course, what about migration concerns about
unconditionally doing this for some target migration?
True, this will break migration though if the target uses
numa/memory-backend-file.
What do you suggest?
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 44ae8dcef7..f1235099b2 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -3254,26 +3254,21 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
>
>
> static int
> -qemuBuildMemoryCellBackendStr(virDomainDefPtr def,
> - virQEMUDriverConfigPtr cfg,
> - size_t cell,
> - qemuDomainObjPrivatePtr priv,
> - virBufferPtr buf)
> +qemuBuildMemoryBackendStr(virDomainDefPtr def,
> + virQEMUDriverConfigPtr cfg,
> + const char *alias,
So the one "concern" I'd have here is some time in the future the @mem
gets allocated and handled like a real device eventually calling
virDomainDeviceInfoClear and that'd be a problem for the passed const
char * string. Some future person's problem I guess!
> + int targetNode,
> + unsigned long long memsize,
> + qemuDomainObjPrivatePtr priv,
> + virBufferPtr buf)
As much as a long name is a pain, is this more of a :
qemuBuildMemorySharedDefaultBackendStr
Why?
> {
> virJSONValuePtr props = NULL;
> - char *alias = NULL;
> - int ret = -1;
> - int rc;
> virDomainMemoryDef mem = { 0 };
> - unsigned long long memsize = virDomainNumaGetNodeMemorySize(def->numa,
> - cell);
> -
> - if (virAsprintf(&alias, "ram-node%zu", cell) < 0)
> - goto cleanup;
> + int rc, ret = -1;
>
> mem.size = memsize;
> - mem.targetNode = cell;
> - mem.info.alias = alias;
> + mem.targetNode = targetNode;
> + mem.info.alias = (char *)alias;
>
> if ((rc = qemuBuildMemoryBackendProps(&props, alias, cfg,
priv->qemuCaps,
> def, &mem, priv->autoNodeset,
false)) < 0)> @@ -3284,9 +3279,30 @@ qemuBuildMemoryCellBackendStr(virDomainDefPtr
def,
>
> ret = rc;
>
> +cleanup:
Fails 'make check syntax-check' :
maint.mk: Top-level labels should be indented by one space
make: *** [cfg.mk:898: sc_require_space_before_label] Error 1
argh, fixed
> + virJSONValueFree(props);
> + return ret;
> +}
> +
> +
> +static int
> +qemuBuildMemoryCellBackendStr(virDomainDefPtr def,
> + virQEMUDriverConfigPtr cfg,
> + size_t cell,
> + qemuDomainObjPrivatePtr priv,
> + virBufferPtr buf)
> +{
> + char *alias = NULL;
> + int ret = -1;
> + unsigned long long memsize = virDomainNumaGetNodeMemorySize(def->numa,
cell);
> +
> + if (virAsprintf(&alias, "ram-node%zu", cell) < 0)
> + goto cleanup;
> +
> + ret = qemuBuildMemoryBackendStr(def, cfg, alias, cell, memsize, priv, buf);
> +
> cleanup:
> VIR_FREE(alias);
> - virJSONValueFree(props);
>
> return ret;
> }
> @@ -7590,6 +7606,17 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
> size_t ncells = virDomainNumaGetNodeCount(def->numa);
> const long system_page_size = virGetSystemPageSizeKB();
> bool numa_distances = false;
> + bool implicit = false;
> +
> + if (ncells == 0) {
> + if (def->mem.access == VIR_DOMAIN_MEMORY_ACCESS_SHARED) {
> + ncells = 1;
Well, that's cheating for the subsequent for loop ;-)
> + implicit = true;
> + } else {
> + ret = 0;
> + goto cleanup;
> + }
> + }
So if ncells == 0 && def->mem.access != VIR_DOMAIN_MEMORY_ACCESS_SHARED,
then we return 0 without doing the subsequent code? Is that expected? Is
there something done later that may be necessary, needed, or assumed.
No, before the patch, virDomainNumaGetNodeCount() is checked before
calling qemuBuildNumaArgStr(). Now it is handled inside in case
ncells==0 && def->mem.access != VIR_DOMAIN_MEMORY_ACCESS_SHARED.
>
> if (virDomainNumatuneHasPerNodeBinding(def->numa) &&
> !(virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) ||
> @@ -7645,14 +7672,22 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
> if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) ||
> virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) {
>
> - if ((rc = qemuBuildMemoryCellBackendStr(def, cfg, i, priv,
> - &nodeBackends[i])) < 0)
> +
> + if (implicit)
> + rc = qemuBuildMemoryBackendStr(def, cfg, "ram-node", -1,
Using "ram-node" where other devices use "ram-node%zu" could easily
be
missed - maybe "default" or "implicit" as a prefix or postfix to make
it
stand out a bit more? It's not a big deal though.
> + def->mem.total_memory,
> + priv, &nodeBackends[i]);
> + else
> + rc = qemuBuildMemoryCellBackendStr(def, cfg, i,
> + priv, &nodeBackends[i]);
> + if (rc < 0)
> goto cleanup;
>
> if (rc == 0)
> needBackend = true;
> } else {
> - if (virDomainNumaGetNodeMemoryAccessMode(def->numa, i)) {
> + if (implicit ||
> + virDomainNumaGetNodeMemoryAccessMode(def->numa, i)) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> _("Shared memory mapping is not supported
"
> "with this QEMU"));
> @@ -7667,15 +7702,18 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
>
> for (i = 0; i < ncells; i++) {
> VIR_FREE(cpumask);
> - if (!(cpumask = virBitmapFormat(virDomainNumaGetNodeCpumask(def->numa,
i))))
> - goto cleanup;
>
> - if (strchr(cpumask, ',') &&
> - !virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA)) {
> - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> - _("disjoint NUMA cpu ranges are not supported
"
> - "with this QEMU"));
> - goto cleanup;
> + if (!implicit) {
> + if (!(cpumask =
virBitmapFormat(virDomainNumaGetNodeCpumask(def->numa, i))))
> + goto cleanup;
> +
> + if (strchr(cpumask, ',') &&
> + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA)) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("disjoint NUMA cpu ranges are not supported
"
> + "with this QEMU"));
> + goto cleanup;
> + }
> }
>
> if (needBackend) {
> @@ -7694,7 +7732,8 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
> }
>
> if (needBackend)
> - virBufferAsprintf(&buf, ",memdev=ram-node%zu", i);
> + virBufferAsprintf(&buf, implicit ?
> + ",memdev=ram-node" :
",memdev=ram-node%zu", i);
> else
> virBufferAsprintf(&buf, ",mem=%llu",
> virDomainNumaGetNodeMemorySize(def->numa, i) /
1024);
> @@ -7717,7 +7756,7 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
> break;
> }
>
> - if (numa_distances) {
> + if (!implicit && numa_distances) {
> if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA_DIST)) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> _("setting NUMA distances is not "
> @@ -10303,8 +10342,7 @@ qemuBuildCommandLine(virQEMUDriverPtr driver,
> if (qemuBuildIOThreadCommandLine(cmd, def) < 0)
> goto error;
>
> - if (virDomainNumaGetNodeCount(def->numa) &&
> - qemuBuildNumaArgStr(cfg, def, cmd, priv) < 0)
> + if (qemuBuildNumaArgStr(cfg, def, cmd, priv) < 0)
> goto error;
>
> if (qemuBuildMemoryDeviceCommandLine(cmd, cfg, def, priv) < 0)
> diff --git a/tests/qemuxml2argvdata/fd-memory-no-numa-topology.args
b/tests/qemuxml2argvdata/fd-memory-no-numa-topology.args
> index bd88daaa3b..400fb39cc6 100644
> --- a/tests/qemuxml2argvdata/fd-memory-no-numa-topology.args
> +++ b/tests/qemuxml2argvdata/fd-memory-no-numa-topology.args
> @@ -11,6 +11,10 @@ QEMU_AUDIO_DRV=none \
> -m 14336 \
> -mem-prealloc \
> -smp 8,sockets=8,cores=1,threads=1 \
> +-object memory-backend-file,id=ram-node,\
> +mem-path=/var/lib/libvirt/qemu/ram/libvirt/qemu/-1-instance-00000092/ram-node,\
> +share=yes,size=15032385536 \
Curious does that perhaps rather large file for mem-path get created? If
so, wow! Seems to me something like this would need to be documented or
as noted earlier be configurable.
You mean during tests? no, it's created by qemu afaik.
I am not a libvirt expert either, and I would rather have no file be
created when you want shared memory. That's what the next patches
propose with memfd support.
John
> +-numa node,nodeid=0,memdev=ram-node \
> -uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
> -display none \
> -no-user-config \
>
thanks