[PATCH 0/5] qemu: Prefer -numa cpu over -numa node,cpus=

I sent these patches a while ago. Resending them again to resume the discussion:

https://listman.redhat.com/archives/libvir-list/2020-May/msg01035.html

Michal Prívozník (5):
  virCPUDefParseXML: Parse uint using virXPathUInt()
  virCPUDefParseXML: Prefer virXMLPropUInt over virXPathUInt
  qemuBuildNumaCommandLine: Move vars into loops
  qemuBuildNumaCommandLine: Separate out building of CPU list
  qemu: Prefer -numa cpu over -numa node,cpus=

 src/conf/cpu_conf.c                           |  45 ++----
 src/qemu/qemu_command.c                       | 151 ++++++++++++++++--
 .../hugepages-nvdimm.x86_64-latest.args       |   4 +-
 ...memory-default-hugepage.x86_64-latest.args |  10 +-
 .../memfd-memory-numa.x86_64-latest.args      |  10 +-
 ...y-hotplug-nvdimm-access.x86_64-latest.args |   4 +-
 ...ory-hotplug-nvdimm-align.x86_64-5.2.0.args |   4 +-
 ...ry-hotplug-nvdimm-align.x86_64-latest.args |   4 +-
 ...ory-hotplug-nvdimm-label.x86_64-5.2.0.args |   4 +-
 ...ry-hotplug-nvdimm-label.x86_64-latest.args |   4 +-
 ...mory-hotplug-nvdimm-pmem.x86_64-5.2.0.args |   4 +-
 ...ory-hotplug-nvdimm-pmem.x86_64-latest.args |   4 +-
 ...-hotplug-nvdimm-readonly.x86_64-5.2.0.args |   4 +-
 ...hotplug-nvdimm-readonly.x86_64-latest.args |   4 +-
 .../memory-hotplug-nvdimm.x86_64-latest.args  |   4 +-
 ...mory-hotplug-virtio-pmem.x86_64-5.2.0.args |   4 +-
 ...ory-hotplug-virtio-pmem.x86_64-latest.args |   4 +-
 .../numatune-hmat.x86_64-latest.args          |  18 ++-
 ...emnode-restrictive-mode.x86_64-latest.args |  38 ++++-
 .../numatune-memnode.x86_64-5.2.0.args        |  38 ++++-
 .../numatune-memnode.x86_64-latest.args       |  38 ++++-
 ...vhost-user-fs-fd-memory.x86_64-latest.args |   4 +-
 ...vhost-user-fs-hugepages.x86_64-latest.args |   4 +-
 ...host-user-gpu-secondary.x86_64-latest.args |   3 +-
 .../vhost-user-vga.x86_64-latest.args         |   3 +-
 25 files changed, 338 insertions(+), 76 deletions(-)

--
2.32.0

There is no need to use virXPathULong() and a temporary UL variable if
we can use virXPathUInt() directly.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/conf/cpu_conf.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c
index 1674cd6957..d31f28dfe7 100644
--- a/src/conf/cpu_conf.c
+++ b/src/conf/cpu_conf.c
@@ -526,39 +526,33 @@ virCPUDefParseXML(xmlXPathContextPtr ctxt,
     }
 
     if (virXPathNode("./topology[1]", ctxt)) {
-        unsigned long ul;
-
-        if (virXPathULong("string(./topology[1]/@sockets)", ctxt, &ul) < 0) {
+        if (virXPathUInt("string(./topology[1]/@sockets)", ctxt, &def->sockets) < 0) {
             virReportError(VIR_ERR_XML_ERROR, "%s",
                            _("Missing 'sockets' attribute in CPU topology"));
             return -1;
         }
-        def->sockets = (unsigned int) ul;
 
         if (virXPathNode("./topology[1]/@dies", ctxt)) {
-            if (virXPathULong("string(./topology[1]/@dies)", ctxt, &ul) < 0) {
+            if (virXPathUInt("string(./topology[1]/@dies)", ctxt, &def->dies) < 0) {
                 virReportError(VIR_ERR_XML_ERROR, "%s",
                                _("Malformed 'dies' attribute in CPU topology"));
                 return -1;
             }
-            def->dies = (unsigned int) ul;
         } else {
             def->dies = 1;
         }
 
-        if (virXPathULong("string(./topology[1]/@cores)", ctxt, &ul) < 0) {
+        if (virXPathUInt("string(./topology[1]/@cores)", ctxt, &def->cores) < 0) {
             virReportError(VIR_ERR_XML_ERROR, "%s",
                            _("Missing 'cores' attribute in CPU topology"));
             return -1;
         }
-        def->cores = (unsigned int) ul;
 
-        if (virXPathULong("string(./topology[1]/@threads)", ctxt, &ul) < 0) {
+        if (virXPathUInt("string(./topology[1]/@threads)", ctxt, &def->threads) < 0) {
             virReportError(VIR_ERR_XML_ERROR, "%s",
                            _("Missing 'threads' attribute in CPU topology"));
             return -1;
         }
-        def->threads = (unsigned int) ul;
 
         if (!def->sockets || !def->cores || !def->threads || !def->dies) {
             virReportError(VIR_ERR_XML_ERROR, "%s",
--
2.32.0

On Tue, Sep 21, 2021 at 16:50:27 +0200, Michal Privoznik wrote:
There is no need to use virXPathULong() and a temporary UL variable if we can use virXPathUInt() directly.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/conf/cpu_conf.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-)
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

When parsing CPU topology, which is described in <topology/>
attributes we can use virXMLPropUInt() instead of virXPathUInt() as
the former results in shorter code.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/conf/cpu_conf.c | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c
index d31f28dfe7..fbceac1657 100644
--- a/src/conf/cpu_conf.c
+++ b/src/conf/cpu_conf.c
@@ -320,6 +320,7 @@ virCPUDefParseXML(xmlXPathContextPtr ctxt,
 {
     g_autoptr(virCPUDef) def = NULL;
     g_autofree xmlNodePtr *nodes = NULL;
+    xmlNodePtr topology = NULL;
     VIR_XPATH_NODE_AUTORESTORE(ctxt)
     int n;
     size_t i;
@@ -525,38 +526,32 @@ virCPUDefParseXML(xmlXPathContextPtr ctxt,
         return -1;
     }
 
-    if (virXPathNode("./topology[1]", ctxt)) {
-        if (virXPathUInt("string(./topology[1]/@sockets)", ctxt, &def->sockets) < 0) {
-            virReportError(VIR_ERR_XML_ERROR, "%s",
-                           _("Missing 'sockets' attribute in CPU topology"));
+    if ((topology = virXPathNode("./topology[1]", ctxt))) {
+        int rc;
+
+        if (virXMLPropUInt(topology, "sockets", 10,
+                           VIR_XML_PROP_REQUIRED | VIR_XML_PROP_NONZERO,
+                           &def->sockets) < 0) {
             return -1;
         }
 
-        if (virXPathNode("./topology[1]/@dies", ctxt)) {
-            if (virXPathUInt("string(./topology[1]/@dies)", ctxt, &def->dies) < 0) {
-                virReportError(VIR_ERR_XML_ERROR, "%s",
-                               _("Malformed 'dies' attribute in CPU topology"));
-                return -1;
-            }
-        } else {
+        if ((rc = virXMLPropUInt(topology, "dies", 10,
+                                 VIR_XML_PROP_NONZERO,
+                                 &def->dies)) < 0) {
+            return -1;
+        } else if (rc == 0) {
             def->dies = 1;
         }
 
-        if (virXPathUInt("string(./topology[1]/@cores)", ctxt, &def->cores) < 0) {
-            virReportError(VIR_ERR_XML_ERROR, "%s",
-                           _("Missing 'cores' attribute in CPU topology"));
+        if (virXMLPropUInt(topology, "cores", 10,
+                           VIR_XML_PROP_REQUIRED | VIR_XML_PROP_NONZERO,
+                           &def->cores) < 0) {
             return -1;
         }
 
-        if (virXPathUInt("string(./topology[1]/@threads)", ctxt, &def->threads) < 0) {
-            virReportError(VIR_ERR_XML_ERROR, "%s",
-                           _("Missing 'threads' attribute in CPU topology"));
-            return -1;
-        }
-
-        if (!def->sockets || !def->cores || !def->threads || !def->dies) {
-            virReportError(VIR_ERR_XML_ERROR, "%s",
-                           _("Invalid CPU topology"));
+        if (virXMLPropUInt(topology, "threads", 10,
+                           VIR_XML_PROP_REQUIRED | VIR_XML_PROP_NONZERO,
+                           &def->threads) < 0) {
             return -1;
         }
     }
--
2.32.0
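For context on the rc handling in the 'dies' hunk above: virXMLPropUInt()
reports a negative value on a parse error, zero when the attribute is simply
absent, and a positive value once the attribute was parsed, which is what the
rc == 0 branch relies on to default dies to 1. A minimal sketch of that
pattern, with a made-up element/attribute purely for illustration:

#include "virxml.h"   /* virXMLPropUInt(), VIR_XML_PROP_* (libvirt-internal header) */

/* Illustrative only: the <widget count='...'/> attribute is hypothetical;
 * the point is the tri-state return value of virXMLPropUInt(). */
static int
exampleParseWidgetCount(xmlNodePtr widget,
                        unsigned int *count)
{
    int rc;

    /* rc < 0: present but malformed (error already reported),
     * rc == 0: attribute absent, rc > 0: parsed into *count */
    if ((rc = virXMLPropUInt(widget, "count", 10,
                             VIR_XML_PROP_NONZERO, count)) < 0)
        return -1;

    if (rc == 0)
        *count = 1;  /* default when the attribute is omitted */

    return 0;
}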

On Tue, Sep 21, 2021 at 16:50:28 +0200, Michal Privoznik wrote:
When parsing CPU topology, which is described in <topology/> attributes we can use virXMLPropUInt() instead of virXPathUInt() as the former results in shorter code.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/conf/cpu_conf.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-)
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

There are two variables that are used only in a single loop. Move
their definitions into their respective blocks.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/qemu/qemu_command.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 3de37aa5c5..020cbfa214 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -7416,11 +7416,9 @@ qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg,
     size_t i, j;
     virQEMUCaps *qemuCaps = priv->qemuCaps;
     g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
-    char *next = NULL;
     virBuffer *nodeBackends = NULL;
     bool needBackend = false;
    bool hmat = false;
-    int rc;
     int ret = -1;
     size_t ncells = virDomainNumaGetNodeCount(def->numa);
     ssize_t masterInitiator = -1;
@@ -7445,6 +7443,7 @@ qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg,
     if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) ||
         virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE) ||
         virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_MEMFD)) {
+        int rc;
 
         for (i = 0; i < ncells; i++) {
             if ((rc = qemuBuildMemoryCellBackendStr(def, cfg, i, priv,
@@ -7487,6 +7486,7 @@ qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg,
 
         if (cpumask) {
             g_autofree char *cpumaskStr = NULL;
+            char *next = NULL;
             char *tmpmask;
 
             if (!(cpumaskStr = virBitmapFormat(cpumask)))
--
2.32.0

On Tue, Sep 21, 2021 at 16:50:29 +0200, Michal Privoznik wrote:
There are two variables that are used only in a single loop. Move their definitions into their respective blocks.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
Reviewed-by: Peter Krempa <pkrempa@redhat.com>

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/qemu/qemu_command.c | 43 ++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 020cbfa214..f04ae1e311 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -7407,6 +7407,31 @@ qemuBuildNumaHMATCommandLine(virCommand *cmd,
 }
 
 
+static int
+qemuBuildNumaCPUs(virBuffer *buf,
+                  virBitmap *cpu)
+{
+    g_autofree char *cpumask = NULL;
+    char *tmpmask = NULL;
+    char *next = NULL;
+
+    if (!cpu)
+        return 0;
+
+    if (!(cpumask = virBitmapFormat(cpu)))
+        return -1;
+
+    for (tmpmask = cpumask; tmpmask; tmpmask = next) {
+        if ((next = strchr(tmpmask, ',')))
+            *(next++) = '\0';
+        virBufferAddLit(buf, ",cpus=");
+        virBufferAdd(buf, tmpmask, -1);
+    }
+
+    return 0;
+}
+
+
 static int
 qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg,
                          virDomainDef *def,
@@ -7473,7 +7498,6 @@ qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg,
     }
 
     for (i = 0; i < ncells; i++) {
-        virBitmap *cpumask = virDomainNumaGetNodeCpumask(def->numa, i);
         ssize_t initiator = virDomainNumaGetNodeInitiator(def->numa, i);
 
         if (needBackend) {
@@ -7484,21 +7508,8 @@ qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg,
         virCommandAddArg(cmd, "-numa");
         virBufferAsprintf(&buf, "node,nodeid=%zu", i);
 
-        if (cpumask) {
-            g_autofree char *cpumaskStr = NULL;
-            char *next = NULL;
-            char *tmpmask;
-
-            if (!(cpumaskStr = virBitmapFormat(cpumask)))
-                goto cleanup;
-
-            for (tmpmask = cpumaskStr; tmpmask; tmpmask = next) {
-                if ((next = strchr(tmpmask, ',')))
-                    *(next++) = '\0';
-                virBufferAddLit(&buf, ",cpus=");
-                virBufferAdd(&buf, tmpmask, -1);
-            }
-        }
+        if (qemuBuildNumaCPUs(&buf, virDomainNumaGetNodeCpumask(def->numa, i)) < 0)
+            goto cleanup;
 
         if (hmat) {
             if (initiator < 0)
--
2.32.0
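The non-obvious bit in the helper factored out above is the comma splitting:
virBitmapFormat() may return several discontiguous ranges (e.g. "1-27,29"),
and each range needs its own cpus= key, which is how arguments like
"-numa node,...,cpus=1-27,cpus=29" in the existing test output come about.
A standalone sketch of just that splitting, using plain stdio instead of the
virBuffer/virBitmap helpers:

#include <stdio.h>
#include <string.h>

/* Standalone illustration of the splitting done in qemuBuildNumaCPUs();
 * the real code feeds virBitmapFormat() output into a virBuffer. */
int main(void)
{
    char cpumask[] = "1-27,29";   /* what virBitmapFormat() might return */
    char *tmpmask;
    char *next = NULL;

    for (tmpmask = cpumask; tmpmask; tmpmask = next) {
        if ((next = strchr(tmpmask, ',')))
            *(next++) = '\0';
        printf(",cpus=%s", tmpmask);
    }
    printf("\n");   /* prints: ,cpus=1-27,cpus=29 */
    return 0;
}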

On Tue, Sep 21, 2021 at 16:50:30 +0200, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 43 ++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-)
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>

On Thu, 30 Sep 2021 13:33:24 +0200 Peter Krempa <pkrempa@redhat.com> wrote:
On Tue, Sep 21, 2021 at 16:50:30 +0200, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 43 ++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-)
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
^^^ copy-paste err :)

QEMU is trying to obsolete -numa node,cpus= because that uses ambiguous
vCPU id to [socket, die, core, thread] mapping. The new form is:

  -numa cpu,node-id=N,socket-id=S,die-id=D,core-id=C,thread-id=T

which is repeated for every vCPU and places it at [S, D, C, T] into
guest NUMA node N.

While in general this is magic mapping, we can deal with it. Firstly,
with QEMU 2.7 or newer, libvirt ensures that if topology is given then
maxvcpus must be sockets * dies * cores * threads (i.e. there are no
'holes'). Secondly, if no topology is given then libvirt itself places
each vCPU into a different socket (basically, it fakes topology of:
[maxvcpus, 1, 1, 1]). Thirdly, we can copy whatever QEMU is doing when
mapping vCPUs onto topology, to make sure vCPUs don't start to move
around.

Note, migration from old to new cmd line works and therefore doesn't
need any special handling.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1678085
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/qemu/qemu_command.c                       | 112 +++++++++++++++++-
 .../hugepages-nvdimm.x86_64-latest.args       |   4 +-
 ...memory-default-hugepage.x86_64-latest.args |  10 +-
 .../memfd-memory-numa.x86_64-latest.args      |  10 +-
 ...y-hotplug-nvdimm-access.x86_64-latest.args |   4 +-
 ...ory-hotplug-nvdimm-align.x86_64-5.2.0.args |   4 +-
 ...ry-hotplug-nvdimm-align.x86_64-latest.args |   4 +-
 ...ory-hotplug-nvdimm-label.x86_64-5.2.0.args |   4 +-
 ...ry-hotplug-nvdimm-label.x86_64-latest.args |   4 +-
 ...mory-hotplug-nvdimm-pmem.x86_64-5.2.0.args |   4 +-
 ...ory-hotplug-nvdimm-pmem.x86_64-latest.args |   4 +-
 ...-hotplug-nvdimm-readonly.x86_64-5.2.0.args |   4 +-
 ...hotplug-nvdimm-readonly.x86_64-latest.args |   4 +-
 .../memory-hotplug-nvdimm.x86_64-latest.args  |   4 +-
 ...mory-hotplug-virtio-pmem.x86_64-5.2.0.args |   4 +-
 ...ory-hotplug-virtio-pmem.x86_64-latest.args |   4 +-
 .../numatune-hmat.x86_64-latest.args          |  18 ++-
 ...emnode-restrictive-mode.x86_64-latest.args |  38 +++++-
 .../numatune-memnode.x86_64-5.2.0.args        |  38 +++++-
 .../numatune-memnode.x86_64-latest.args       |  38 +++++-
 ...vhost-user-fs-fd-memory.x86_64-latest.args |   4 +-
 ...vhost-user-fs-hugepages.x86_64-latest.args |   4 +-
 ...host-user-gpu-secondary.x86_64-latest.args |   3 +-
 .../vhost-user-vga.x86_64-latest.args         |   3 +-
 24 files changed, 296 insertions(+), 34 deletions(-)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index f04ae1e311..5192bd7630 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -7408,8 +7408,8 @@ qemuBuildNumaHMATCommandLine(virCommand *cmd,
 
 static int
-qemuBuildNumaCPUs(virBuffer *buf,
-                  virBitmap *cpu)
+qemuBuildNumaOldCPUs(virBuffer *buf,
+                     virBitmap *cpu)
 {
     g_autofree char *cpumask = NULL;
     char *tmpmask = NULL;
     char *next = NULL;
@@ -7432,6 +7432,94 @@ qemuBuildNumaCPUs(virBuffer *buf,
 }
 
 
+/**
+ * qemuTranlsatevCPUID:
+ *
+ * For given vCPU @id and vCPU topology (@cpu) compute corresponding
+ * @socket, @die, @core and @thread). This assumes linear topology,
+ * that is every [socket, die, core, thread] combination is valid vCPU
+ * ID and there are no 'holes'. This is ensured by
+ * qemuValidateDomainDef() if QEMU_CAPS_QUERY_HOTPLUGGABLE_CPUS is
+ * set.
+ *
+ * Moreover, if @diesSupported is false (QEMU lacks
+ * QEMU_CAPS_SMP_DIES) then @die is set to zero and @socket is
+ * computed without taking number of dies into account.
+ *
+ * The algorithm is shamelessly copied over from QEMU's
+ * x86_topo_ids_from_idx() and its history (before introducing dies).
+ */ +static void +qemuTranlsatevCPUID(unsigned int id, + bool diesSupported, + virCPUDef *cpu, + unsigned int *socket, + unsigned int *die, + unsigned int *core, + unsigned int *thread) +{ + if (cpu && cpu->sockets) { + *thread = id % cpu->threads; + *core = id / cpu->threads % cpu->cores; + if (diesSupported) { + *die = id / (cpu->cores * cpu->threads) % cpu->dies; + *socket = id / (cpu->dies * cpu->cores * cpu->threads); + } else { + *die = 0; + *socket = id / (cpu->cores * cpu->threads) % cpu->sockets; + } + } else { + /* If no topology was provided, then qemuBuildSmpCommandLine() + * puts all vCPUs into a separate socket. */ + *thread = 0; + *core = 0; + *die = 0; + *socket = id; + } +} + + +static void +qemuBuildNumaNewCPUs(virCommand *cmd, + virCPUDef *cpu, + virBitmap *cpumask, + size_t nodeid, + virQEMUCaps *qemuCaps) +{ + const bool diesSupported = virQEMUCapsGet(qemuCaps, QEMU_CAPS_SMP_DIES); + ssize_t vcpuid = -1; + + if (!cpumask) + return; + + while ((vcpuid = virBitmapNextSetBit(cpumask, vcpuid)) >= 0) { + unsigned int socket; + unsigned int die; + unsigned int core; + unsigned int thread; + + qemuTranlsatevCPUID(vcpuid, diesSupported, cpu, + &socket, &die, &core, &thread); + + virCommandAddArg(cmd, "-numa"); + + /* The simple fact that dies are supported by QEMU doesn't mean we can + * put it onto command line. QEMU will accept die-id only if -smp dies + * was set to a value greater than 1. On the other hand, this allows us + * to generate shorter command line. */ + if (diesSupported && cpu && cpu->dies > 1) { + virCommandAddArgFormat(cmd, + "cpu,node-id=%zu,socket-id=%u,die-id=%u,core-id=%u,thread-id=%u", + nodeid, socket, die, core, thread); + } else { + virCommandAddArgFormat(cmd, + "cpu,node-id=%zu,socket-id=%u,core-id=%u,thread-id=%u", + nodeid, socket, core, thread); + } + } +} + + static int qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg, virDomainDef *def, @@ -7444,6 +7532,7 @@ qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg, virBuffer *nodeBackends = NULL; bool needBackend = false; bool hmat = false; + bool newCpus = false; int ret = -1; size_t ncells = virDomainNumaGetNodeCount(def->numa); ssize_t masterInitiator = -1; @@ -7484,6 +7573,17 @@ qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg, qemuBuildMemPathStr(def, cmd, priv) < 0) goto cleanup; + /* Use modern style of specifying vCPU topology only if: + * -numa cpu is available, introduced in the same time as -numa + * dist, hence slightly misleading capability test, and + * query-hotpluggable-cpus is avialable, because then + * qemuValidateDomainDef() ensures that if + * topology is specified it matches max vCPU + * count and we can make some shortcuts in + * qemuTranlsatevCPUID(). 
+ */ + newCpus = virQEMUCapsGet(qemuCaps, QEMU_CAPS_QUERY_HOTPLUGGABLE_CPUS); + for (i = 0; i < ncells; i++) { if (virDomainNumaGetNodeCpumask(def->numa, i)) { masterInitiator = i; @@ -7498,6 +7598,7 @@ qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg, } for (i = 0; i < ncells; i++) { + virBitmap *cpu = virDomainNumaGetNodeCpumask(def->numa, i); ssize_t initiator = virDomainNumaGetNodeInitiator(def->numa, i); if (needBackend) { @@ -7508,7 +7609,9 @@ qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg, virCommandAddArg(cmd, "-numa"); virBufferAsprintf(&buf, "node,nodeid=%zu", i); - if (qemuBuildNumaCPUs(&buf, virDomainNumaGetNodeCpumask(def->numa, i)) < 0) + /* -numa cpu is supported from the same release as -numa dist */ + if (!newCpus && + qemuBuildNumaOldCPUs(&buf, cpu) < 0) goto cleanup; if (hmat) { @@ -7525,6 +7628,9 @@ qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg, virDomainNumaGetNodeMemorySize(def->numa, i) / 1024); virCommandAddArgBuffer(cmd, &buf); + + if (newCpus) + qemuBuildNumaNewCPUs(cmd, def->cpu, cpu, i, qemuCaps); } /* If NUMA node distance is specified for at least one pair diff --git a/tests/qemuxml2argvdata/hugepages-nvdimm.x86_64-latest.args b/tests/qemuxml2argvdata/hugepages-nvdimm.x86_64-latest.args index 9f3c6fa63f..8af4b44758 100644 --- a/tests/qemuxml2argvdata/hugepages-nvdimm.x86_64-latest.args +++ b/tests/qemuxml2argvdata/hugepages-nvdimm.x86_64-latest.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-file","id":"ram-node0","mem-path":"/dev/hugepages2M/libvirt/qemu/-1-QEMUGuest1","share":true,"prealloc":true,"size":1073741824}' \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-file","id":"memnvdimm0","mem-path":"/tmp/nvdimm","share":true,"prealloc":true,"size":536870912}' \ -device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memfd-memory-default-hugepage.x86_64-latest.args b/tests/qemuxml2argvdata/memfd-memory-default-hugepage.x86_64-latest.args index 5e54908666..e0114d3423 100644 --- a/tests/qemuxml2argvdata/memfd-memory-default-hugepage.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memfd-memory-default-hugepage.x86_64-latest.args @@ -16,7 +16,15 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-instance-00000092/.config \ -overcommit mem-lock=off \ -smp 8,sockets=1,dies=1,cores=8,threads=1 \ -object '{"qom-type":"memory-backend-memfd","id":"ram-node0","hugetlb":true,"hugetlbsize":2097152,"share":true,"prealloc":true,"size":15032385536,"host-nodes":[3],"policy":"preferred"}' \ --numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=1,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=2,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=3,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=4,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=5,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=6,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=7,thread-id=0 \ -uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \ -display none \ -no-user-config \ diff --git 
a/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args b/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args index 3b33db3c55..dd517ebf8a 100644 --- a/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memfd-memory-numa.x86_64-latest.args @@ -16,7 +16,15 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-instance-00000092/.config \ -overcommit mem-lock=off \ -smp 8,sockets=1,dies=1,cores=8,threads=1 \ -object '{"qom-type":"memory-backend-memfd","id":"ram-node0","hugetlb":true,"hugetlbsize":2097152,"share":true,"prealloc":true,"size":15032385536,"host-nodes":[3],"policy":"preferred"}' \ --numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=1,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=2,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=3,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=4,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=5,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=6,thread-id=0 \ +-numa cpu,node-id=0,socket-id=0,core-id=7,thread-id=0 \ -object '{"qom-type":"memory-backend-file","id":"memnvdimm0","mem-path":"/tmp/nvdimm","share":true,"prealloc":true,"size":536870912,"host-nodes":[3],"policy":"preferred"}' \ -device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-access.x86_64-latest.args b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-access.x86_64-latest.args index d124ba7f29..8ff665e658 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-access.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-access.x86_64-latest.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node0","size":224395264}' \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-file","id":"memnvdimm0","mem-path":"/tmp/nvdimm","share":false,"prealloc":true,"size":536870912}' \ -device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.x86_64-5.2.0.args b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.x86_64-5.2.0.args index 0c1b9a6de6..a37467c68c 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.x86_64-5.2.0.args +++ b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.x86_64-5.2.0.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object memory-backend-ram,id=ram-node0,size=224395264 \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object memory-backend-file,id=memnvdimm0,mem-path=/tmp/nvdimm,share=off,prealloc=on,size=536870912,align=2097152 \ -device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.x86_64-latest.args 
b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.x86_64-latest.args index c3f5c8d558..52fabbb89e 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.x86_64-latest.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node0","size":224395264}' \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-file","id":"memnvdimm0","mem-path":"/tmp/nvdimm","share":false,"prealloc":true,"size":536870912,"align":2097152}' \ -device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-label.x86_64-5.2.0.args b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-label.x86_64-5.2.0.args index d7c61b88a4..48eed90399 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-label.x86_64-5.2.0.args +++ b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-label.x86_64-5.2.0.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object memory-backend-ram,id=ram-node0,size=224395264 \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object memory-backend-file,id=memnvdimm0,mem-path=/tmp/nvdimm,share=off,prealloc=on,size=536870912 \ -device nvdimm,node=0,label-size=131072,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-label.x86_64-latest.args b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-label.x86_64-latest.args index 51ff3dc455..09d6a91b57 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-label.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-label.x86_64-latest.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node0","size":224395264}' \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-file","id":"memnvdimm0","mem-path":"/tmp/nvdimm","share":false,"prealloc":true,"size":536870912}' \ -device nvdimm,node=0,label-size=131072,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.x86_64-5.2.0.args b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.x86_64-5.2.0.args index 42fb1c9acb..a023974513 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.x86_64-5.2.0.args +++ b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.x86_64-5.2.0.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object memory-backend-ram,id=ram-node0,size=224395264 \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa 
node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object memory-backend-file,id=memnvdimm0,mem-path=/tmp/nvdimm,share=off,size=536870912,pmem=on \ -device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.x86_64-latest.args b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.x86_64-latest.args index 19ecd08ae5..06d3bdcfe0 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.x86_64-latest.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node0","size":224395264}' \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-file","id":"memnvdimm0","mem-path":"/tmp/nvdimm","share":false,"size":536870912,"pmem":true}' \ -device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-readonly.x86_64-5.2.0.args b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-readonly.x86_64-5.2.0.args index 15e1ee8b6c..b8480e1252 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-readonly.x86_64-5.2.0.args +++ b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-readonly.x86_64-5.2.0.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object memory-backend-ram,id=ram-node0,size=224395264 \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object memory-backend-file,id=memnvdimm0,mem-path=/tmp/nvdimm,share=off,prealloc=on,size=536870912 \ -device nvdimm,node=0,unarmed=on,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-readonly.x86_64-latest.args b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-readonly.x86_64-latest.args index 6e9cf15a14..b82689f828 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-nvdimm-readonly.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memory-hotplug-nvdimm-readonly.x86_64-latest.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node0","size":224395264}' \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-file","id":"memnvdimm0","mem-path":"/tmp/nvdimm","share":false,"prealloc":true,"size":536870912}' \ -device nvdimm,node=0,unarmed=on,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-nvdimm.x86_64-latest.args b/tests/qemuxml2argvdata/memory-hotplug-nvdimm.x86_64-latest.args index 789f4fa11f..a05dcbc66b 100644 --- 
a/tests/qemuxml2argvdata/memory-hotplug-nvdimm.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memory-hotplug-nvdimm.x86_64-latest.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node0","size":1073741824}' \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-file","id":"memnvdimm0","mem-path":"/tmp/nvdimm","prealloc":true,"size":536870912}' \ -device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-virtio-pmem.x86_64-5.2.0.args b/tests/qemuxml2argvdata/memory-hotplug-virtio-pmem.x86_64-5.2.0.args index 969e2dbd7d..9f0b7a4007 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-virtio-pmem.x86_64-5.2.0.args +++ b/tests/qemuxml2argvdata/memory-hotplug-virtio-pmem.x86_64-5.2.0.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object memory-backend-ram,id=ram-node0,size=2145386496 \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object memory-backend-file,id=memvirtiopmem0,mem-path=/tmp/virtio_pmem,share=on,size=536870912 \ -device virtio-pmem-pci,memdev=memvirtiopmem0,id=virtiopmem0,bus=pci.0,addr=0x5 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/memory-hotplug-virtio-pmem.x86_64-latest.args b/tests/qemuxml2argvdata/memory-hotplug-virtio-pmem.x86_64-latest.args index 4b1c17378b..ed785fb9d1 100644 --- a/tests/qemuxml2argvdata/memory-hotplug-virtio-pmem.x86_64-latest.args +++ b/tests/qemuxml2argvdata/memory-hotplug-virtio-pmem.x86_64-latest.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,dies=1,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node0","size":2145386496}' \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-file","id":"memvirtiopmem0","mem-path":"/tmp/virtio_pmem","share":true,"size":536870912}' \ -device virtio-pmem-pci,memdev=memvirtiopmem0,id=virtiopmem0,bus=pci.0,addr=0x5 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ diff --git a/tests/qemuxml2argvdata/numatune-hmat.x86_64-latest.args b/tests/qemuxml2argvdata/numatune-hmat.x86_64-latest.args index 54ab91b09c..9c2d210963 100644 --- a/tests/qemuxml2argvdata/numatune-hmat.x86_64-latest.args +++ b/tests/qemuxml2argvdata/numatune-hmat.x86_64-latest.args @@ -16,11 +16,23 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest/.config \ -overcommit mem-lock=off \ -smp 12,sockets=12,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node0","size":2147483648}' \ --numa node,nodeid=0,cpus=0-3,initiator=0,memdev=ram-node0 \ +-numa node,nodeid=0,initiator=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ +-numa 
cpu,node-id=0,socket-id=2,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=3,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node1","size":2147483648}' \ --numa node,nodeid=1,cpus=4-7,initiator=1,memdev=ram-node1 \ +-numa node,nodeid=1,initiator=1,memdev=ram-node1 \ +-numa cpu,node-id=1,socket-id=4,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=5,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=6,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=7,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node2","size":2147483648}' \ --numa node,nodeid=2,cpus=8-11,initiator=2,memdev=ram-node2 \ +-numa node,nodeid=2,initiator=2,memdev=ram-node2 \ +-numa cpu,node-id=2,socket-id=8,core-id=0,thread-id=0 \ +-numa cpu,node-id=2,socket-id=9,core-id=0,thread-id=0 \ +-numa cpu,node-id=2,socket-id=10,core-id=0,thread-id=0 \ +-numa cpu,node-id=2,socket-id=11,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node3","size":2147483648}' \ -numa node,nodeid=3,initiator=0,memdev=ram-node3 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node4","size":2147483648}' \ diff --git a/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args b/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args index 6d5e2eb76e..d58e6c05ed 100644 --- a/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args +++ b/tests/qemuxml2argvdata/numatune-memnode-restrictive-mode.x86_64-latest.args @@ -16,11 +16,43 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest/.config \ -overcommit mem-lock=off \ -smp 32,sockets=32,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node0","size":20971520}' \ --numa node,nodeid=0,cpus=0,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node1","size":676331520}' \ --numa node,nodeid=1,cpus=1-27,cpus=29,memdev=ram-node1 \ +-numa node,nodeid=1,memdev=ram-node1 \ +-numa cpu,node-id=1,socket-id=1,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=2,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=3,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=4,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=5,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=6,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=7,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=8,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=9,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=10,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=11,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=12,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=13,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=14,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=15,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=16,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=17,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=18,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=19,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=20,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=21,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=22,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=23,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=24,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=25,core-id=0,thread-id=0 \ +-numa 
cpu,node-id=1,socket-id=26,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=27,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=29,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node2","size":24578621440}' \ --numa node,nodeid=2,cpus=28,cpus=30-31,memdev=ram-node2 \ +-numa node,nodeid=2,memdev=ram-node2 \ +-numa cpu,node-id=2,socket-id=28,core-id=0,thread-id=0 \ +-numa cpu,node-id=2,socket-id=30,core-id=0,thread-id=0 \ +-numa cpu,node-id=2,socket-id=31,core-id=0,thread-id=0 \ -uuid 9f4b6512-e73a-4a25-93e8-5307802821ce \ -display none \ -no-user-config \ diff --git a/tests/qemuxml2argvdata/numatune-memnode.x86_64-5.2.0.args b/tests/qemuxml2argvdata/numatune-memnode.x86_64-5.2.0.args index 23bab6fca4..92818c8eee 100644 --- a/tests/qemuxml2argvdata/numatune-memnode.x86_64-5.2.0.args +++ b/tests/qemuxml2argvdata/numatune-memnode.x86_64-5.2.0.args @@ -16,11 +16,43 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest/.config \ -overcommit mem-lock=off \ -smp 32,sockets=32,cores=1,threads=1 \ -object memory-backend-ram,id=ram-node0,size=20971520,host-nodes=3,policy=preferred \ --numa node,nodeid=0,cpus=0,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ -object memory-backend-ram,id=ram-node1,size=676331520,host-nodes=0-7,policy=bind \ --numa node,nodeid=1,cpus=1-27,cpus=29,memdev=ram-node1 \ +-numa node,nodeid=1,memdev=ram-node1 \ +-numa cpu,node-id=1,socket-id=1,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=2,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=3,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=4,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=5,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=6,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=7,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=8,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=9,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=10,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=11,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=12,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=13,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=14,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=15,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=16,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=17,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=18,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=19,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=20,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=21,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=22,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=23,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=24,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=25,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=26,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=27,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=29,core-id=0,thread-id=0 \ -object memory-backend-ram,id=ram-node2,size=24578621440,host-nodes=1-2,host-nodes=5,host-nodes=7,policy=bind \ --numa node,nodeid=2,cpus=28,cpus=30-31,memdev=ram-node2 \ +-numa node,nodeid=2,memdev=ram-node2 \ +-numa cpu,node-id=2,socket-id=28,core-id=0,thread-id=0 \ +-numa cpu,node-id=2,socket-id=30,core-id=0,thread-id=0 \ +-numa cpu,node-id=2,socket-id=31,core-id=0,thread-id=0 \ -uuid 9f4b6512-e73a-4a25-93e8-5307802821ce \ -display none \ -no-user-config \ diff --git 
a/tests/qemuxml2argvdata/numatune-memnode.x86_64-latest.args b/tests/qemuxml2argvdata/numatune-memnode.x86_64-latest.args index fa639f48fe..d32e4e3164 100644 --- a/tests/qemuxml2argvdata/numatune-memnode.x86_64-latest.args +++ b/tests/qemuxml2argvdata/numatune-memnode.x86_64-latest.args @@ -16,11 +16,43 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest/.config \ -overcommit mem-lock=off \ -smp 32,sockets=32,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node0","size":20971520,"host-nodes":[3],"policy":"preferred"}' \ --numa node,nodeid=0,cpus=0,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node1","size":676331520,"host-nodes":[0,1,2,3,4,5,6,7],"policy":"bind"}' \ --numa node,nodeid=1,cpus=1-27,cpus=29,memdev=ram-node1 \ +-numa node,nodeid=1,memdev=ram-node1 \ +-numa cpu,node-id=1,socket-id=1,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=2,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=3,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=4,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=5,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=6,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=7,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=8,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=9,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=10,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=11,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=12,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=13,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=14,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=15,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=16,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=17,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=18,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=19,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=20,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=21,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=22,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=23,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=24,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=25,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=26,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=27,core-id=0,thread-id=0 \ +-numa cpu,node-id=1,socket-id=29,core-id=0,thread-id=0 \ -object '{"qom-type":"memory-backend-ram","id":"ram-node2","size":24578621440,"host-nodes":[1,2,5,7],"policy":"bind"}' \ --numa node,nodeid=2,cpus=28,cpus=30-31,memdev=ram-node2 \ +-numa node,nodeid=2,memdev=ram-node2 \ +-numa cpu,node-id=2,socket-id=28,core-id=0,thread-id=0 \ +-numa cpu,node-id=2,socket-id=30,core-id=0,thread-id=0 \ +-numa cpu,node-id=2,socket-id=31,core-id=0,thread-id=0 \ -uuid 9f4b6512-e73a-4a25-93e8-5307802821ce \ -display none \ -no-user-config \ diff --git a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args index 6311f8f65e..68b0fcaae9 100644 --- a/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args +++ b/tests/qemuxml2argvdata/vhost-user-fs-fd-memory.x86_64-latest.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-guest/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,cores=1,threads=1 \ -object 
'{"qom-type":"memory-backend-file","id":"ram-node0","mem-path":"/var/lib/libvirt/qemu/ram/-1-guest/ram-node0","share":true,"size":15032385536}' \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \ -display none \ -no-user-config \ diff --git a/tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args index 58570592eb..f127112452 100644 --- a/tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args +++ b/tests/qemuxml2argvdata/vhost-user-fs-hugepages.x86_64-latest.args @@ -16,7 +16,9 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-guest/.config \ -overcommit mem-lock=off \ -smp 2,sockets=2,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-file","id":"ram-node0","mem-path":"/dev/hugepages2M/libvirt/qemu/-1-guest","share":true,"prealloc":true,"size":2147483648}' \ --numa node,nodeid=0,cpus=0-1,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ +-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 \ -uuid 1ccfd97d-5eb4-478a-bbe6-88d254c16db7 \ -display none \ -no-user-config \ diff --git a/tests/qemuxml2argvdata/vhost-user-gpu-secondary.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-user-gpu-secondary.x86_64-latest.args index 6df0365e76..87d0f430c2 100644 --- a/tests/qemuxml2argvdata/vhost-user-gpu-secondary.x86_64-latest.args +++ b/tests/qemuxml2argvdata/vhost-user-gpu-secondary.x86_64-latest.args @@ -16,7 +16,8 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 1,sockets=1,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-memfd","id":"ram-node0","share":true,"size":224395264}' \ --numa node,nodeid=0,cpus=0,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ -display none \ -no-user-config \ diff --git a/tests/qemuxml2argvdata/vhost-user-vga.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-user-vga.x86_64-latest.args index 0bdd98d3b6..8c463496a1 100644 --- a/tests/qemuxml2argvdata/vhost-user-vga.x86_64-latest.args +++ b/tests/qemuxml2argvdata/vhost-user-vga.x86_64-latest.args @@ -16,7 +16,8 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 1,sockets=1,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-memfd","id":"ram-node0","share":true,"size":224395264}' \ --numa node,nodeid=0,cpus=0,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ -display none \ -no-user-config \ -- 2.32.0

On Tue, Sep 21, 2021 at 16:50:31 +0200, Michal Privoznik wrote:
QEMU is trying to obsolete -numa node,cpus= because that uses ambiguous vCPU id to [socket, die, core, thread] mapping. The new form is:
-numa cpu,node-id=N,socket-id=S,die-id=D,core-id=C,thread-id=T
which is repeated for every vCPU and places it at [S, D, C, T] into guest NUMA node N.
While in general this is magic mapping, we can deal with it. Firstly, with QEMU 2.7 or newer, libvirt ensures that if topology is given then maxvcpus must be sockets * dies * cores * threads (i.e. there are no 'holes'). Secondly, if no topology is given then libvirt itself places each vCPU into a different socket (basically, it fakes topology of: [maxvcpus, 1, 1, 1]) Thirdly, we can copy whatever QEMU is doing when mapping vCPUs onto topology, to make sure vCPUs don't start to move around.
There's a problem with this premise though and unfortunately we don't seem to have qemuxml2argvtest for it.

On PPC64, in certain situations the CPU can be configured such that threads are visible only to VMs. This has substantial impact on how CPUs are configured using the modern parameters (until now used only for cpu hotplug purposes, and that's the reason vCPU hotplug has such complicated incantations when starting the VM).

In the above situation a CPU with topology of:
  sockets=1, cores=4, threads=8 (thus 32 cpus)

will only expose 4 CPU "devices":
  core-id: 0, core-id: 8, core-id: 16 and core-id: 24

yet the guest will correctly see 32 cpus when used as such.

You can see this in:
  tests/qemuhotplugtestcpus/ppc64-modern-individual-monitor.json

Also note that the 'props' object does _not_ have any socket-id, and management apps are supposed to pass in 'props' as is. (There's a bunch of code to do that on hotplug).

The problem is that you need to query the topology first (unless we want to duplicate all of qemu code that has to do with topology state and keep up with changes to it) to know how it's behaving on current machine. This historically was not possible. The supposed solution for this was the pre-config state where we'd be able to query and set it up via QMP, but I was not keeping up sufficiently with that work, so I don't know if it's possible.

If preconfig is a viable option we IMO should start using it sooner rather than later and avoid duplicating qemu's logic here.
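A rough sketch of that mismatch, using the sockets=1, cores=4, threads=8
figures above (the core-id values 0, 8, 16 and 24 come from the description;
everything else is illustrative):

#include <stdio.h>

/* Contrast the patch's linear core numbering with the core-ids QEMU
 * exposes on such a PPC64 machine: one "device" per core, with core-id
 * stepping by the number of threads (0, 8, 16, 24). */
int main(void)
{
    const unsigned int cores = 4, threads = 8;
    unsigned int core;

    for (core = 0; core < cores; core++) {
        unsigned int first_vcpu = core * threads;
        unsigned int linear_core_id = first_vcpu / threads % cores; /* 0..3 */
        unsigned int spapr_core_id = first_vcpu;                    /* 0, 8, 16, 24 */

        printf("core %u: linear formula gives core-id=%u, "
               "query-hotpluggable-cpus reports core-id=%u\n",
               core, linear_core_id, spapr_core_id);
    }
    return 0;
}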
Note, migration from old to new cmd line works and therefore doesn't need any special handling.
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1678085 Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 112 +++++++++++++++++- .../hugepages-nvdimm.x86_64-latest.args | 4 +- ...memory-default-hugepage.x86_64-latest.args | 10 +- .../memfd-memory-numa.x86_64-latest.args | 10 +- ...y-hotplug-nvdimm-access.x86_64-latest.args | 4 +- ...ory-hotplug-nvdimm-align.x86_64-5.2.0.args | 4 +- ...ry-hotplug-nvdimm-align.x86_64-latest.args | 4 +- ...ory-hotplug-nvdimm-label.x86_64-5.2.0.args | 4 +- ...ry-hotplug-nvdimm-label.x86_64-latest.args | 4 +- ...mory-hotplug-nvdimm-pmem.x86_64-5.2.0.args | 4 +- ...ory-hotplug-nvdimm-pmem.x86_64-latest.args | 4 +- ...-hotplug-nvdimm-readonly.x86_64-5.2.0.args | 4 +- ...hotplug-nvdimm-readonly.x86_64-latest.args | 4 +- .../memory-hotplug-nvdimm.x86_64-latest.args | 4 +- ...mory-hotplug-virtio-pmem.x86_64-5.2.0.args | 4 +- ...ory-hotplug-virtio-pmem.x86_64-latest.args | 4 +- .../numatune-hmat.x86_64-latest.args | 18 ++- ...emnode-restrictive-mode.x86_64-latest.args | 38 +++++- .../numatune-memnode.x86_64-5.2.0.args | 38 +++++- .../numatune-memnode.x86_64-latest.args | 38 +++++- ...vhost-user-fs-fd-memory.x86_64-latest.args | 4 +- ...vhost-user-fs-hugepages.x86_64-latest.args | 4 +- ...host-user-gpu-secondary.x86_64-latest.args | 3 +- .../vhost-user-vga.x86_64-latest.args | 3 +- 24 files changed, 296 insertions(+), 34 deletions(-)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index f04ae1e311..5192bd7630 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c
[...]
@@ -7432,6 +7432,94 @@ qemuBuildNumaCPUs(virBuffer *buf, }
+/** + * qemuTranlsatevCPUID: + * + * For given vCPU @id and vCPU topology (@cpu) compute corresponding + * @socket, @die, @core and @thread). This assumes linear topology, + * that is every [socket, die, core, thread] combination is valid vCPU + * ID and there are no 'holes'. This is ensured by + * qemuValidateDomainDef() if QEMU_CAPS_QUERY_HOTPLUGGABLE_CPUS is + * set.
As noted above, this assumption does not hold on PPC64. There are indeed "holes" in certain cases, and even though they are filled with cpus you are, e.g., unable to spread them across multiple NUMA nodes. In fact, allowing two sibling threads to be spread across multiple NUMA nodes is also a nonsensical configuration, which we unfortunately allowed.
+ * Moreover, if @diesSupported is false (QEMU lacks + * QEMU_CAPS_SMP_DIES) then @die is set to zero and @socket is + * computed without taking number of dies into account. + * + * The algorithm is shamelessly copied over from QEMU's + * x86_topo_ids_from_idx() and its history (before introducing dies). + */ +static void +qemuTranlsatevCPUID(unsigned int id, + bool diesSupported, + virCPUDef *cpu, + unsigned int *socket, + unsigned int *die, + unsigned int *core, + unsigned int *thread) +{ + if (cpu && cpu->sockets) { + *thread = id % cpu->threads; + *core = id / cpu->threads % cpu->cores; + if (diesSupported) { + *die = id / (cpu->cores * cpu->threads) % cpu->dies; + *socket = id / (cpu->dies * cpu->cores * cpu->threads); + } else { + *die = 0; + *socket = id / (cpu->cores * cpu->threads) % cpu->sockets; + } + } else { + /* If no topology was provided, then qemuBuildSmpCommandLine() + * puts all vCPUs into a separate socket. */ + *thread = 0; + *core = 0; + *die = 0; + *socket = id; + } +} + + +static void +qemuBuildNumaNewCPUs(virCommand *cmd, + virCPUDef *cpu, + virBitmap *cpumask, + size_t nodeid, + virQEMUCaps *qemuCaps) +{ + const bool diesSupported = virQEMUCapsGet(qemuCaps, QEMU_CAPS_SMP_DIES); + ssize_t vcpuid = -1; + + if (!cpumask) + return; + + while ((vcpuid = virBitmapNextSetBit(cpumask, vcpuid)) >= 0) { + unsigned int socket; + unsigned int die; + unsigned int core; + unsigned int thread; + + qemuTranlsatevCPUID(vcpuid, diesSupported, cpu, + &socket, &die, &core, &thread); + + virCommandAddArg(cmd, "-numa"); + + /* The simple fact that dies are supported by QEMU doesn't mean we can + * put it onto command line. QEMU will accept die-id only if -smp dies + * was set to a value greater than 1. On the other hand, this allows us + * to generate shorter command line. */ + if (diesSupported && cpu && cpu->dies > 1) { + virCommandAddArgFormat(cmd, + "cpu,node-id=%zu,socket-id=%u,die-id=%u,core-id=%u,thread-id=%u", + nodeid, socket, die, core, thread); + } else { + virCommandAddArgFormat(cmd, + "cpu,node-id=%zu,socket-id=%u,core-id=%u,thread-id=%u", + nodeid, socket, core, thread); + } + } +} + + static int qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg, virDomainDef *def,
[...]
@@ -7484,6 +7573,17 @@ qemuBuildNumaCommandLine(virQEMUDriverConfig *cfg, qemuBuildMemPathStr(def, cmd, priv) < 0) goto cleanup;
+ /* Use modern style of specifying vCPU topology only if: + * -numa cpu is available, introduced in the same time as -numa + * dist, hence slightly misleading capability test, and + * query-hotpluggable-cpus is avialable, because then + * qemuValidateDomainDef() ensures that if + * topology is specified it matches max vCPU + * count and we can make some shortcuts in + * qemuTranlsatevCPUID(). + */ + newCpus = virQEMUCapsGet(qemuCaps, QEMU_CAPS_QUERY_HOTPLUGGABLE_CPUS); + for (i = 0; i < ncells; i++) { if (virDomainNumaGetNodeCpumask(def->numa, i)) { masterInitiator = i;
[...]
diff --git a/tests/qemuxml2argvdata/hugepages-nvdimm.x86_64-latest.args b/tests/qemuxml2argvdata/hugepages-nvdimm.x86_64-latest.args index 9f3c6fa63f..8af4b44758 100644 --- a/tests/qemuxml2argvdata/hugepages-nvdimm.x86_64-latest.args +++ b/tests/qemuxml2argvdata/hugepages-nvdimm.x86_64-latest.args
[...]
diff --git a/tests/qemuxml2argvdata/vhost-user-vga.x86_64-latest.args b/tests/qemuxml2argvdata/vhost-user-vga.x86_64-latest.args index 0bdd98d3b6..8c463496a1 100644 --- a/tests/qemuxml2argvdata/vhost-user-vga.x86_64-latest.args +++ b/tests/qemuxml2argvdata/vhost-user-vga.x86_64-latest.args @@ -16,7 +16,8 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \ -overcommit mem-lock=off \ -smp 1,sockets=1,cores=1,threads=1 \ -object '{"qom-type":"memory-backend-memfd","id":"ram-node0","share":true,"size":224395264}' \ --numa node,nodeid=0,cpus=0,memdev=ram-node0 \ +-numa node,nodeid=0,memdev=ram-node0 \ +-numa cpu,node-id=0,socket-id=0,core-id=0,thread-id=0 \ -uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ -display none \ -no-user-config \
None of the impacted tests have 'threads' set to anything other than 1, so we are not getting any 'thread-id' coverage. Please add some before this commit. Also, as noted, we'll need some PPC64 tests that are impacted.

On Thu, 30 Sep 2021 14:08:34 +0200 Peter Krempa <pkrempa@redhat.com> wrote:
On Tue, Sep 21, 2021 at 16:50:31 +0200, Michal Privoznik wrote:
QEMU is trying to obsolete -numa node,cpus= because that uses ambiguous vCPU id to [socket, die, core, thread] mapping. The new form is:
-numa cpu,node-id=N,socket-id=S,die-id=D,core-id=C,thread-id=T
which is repeated for every vCPU and places it at [S, D, C, T] into guest NUMA node N.
While in general this is magic mapping, we can deal with it. Firstly, with QEMU 2.7 or newer, libvirt ensures that if topology is given then maxvcpus must be sockets * dies * cores * threads (i.e. there are no 'holes'). Secondly, if no topology is given then libvirt itself places each vCPU into a different socket (basically, it fakes topology of: [maxvcpus, 1, 1, 1]) Thirdly, we can copy whatever QEMU is doing when mapping vCPUs onto topology, to make sure vCPUs don't start to move around.
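As an illustration of that linear mapping (a minimal sketch only, not code from the patch): with sockets=2, dies=1, cores=2, threads=2, a vCPU ID of 5 decomposes to socket 1, die 0, core 0, thread 1.

    /* Sketch: decompose a linear vCPU ID into [socket, die, core, thread],
     * assuming maxvcpus == sockets * dies * cores * threads (no holes). */
    static void
    vcpuIDToTopology(unsigned int id,
                     unsigned int dies,
                     unsigned int cores,
                     unsigned int threads,
                     unsigned int *socket,
                     unsigned int *die,
                     unsigned int *core,
                     unsigned int *thread)
    {
        *thread = id % threads;
        *core = (id / threads) % cores;
        *die = (id / (cores * threads)) % dies;
        *socket = id / (dies * cores * threads);
    }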
There's a problem with this premise though and unfortunately we don't seem to have qemuxml2argvtest for it.
On PPC64, in certain situations the CPU can be configured such that threads are visible only to VMs. This has substantial impact on how CPUs are configured using the modern parameters (until now used only for cpu hotplug purposes, and that's the reason vCPU hotplug has such complicated incantations when starting the VM).
In the above situation a CPU with topology of: sockets=1, cores=4, threads=8 (thus 32 cpus)
will only expose 4 CPU "devices".
core-id: 0, core-id: 8, core-id: 16 and core-id: 24
yet the guest will correctly see 32 cpus when used as such.
You can see this in:
tests/qemuhotplugtestcpus/ppc64-modern-individual-monitor.json
Also note that the 'props' object does _not_ have any socket-id, and management apps are supposed to pass in 'props' as is. (There's a bunch of code to do that on hotplug).
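For reference, the entries in that test file look roughly like the following (shape recalled from memory, so treat it as a sketch rather than the exact test data): one core-granular entity per entry, with only core-id in props and vcpus-count covering all of its threads.

    {"props": {"core-id": 8}, "vcpus-count": 8, "type": "host-spapr-cpu-core"}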
The problem is that you need to query the topology first (unless we want to duplicate all of the QEMU code that has to do with topology state and keep up with changes to it) to know how it behaves on the current machine. This historically was not possible. The supposed solution for this was the pre-config state where we'd be able to query and set it up via QMP, but I was not keeping up sufficiently with that work, so I don't know if it's possible.
If preconfig is a viable option we IMO should start using it sooner rather than later and avoid duplicating qemu's logic here.
Using preconfig is the preferable variant, otherwise libvirt would end up duplicating topology logic which differs not only between targets but also between machine/cpu types. The closest example of how to use preconfig is the pc_dynamic_cpu_cfg() test case. It uses query-hotpluggable-cpus only for verification, but one can use the command at the preconfig stage to get the topology for a given -smp/-machine type combination.
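A rough sketch of such a flow (the command names exist in current QEMU, but the exact set-numa-node arguments below are an assumption and would need checking against the QAPI schema): start QEMU with -S -preconfig, then over QMP query the topology and assign vCPUs before leaving the preconfig state:

    {"execute": "qmp_capabilities"}
    {"execute": "query-hotpluggable-cpus"}
    {"execute": "set-numa-node", "arguments": {"type": "cpu", "node-id": 0, "socket-id": 0, "core-id": 0, "thread-id": 0}}
    {"execute": "x-exit-preconfig"}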
Note, migration from old to new cmd line works and therefore doesn't need any special handling.
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1678085 Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 112 +++++++++++++++++- .../hugepages-nvdimm.x86_64-latest.args | 4 +- ...memory-default-hugepage.x86_64-latest.args | 10 +- .../memfd-memory-numa.x86_64-latest.args | 10 +- ...y-hotplug-nvdimm-access.x86_64-latest.args | 4 +- ...ory-hotplug-nvdimm-align.x86_64-5.2.0.args | 4 +- ...ry-hotplug-nvdimm-align.x86_64-latest.args | 4 +- ...ory-hotplug-nvdimm-label.x86_64-5.2.0.args | 4 +- ...ry-hotplug-nvdimm-label.x86_64-latest.args | 4 +- ...mory-hotplug-nvdimm-pmem.x86_64-5.2.0.args | 4 +- ...ory-hotplug-nvdimm-pmem.x86_64-latest.args | 4 +- ...-hotplug-nvdimm-readonly.x86_64-5.2.0.args | 4 +- ...hotplug-nvdimm-readonly.x86_64-latest.args | 4 +- .../memory-hotplug-nvdimm.x86_64-latest.args | 4 +- ...mory-hotplug-virtio-pmem.x86_64-5.2.0.args | 4 +- ...ory-hotplug-virtio-pmem.x86_64-latest.args | 4 +- .../numatune-hmat.x86_64-latest.args | 18 ++- ...emnode-restrictive-mode.x86_64-latest.args | 38 +++++- .../numatune-memnode.x86_64-5.2.0.args | 38 +++++- .../numatune-memnode.x86_64-latest.args | 38 +++++- ...vhost-user-fs-fd-memory.x86_64-latest.args | 4 +- ...vhost-user-fs-hugepages.x86_64-latest.args | 4 +- ...host-user-gpu-secondary.x86_64-latest.args | 3 +- .../vhost-user-vga.x86_64-latest.args | 3 +- 24 files changed, 296 insertions(+), 34 deletions(-)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index f04ae1e311..5192bd7630 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c
[...]
@@ -7432,6 +7432,94 @@ qemuBuildNumaCPUs(virBuffer *buf, }
+/** + * qemuTranlsatevCPUID: + * + * For given vCPU @id and vCPU topology (@cpu) compute corresponding + * @socket, @die, @core and @thread). This assumes linear topology, + * that is every [socket, die, core, thread] combination is valid vCPU + * ID and there are no 'holes'. This is ensured by + * qemuValidateDomainDef() if QEMU_CAPS_QUERY_HOTPLUGGABLE_CPUS is + * set.
As noted above, this assumption does not hold on PPC64. There are indeed "holes" in certain cases, and even where they are filled with cpus you are e.g. unable to spread them across multiple NUMA nodes.
In fact, allowing two sibling threads to be spread across multiple NUMA nodes is also a nonsensical configuration, which we unfortunately allowed.
[...]

On 10/6/21 3:32 PM, Igor Mammedov wrote:
On Thu, 30 Sep 2021 14:08:34 +0200 Peter Krempa <pkrempa@redhat.com> wrote:
On Tue, Sep 21, 2021 at 16:50:31 +0200, Michal Privoznik wrote:
[...]
Using preconfig is the preferable variant, otherwise libvirt would end up duplicating topology logic which differs not only between targets but also between machine/cpu types.
The closest example of how to use preconfig is the pc_dynamic_cpu_cfg() test case. It uses query-hotpluggable-cpus only for verification, but one can use the command at the preconfig stage to get the topology for a given -smp/-machine type combination.
Alright, -preconfig should be pretty easy. However, I do have some points to raise/ask:

1) Currently, exit-preconfig is marked as experimental (hence its "x-" prefix). Before libvirt consumes it, QEMU should make it stable. Is there anything that stops QEMU from doing so, or is it just a matter of sending patches (I volunteer to do that)?

2) In my experiments I try to mimic what libvirt does. Here's my cmd line:

qemu-system-x86_64 \ -S \ -preconfig \ -cpu host \ -smp 120,sockets=2,dies=3,cores=4,threads=5 \ -object '{"qom-type":"memory-backend-memfd","id":"ram-node0","size":4294967296,"host-nodes":[0],"policy":"bind"}' \ -numa node,nodeid=0,memdev=ram-node0 \ -no-user-config \ -nodefaults \ -no-shutdown \ -qmp stdio

and here is my QMP log:

{"QMP": {"version": {"qemu": {"micro": 50, "minor": 1, "major": 6}, "package": "v6.1.0-1552-g362534a643"}, "capabilities": ["oob"]}}
{"execute":"qmp_capabilities"}
{"return": {}}
{"execute":"query-hotpluggable-cpus"}
{"return": [{"props": {"core-id": 3, "thread-id": 4, "die-id": 2, "socket-id": 1}, "vcpus-count": 1, "type": "host-x86_64-cpu"}, {"props": {"core-id": 3, "thread-id": 3, "die-id": 2, "socket-id": 1}, "vcpus-count": 1, "type": "host-x86_64-cpu"}, {"props": {"core-id": 3, "thread-id": 2, "die-id": 2, "socket-id": 1}, "vcpus-count": 1, "type": "host-x86_64-cpu"}, {"props": {"core-id": 3, "thread-id": 1, "die-id": 2, "socket-id": 1}, "vcpus-count": 1, "type": "host-x86_64-cpu"}, {"props": {"core-id": 3, "thread-id": 0, "die-id": 2, "socket-id": 1}, "vcpus-count": 1, "type": "host-x86_64-cpu"}, {"props": {"core-id": 2, "thread-id": 4, "die-id": 2, "socket-id": 1}, "vcpus-count": 1, "type": "host-x86_64-cpu"}, <snip/> {"props": {"core-id": 0, "thread-id": 0, "die-id": 0, "socket-id": 0}, "vcpus-count": 1, "type": "host-x86_64-cpu"}]}

I can see that query-hotpluggable-cpus returns an array. Can I safely assume that vCPU ID == index in the array? I mean, if I did have -numa node,cpus=X, can I do array[X] to obtain the mapping onto Core/Thread/Die/Socket which would then be fed to the 'set-numa-node' command? If not, what is the proper way to do it?

And one more thing - if QEMU has to keep the vCPU ID mapping code, what's the point in obsoleting -numa node,cpus=? In the end it is still QEMU who does the ID -> [Core,Thread,Die,Socket] translation, just with extra steps for mgmt applications.

Michal

On Wed, Oct 20, 2021 at 13:07:59 +0200, Michal Prívozník wrote:
On 10/6/21 3:32 PM, Igor Mammedov wrote:
On Thu, 30 Sep 2021 14:08:34 +0200 Peter Krempa <pkrempa@redhat.com> wrote:
[...]
I can see that query-hotpluggable-cpus returns an array. Can I safely assume that vCPU ID == index in the array? I mean, if I did have -numa
No, this assumption would be incorrect on the aforementioned PPC platform where one entry in the returned array can describe multiple cores. qemuDomainFilterHotplugVcpuEntities is the code that cross-references the libvirt "index" with the data returned by query-hotpluggable-cpus. The important bit is the 'vcpus-count' property. The code which deals with hotplug is already fetching everything that's needed.
node,cpus=X can I do array[X] to obtain mapping onto Core/Thread/ Die/Socket which would then be fed to 'set-numa-node' command. If not, what is the proper way to do it?
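A sketch of the cross-referencing described above (hypothetical helper and type names, not the actual qemuDomainFilterHotplugVcpuEntities code): walk the query-hotpluggable-cpus entries and accumulate vcpus-count until the libvirt vCPU index falls inside one of them.

    #include <sys/types.h>

    /* Hypothetical sketch: map a libvirt vCPU index onto the
     * query-hotpluggable-cpus entry that contains it. */
    typedef struct {
        unsigned int vcpusCount;  /* 'vcpus-count' of the entry */
        /* props (core-id, thread-id, ...) omitted for brevity */
    } HotpluggableCPU;

    static ssize_t
    findEntryForVcpu(const HotpluggableCPU *entries,
                     size_t nentries,
                     unsigned int vcpuIndex)
    {
        unsigned int first = 0;
        size_t i;

        for (i = 0; i < nentries; i++) {
            if (vcpuIndex < first + entries[i].vcpusCount)
                return i;
            first += entries[i].vcpusCount;
        }
        return -1;  /* vcpuIndex out of range */
    }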

On 10/20/21 1:18 PM, Peter Krempa wrote:
On Wed, Oct 20, 2021 at 13:07:59 +0200, Michal Prívozník wrote:
On 10/6/21 3:32 PM, Igor Mammedov wrote:
On Thu, 30 Sep 2021 14:08:34 +0200 Peter Krempa <pkrempa@redhat.com> wrote:
[...]
I can see that query-hotpluggable-cpus returns an array. Can I safely assume that vCPU ID == index in the array? I mean, if I did have -numa
No, this assumption would be incorrect on the aforementioned PPC platform where one entry in the returned array can describe multiple cores.
qemuDomainFilterHotplugVcpuEntities is the code that cross-references the libvirt "index" with the data returned by query-hotpluggable-cpus.
The important bit is the 'vcpus-count' property. The code which deals with hotplug is already fetching everything that's needed.
Ah, I see. So my assumption would be correct if vcpus-count would be 1 for all entries. If it isn't then I need to account for how much vcpus-count is in each entity. Fair enough. But qemuDomainFilterHotplugVcpuEntities() doesn't really do vCPU ID -> [socket, core, thread] translation, does it? But even if it did, I am still wondering what the purpose of this whole exercise is. QEMU won't be able to drop ID -> [socket, core, thread] mapping. The only thing it would be able to drop is a few lines of code handling command line. Am I missing something obvious? Michal

On Wed, 20 Oct 2021 16:15:29 +0200 Michal Prívozník <mprivozn@redhat.com> wrote:
On 10/20/21 1:18 PM, Peter Krempa wrote:
On Wed, Oct 20, 2021 at 13:07:59 +0200, Michal Prívozník wrote:
On 10/6/21 3:32 PM, Igor Mammedov wrote:
On Thu, 30 Sep 2021 14:08:34 +0200 Peter Krempa <pkrempa@redhat.com> wrote:
[...]
I can see that query-hotpluggable-cpus returns an array. Can I safely assume that vCPU ID == index in the array? I mean, if I did have -numa
No, this assumption would be incorrect on the aforementioned PPC platform where one entry in the returned array can describe multiple cores.
qemuDomainFilterHotplugVcpuEntities is the code that cross-references the libvirt "index" with the data returned by query-hotpluggable cpus.
The important bit is the 'vcpus-count' property. The code which deals with hotplug is already fetching everything that's needed.
Ah, I see. So my assumption would be correct if vcpus-count would be 1 for all entries. If it isn't then I need to account for how much
Only for some boards. An entry in the array describes a single entity that should be handled as a single device by the user (-device/plug/unplug/other mapping options), and the entity might have 1 or more vCPUs (threads) depending on the target arch/board.
vcpus-count is in each entity. Fair enough. But qemuDomainFilterHotplugVcpuEntities() doesn't really do vCPU ID -> [socket, core, thread] translation, does it?
But even if it did, I am still wondering what the purpose of this whole exercise is. QEMU won't be able to drop the ID -> [socket, core, thread] mapping. The only thing it would be able to drop is a few lines of code handling the command line. Am I missing something obvious?
I described in the other email why QEMU is dropping cpu_index on external interfaces (it's possible to drop it internally too, but I don't see much gain there vs the effort such refactoring would require).
Sure thing, you can invent/maintain a libvirt-internal "vCPU ID" -> [topo props] mapping if it's necessary. However, using just a "vCPU ID" will obscure topology information from upper layers. Maybe providing a list of CPUs as an external interface would be better; then the user can pick which CPUs they wish to add/delete/assign/... using items from that list.
Michal

On Wed, 20 Oct 2021 13:07:59 +0200 Michal Prívozník <mprivozn@redhat.com> wrote:
On 10/6/21 3:32 PM, Igor Mammedov wrote:
On Thu, 30 Sep 2021 14:08:34 +0200 Peter Krempa <pkrempa@redhat.com> wrote:
On Tue, Sep 21, 2021 at 16:50:31 +0200, Michal Privoznik wrote:
[...]
Alright, -preconfig should be pretty easy. However, I do have some points to raise/ask:
1) currently, exit-preconfig is marked as experimental (hence its "x-" prefix). Before libvirt consumes it, QEMU should make it stable. Is there anything that stops QEMU from doing so or is it just a matter of sending patches (I volunteer to do that)?
If I recall correctly, it was made experimental due to the lack of actual users (it was assumed that libvirt would consume it once available, but that didn't happen for quite a long time). So patches to make it a stable interface should be fine.
[...]
I can see that query-hotpluggable-cpus returns an array. Can I safely assume that vCPU ID == index in the array? I mean, if I did have -numa node,cpus=X can I do array[X] to obtain mapping onto Core/Thread/ Die/Socket which would then be fed to 'set-numa-node' command. If not, what is the proper way to do it?
From QEMU's point of view, you shouldn't assume anything about vCPU ordering within the returned array. It's an internal implementation detail and subject to change without notice. What you can assume is that the CPU descriptions in the array will be stable for a given combination of [machine version, smp option, CPU type].
And one more thing - if QEMU has to keep vCPU ID mapping code, what's the point in obsoleting -numa node,cpus=? In the end it is still QEMU who does the ID -> [Core,Thread,Die,Socket] translation but with extra steps for mgmt applications.
The point is that cpu_index is ambiguous and it's practically impossible for the user to tell exactly which vCPU it is dealing with, unless the user re-implements and keeps in sync the topology code for f(board, machine version, smp option, CPU type). So even if cpu_index is still used inside of QEMU for other purposes, the external interfaces and API will be using only the consistent topology tuple [Core,Thread,Die,Socket] to describe and address vCPUs, the same as device_add.
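For comparison, this is the kind of consistent addressing meant here (an assumed example of the device_add form used for vCPU hotplug; the driver name and id are illustrative, and the exact property set depends on the machine type):

    {"execute": "device_add", "arguments": {"driver": "host-x86_64-cpu", "id": "vcpu5", "socket-id": 1, "die-id": 0, "core-id": 0, "thread-id": 1}}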
Michal
participants (4):
- Igor Mammedov
- Michal Privoznik
- Michal Prívozník
- Peter Krempa