[libvirt] [PATCH v3 00/16] Support for per-guest-node binding

v3 of https://www.redhat.com/archives/libvir-list/2014-July/msg00372.html v3: - Michal's suggestions worked in - rebased on current master Martin Kletzander (16): qemu: purely a code movement qemu: remove useless error check conf: purely a code movement conf, schema: add 'id' field for cells numatune: create new module for numatune numatune: unify numatune struct and enum names numatune: Encapsulate numatune configuration in order to unify results conf, schema: add support for memnode elements numatune: add support for per-node memory bindings in private APIs qemu: allow qmp probing for cmdline options without params qemu: memory-backend-ram capability probing qemu: newer -numa parameter capability probing qemu: enable disjoint numa cpu ranges qemu: pass numa node binding preferences to qemu qemu: split out cpuset.mems setting qemu: leave restricting cpuset.mems after initialization docs/formatdomain.html.in | 32 +- docs/schemas/domaincommon.rng | 22 + po/POTFILES.in | 1 + src/Makefile.am | 3 +- src/conf/cpu_conf.c | 41 +- src/conf/cpu_conf.h | 3 +- src/conf/domain_conf.c | 203 ++----- src/conf/domain_conf.h | 10 +- src/conf/numatune_conf.c | 595 +++++++++++++++++++++ src/conf/numatune_conf.h | 108 ++++ src/libvirt_private.syms | 24 +- src/lxc/lxc_cgroup.c | 20 +- src/lxc/lxc_controller.c | 5 +- src/lxc/lxc_native.c | 15 +- src/parallels/parallels_driver.c | 7 +- src/qemu/qemu_capabilities.c | 16 +- src/qemu/qemu_capabilities.h | 2 + src/qemu/qemu_cgroup.c | 52 +- src/qemu/qemu_cgroup.h | 4 +- src/qemu/qemu_command.c | 98 +++- src/qemu/qemu_driver.c | 84 ++- src/qemu/qemu_monitor.c | 6 +- src/qemu/qemu_monitor.h | 3 +- src/qemu/qemu_monitor_json.c | 8 +- src/qemu/qemu_monitor_json.h | 3 +- src/qemu/qemu_process.c | 12 +- src/util/virnuma.c | 61 +-- src/util/virnuma.h | 28 +- tests/qemucapabilitiesdata/caps_1.6.50-1.caps | 1 + tests/qemucapabilitiesdata/caps_1.6.50-1.replies | 5 + tests/qemumonitorjsontest.c | 20 +- .../qemuxml2argv-cpu-numa-disjoint.args | 6 + .../qemuxml2argv-cpu-numa-disjoint.xml | 28 + tests/qemuxml2argvdata/qemuxml2argv-cpu-numa1.xml | 6 +- tests/qemuxml2argvdata/qemuxml2argv-cpu-numa2.xml | 6 +- tests/qemuxml2argvdata/qemuxml2argv-cpu-numa3.xml | 25 + .../qemuxml2argv-numatune-auto-prefer.xml | 29 + .../qemuxml2argv-numatune-memnode-no-memory.args | 8 + .../qemuxml2argv-numatune-memnode-no-memory.xml | 30 ++ .../qemuxml2argv-numatune-memnode-nocpu.xml | 25 + .../qemuxml2argv-numatune-memnode.args | 11 + .../qemuxml2argv-numatune-memnode.xml | 33 ++ .../qemuxml2argv-numatune-memnodes-problematic.xml | 31 ++ tests/qemuxml2argvtest.c | 12 + .../qemuxml2xmlout-cpu-numa1.xml | 28 + .../qemuxml2xmlout-cpu-numa2.xml | 28 + .../qemuxml2xmlout-numatune-auto-prefer.xml | 29 + .../qemuxml2xmlout-numatune-memnode.xml | 33 ++ tests/qemuxml2xmltest.c | 8 + 49 files changed, 1479 insertions(+), 389 deletions(-) create mode 100644 src/conf/numatune_conf.c create mode 100644 src/conf/numatune_conf.h create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa3.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-auto-prefer.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-nocpu.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnodes-problematic.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa1.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa2.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-auto-prefer.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-memnode.xml -- 2.0.0

to ease the review of commits to follow. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_command.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 2185ef4..4307f1f 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -6384,12 +6384,24 @@ qemuBuildNumaArgStr(const virDomainDef *def, virCommandPtr cmd) int ret = -1; for (i = 0; i < def->cpu->ncells; i++) { + int cellmem = VIR_DIV_UP(def->cpu->cells[i].mem, 1024); + def->cpu->cells[i].mem = cellmem * 1024; + VIR_FREE(cpumask); + + if (!(cpumask = virBitmapFormat(def->cpu->cells[i].cpumask))) + goto cleanup; + + if (strchr(cpumask, ',')) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("disjoint NUMA cpu ranges are not supported " + "with this QEMU")); + goto cleanup; + } + virCommandAddArg(cmd, "-numa"); virBufferAsprintf(&buf, "node,nodeid=%d", def->cpu->cells[i].cellid); virBufferAddLit(&buf, ",cpus="); - if (!(cpumask = virBitmapFormat(def->cpu->cells[i].cpumask))) - goto cleanup; /* Up through qemu 1.4, -numa does not accept a cpus * argument any more complex than start-stop. @@ -6397,16 +6409,8 @@ qemuBuildNumaArgStr(const virDomainDef *def, virCommandPtr cmd) * XXX For qemu 1.5, the syntax has not yet been decided; * but when it is, we need a capability bit and * translation of our cpumask into the qemu syntax. */ - if (strchr(cpumask, ',')) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("disjoint NUMA cpu ranges are not supported " - "with this QEMU")); - goto cleanup; - } virBufferAdd(&buf, cpumask, -1); - def->cpu->cells[i].mem = VIR_DIV_UP(def->cpu->cells[i].mem, - 1024) * 1024; - virBufferAsprintf(&buf, ",mem=%d", def->cpu->cells[i].mem / 1024); + virBufferAsprintf(&buf, ",mem=%d", cellmem); if (virBufferCheckError(&buf) < 0) goto cleanup; -- 2.0.0

Excerpt from the virCommandAddArgBuffer() description: "Correctly transfers memory errors or contents from buf to cmd." Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_command.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 4307f1f..9fc0778 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -6412,9 +6412,6 @@ qemuBuildNumaArgStr(const virDomainDef *def, virCommandPtr cmd) virBufferAdd(&buf, cpumask, -1); virBufferAsprintf(&buf, ",mem=%d", cellmem); - if (virBufferCheckError(&buf) < 0) - goto cleanup; - virCommandAddArgBuffer(cmd, &buf); } ret = 0; -- 2.0.0

to ease the review of commits to follow. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/conf/domain_conf.c | 52 +++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 1d83f13..315ea6a 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -11692,6 +11692,32 @@ virDomainDefParseXML(xmlDocPtr xml, } VIR_FREE(nodes); + /* analysis of cpu handling */ + if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { + xmlNodePtr oldnode = ctxt->node; + ctxt->node = node; + def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST); + ctxt->node = oldnode; + + if (def->cpu == NULL) + goto error; + + if (def->cpu->sockets && + def->maxvcpus > + def->cpu->sockets * def->cpu->cores * def->cpu->threads) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("Maximum CPUs greater than topology limit")); + goto error; + } + + if (def->cpu->cells_cpus > def->maxvcpus) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Number of CPUs in <numa> exceeds the" + " <vcpu> count")); + goto error; + } + } + /* Extract numatune if exists. */ if ((n = virXPathNodeSet("./numatune", ctxt, &nodes)) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -12889,32 +12915,6 @@ virDomainDefParseXML(xmlDocPtr xml, goto error; } - /* analysis of cpu handling */ - if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { - xmlNodePtr oldnode = ctxt->node; - ctxt->node = node; - def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST); - ctxt->node = oldnode; - - if (def->cpu == NULL) - goto error; - - if (def->cpu->sockets && - def->maxvcpus > - def->cpu->sockets * def->cpu->cores * def->cpu->threads) { - virReportError(VIR_ERR_XML_DETAIL, "%s", - _("Maximum CPUs greater than topology limit")); - goto error; - } - - if (def->cpu->cells_cpus > def->maxvcpus) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Number of CPUs in <numa> exceeds the" - " <vcpu> count")); - goto error; - } - } - if ((node = virXPathNode("./sysinfo[1]", ctxt)) != NULL) { xmlNodePtr oldnode = ctxt->node; ctxt->node = node; -- 2.0.0

In XML format, by definition, order of fields should not matter, so order of parsing the elements doesn't affect the end result. When specifying guest NUMA cells, we depend only on the order of the 'cell' elements. With this patch all older domain XMLs are parsed as before, but with the 'id' attribute they are parsed and formatted according to that field. This will be useful when we have tuning settings for particular guest NUMA node. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- docs/formatdomain.html.in | 17 +++++---- docs/schemas/domaincommon.rng | 5 +++ src/conf/cpu_conf.c | 41 +++++++++++++++++++--- src/conf/cpu_conf.h | 3 +- src/qemu/qemu_command.c | 2 +- tests/qemuxml2argvdata/qemuxml2argv-cpu-numa1.xml | 6 ++-- tests/qemuxml2argvdata/qemuxml2argv-cpu-numa2.xml | 6 ++-- tests/qemuxml2argvdata/qemuxml2argv-cpu-numa3.xml | 25 +++++++++++++ tests/qemuxml2argvtest.c | 1 + .../qemuxml2xmlout-cpu-numa1.xml | 28 +++++++++++++++ .../qemuxml2xmlout-cpu-numa2.xml | 28 +++++++++++++++ tests/qemuxml2xmltest.c | 3 ++ 12 files changed, 145 insertions(+), 20 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa3.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa1.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa2.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index b69da4c..9f1082b 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1030,8 +1030,8 @@ <cpu> ... <numa> - <cell cpus='0-3' memory='512000'/> - <cell cpus='4-7' memory='512000'/> + <cell id='0' cpus='0-3' memory='512000'/> + <cell id='1' cpus='4-7' memory='512000'/> </numa> ... </cpu> @@ -1039,10 +1039,15 @@ <p> Each <code>cell</code> element specifies a NUMA cell or a NUMA node. - <code>cpus</code> specifies the CPU or range of CPUs that are part of - the node. <code>memory</code> specifies the node memory in kibibytes - (i.e. blocks of 1024 bytes). Each cell or node is assigned cellid - or nodeid in the increasing order starting from 0. + <code>cpus</code> specifies the CPU or range of CPUs that are + part of the node. <code>memory</code> specifies the node memory + in kibibytes (i.e. blocks of 1024 bytes). + <span class="since">Since 1.2.7</span> all cells should + have <code>id</code> attribute in case referring to some cell is + necessary in the code, otherwise the cells are + assigned <code>id</code>s in the increasing order starting from + 0. Mixing cells with and without the <code>id</code> attribute + is not recommended as it may result in unwanted behaviour. </p> <p> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 7be028d..155a33e 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -3902,6 +3902,11 @@ <define name="numaCell"> <element name="cell"> + <optional> + <attribute name="id"> + <ref name="unsignedInt"/> + </attribute> + </optional> <attribute name="cpus"> <ref name="cpuset"/> </attribute> diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c index 811893d..5003cf1 100644 --- a/src/conf/cpu_conf.c +++ b/src/conf/cpu_conf.c @@ -152,7 +152,6 @@ virCPUDefCopy(const virCPUDef *cpu) copy->ncells_max = copy->ncells = cpu->ncells; for (i = 0; i < cpu->ncells; i++) { - copy->cells[i].cellid = cpu->cells[i].cellid; copy->cells[i].mem = cpu->cells[i].mem; copy->cells[i].cpumask = virBitmapNewCopy(cpu->cells[i].cpumask); @@ -438,17 +437,48 @@ virCPUDefParseXML(xmlNodePtr node, for (i = 0; i < n; i++) { char *cpus, *memory; int ret, ncpus = 0; + unsigned int cur_cell; + char *tmp = NULL; + + tmp = virXMLPropString(nodes[i], "id"); + if (!tmp) { + cur_cell = i; + } else { + ret = virStrToLong_ui(tmp, NULL, 10, &cur_cell); + if (ret == -1) { + virReportError(VIR_ERR_XML_ERROR, + _("Invalid 'id' attribute in NUMA cell: %s"), + tmp); + VIR_FREE(tmp); + goto error; + } + VIR_FREE(tmp); + } + + if (cur_cell >= n) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Exactly one 'cell' element per guest " + "NUMA cell allowed, non-contiguous ranges or " + "ranges not starting from 0 are not allowed")); + goto error; + } + + if (def->cells[cur_cell].cpustr) { + virReportError(VIR_ERR_XML_ERROR, + _("Duplicate NUMA cell info for cell id '%u'"), + cur_cell); + goto error; + } - def->cells[i].cellid = i; cpus = virXMLPropString(nodes[i], "cpus"); if (!cpus) { virReportError(VIR_ERR_XML_ERROR, "%s", _("Missing 'cpus' attribute in NUMA cell")); goto error; } - def->cells[i].cpustr = cpus; + def->cells[cur_cell].cpustr = cpus; - ncpus = virBitmapParse(cpus, 0, &def->cells[i].cpumask, + ncpus = virBitmapParse(cpus, 0, &def->cells[cur_cell].cpumask, VIR_DOMAIN_CPUMASK_LEN); if (ncpus <= 0) goto error; @@ -461,7 +491,7 @@ virCPUDefParseXML(xmlNodePtr node, goto error; } - ret = virStrToLong_ui(memory, NULL, 10, &def->cells[i].mem); + ret = virStrToLong_ui(memory, NULL, 10, &def->cells[cur_cell].mem); if (ret == -1) { virReportError(VIR_ERR_XML_ERROR, "%s", _("Invalid 'memory' attribute in NUMA cell")); @@ -645,6 +675,7 @@ virCPUDefFormatBuf(virBufferPtr buf, virBufferAdjustIndent(buf, 2); for (i = 0; i < def->ncells; i++) { virBufferAddLit(buf, "<cell"); + virBufferAsprintf(buf, " id='%zu'", i); virBufferAsprintf(buf, " cpus='%s'", def->cells[i].cpustr); virBufferAsprintf(buf, " memory='%d'", def->cells[i].mem); virBufferAddLit(buf, "/>\n"); diff --git a/src/conf/cpu_conf.h b/src/conf/cpu_conf.h index 8c932ce..2d538db 100644 --- a/src/conf/cpu_conf.h +++ b/src/conf/cpu_conf.h @@ -1,7 +1,7 @@ /* * cpu_conf.h: CPU XML handling * - * Copyright (C) 2009-2011, 2013 Red Hat, Inc. + * Copyright (C) 2009-2011, 2013, 2014 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -93,7 +93,6 @@ struct _virCPUFeatureDef { typedef struct _virCellDef virCellDef; typedef virCellDef *virCellDefPtr; struct _virCellDef { - int cellid; virBitmapPtr cpumask; /* CPUs that are part of this node */ char *cpustr; /* CPUs stored in string form for dumpxml */ unsigned int mem; /* Node memory in kB */ diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 9fc0778..bf0f56e 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -6400,7 +6400,7 @@ qemuBuildNumaArgStr(const virDomainDef *def, virCommandPtr cmd) } virCommandAddArg(cmd, "-numa"); - virBufferAsprintf(&buf, "node,nodeid=%d", def->cpu->cells[i].cellid); + virBufferAsprintf(&buf, "node,nodeid=%zu", i); virBufferAddLit(&buf, ",cpus="); /* Up through qemu 1.4, -numa does not accept a cpus diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa1.xml b/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa1.xml index ee402c8..0543f7f 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa1.xml +++ b/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa1.xml @@ -9,10 +9,10 @@ <boot dev='network'/> </os> <cpu> - <topology sockets="2" cores="4" threads="2"/> + <topology sockets='2' cores='4' threads='2'/> <numa> - <cell cpus="0-7" memory="109550"/> - <cell cpus="8-15" memory="109550"/> + <cell cpus='0-7' memory='109550'/> + <cell cpus='8-15' memory='109550'/> </numa> </cpu> <clock offset='utc'/> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa2.xml b/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa2.xml index ee402c8..0a5f9fc 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa2.xml +++ b/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa2.xml @@ -9,10 +9,10 @@ <boot dev='network'/> </os> <cpu> - <topology sockets="2" cores="4" threads="2"/> + <topology sockets='2' cores='4' threads='2'/> <numa> - <cell cpus="0-7" memory="109550"/> - <cell cpus="8-15" memory="109550"/> + <cell id='1' cpus='8-15' memory='109550'/> + <cell id='0' cpus='0-7' memory='109550'/> </numa> </cpu> <clock offset='utc'/> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa3.xml b/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa3.xml new file mode 100644 index 0000000..fa3070d --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa3.xml @@ -0,0 +1,25 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>219100</memory> + <currentMemory unit='KiB'>219100</currentMemory> + <vcpu placement='static'>16</vcpu> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='network'/> + </os> + <cpu> + <topology sockets='2' cores='4' threads='2'/> + <numa> + <cell id='1' cpus='0-7' memory='109550'/> + <cell id='2' cpus='8-15' memory='109550'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index a841adb..414afda 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -1177,6 +1177,7 @@ mymain(void) DO_TEST("cpu-strict1", NONE); DO_TEST("cpu-numa1", NONE); DO_TEST("cpu-numa2", QEMU_CAPS_SMP_TOPOLOGY); + DO_TEST_PARSE_ERROR("cpu-numa3", NONE); DO_TEST("cpu-host-model", NONE); skipLegacyCPUs = true; DO_TEST("cpu-host-model-fallback", NONE); diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa1.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa1.xml new file mode 100644 index 0000000..227bf1c --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa1.xml @@ -0,0 +1,28 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>219100</memory> + <currentMemory unit='KiB'>219100</currentMemory> + <vcpu placement='static'>16</vcpu> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='network'/> + </os> + <cpu> + <topology sockets='2' cores='4' threads='2'/> + <numa> + <cell id='0' cpus='0-7' memory='109550'/> + <cell id='1' cpus='8-15' memory='109550'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa2.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa2.xml new file mode 100644 index 0000000..227bf1c --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa2.xml @@ -0,0 +1,28 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>219100</memory> + <currentMemory unit='KiB'>219100</currentMemory> + <vcpu placement='static'>16</vcpu> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='network'/> + </os> + <cpu> + <topology sockets='2' cores='4' threads='2'/> + <numa> + <cell id='0' cpus='0-7' memory='109550'/> + <cell id='1' cpus='8-15' memory='109550'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 9f919de..27281c8 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -369,6 +369,9 @@ mymain(void) DO_TEST("chardev-label"); + DO_TEST_DIFFERENT("cpu-numa1"); + DO_TEST_DIFFERENT("cpu-numa2"); + virObjectUnref(driver.caps); virObjectUnref(driver.xmlopt); -- 2.0.0

On 07/16/2014 08:42 AM, Martin Kletzander wrote:
In XML format, by definition, order of fields should not matter, so order of parsing the elements doesn't affect the end result. When specifying guest NUMA cells, we depend only on the order of the 'cell' elements. With this patch all older domain XMLs are parsed as before, but with the 'id' attribute they are parsed and formatted according to that field. This will be useful when we have tuning settings for particular guest NUMA node.
It's not always true that order doesn't matter - for the longest time, the order of <disk> under <devices> determined boot order. But I agree that the flexibility of using an id to allow arbitrary ordering can be nicer than enforcing constant ordering.
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- docs/formatdomain.html.in | 17 +++++---- docs/schemas/domaincommon.rng | 5 +++
Looks okay to me on the schema side. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Wed, Jul 16, 2014 at 12:14:35PM -0600, Eric Blake wrote:
On 07/16/2014 08:42 AM, Martin Kletzander wrote:
In XML format, by definition, order of fields should not matter, so order of parsing the elements doesn't affect the end result. When specifying guest NUMA cells, we depend only on the order of the 'cell' elements. With this patch all older domain XMLs are parsed as before, but with the 'id' attribute they are parsed and formatted according to that field. This will be useful when we have tuning settings for particular guest NUMA node.
It's not always true that order doesn't matter - for the longest time, the order of <disk> under <devices> determined boot order. But I agree that the flexibility of using an id to allow arbitrary ordering can be nicer than enforcing constant ordering.
I was under the impression that it's determined by the buses the disks are connected to. Anyway even if that matters, I'm not sure we guarantee that and if we do, it should be fixed from my POV. But that's a discussion for another day.
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- docs/formatdomain.html.in | 17 +++++---- docs/schemas/domaincommon.rng | 5 +++
Looks okay to me on the schema side.
Thanks for checking it.

There are many places with numatune-related code that should be put into special numatune_conf and this patch creates a basis for that. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/Makefile.am | 3 ++- src/conf/domain_conf.h | 2 +- src/conf/numatune_conf.c | 37 ++++++++++++++++++++++++++++++ src/conf/numatune_conf.h | 54 ++++++++++++++++++++++++++++++++++++++++++++ src/libvirt_private.syms | 12 ++++++---- src/qemu/qemu_capabilities.c | 1 + src/util/virnuma.c | 13 +---------- src/util/virnuma.h | 26 ++------------------- 8 files changed, 106 insertions(+), 42 deletions(-) create mode 100644 src/conf/numatune_conf.c create mode 100644 src/conf/numatune_conf.h diff --git a/src/Makefile.am b/src/Makefile.am index a40e63f..982f63d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -252,7 +252,8 @@ DOMAIN_CONF_SOURCES = \ conf/domain_conf.c conf/domain_conf.h \ conf/domain_audit.c conf/domain_audit.h \ conf/domain_nwfilter.c conf/domain_nwfilter.h \ - conf/snapshot_conf.c conf/snapshot_conf.h + conf/snapshot_conf.c conf/snapshot_conf.h \ + conf/numatune_conf.c conf/numatune_conf.h OBJECT_EVENT_SOURCES = \ conf/object_event.c conf/object_event.h \ diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 32674e0..fc7680c 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -36,6 +36,7 @@ # include "virhash.h" # include "virsocketaddr.h" # include "nwfilter_params.h" +# include "numatune_conf.h" # include "virnetdevmacvlan.h" # include "virsysinfo.h" # include "virnetdevvportprofile.h" @@ -46,7 +47,6 @@ # include "device_conf.h" # include "virbitmap.h" # include "virstoragefile.h" -# include "virnuma.h" # include "virseclabel.h" /* forward declarations of all device types, required by diff --git a/src/conf/numatune_conf.c b/src/conf/numatune_conf.c new file mode 100644 index 0000000..e9be040 --- /dev/null +++ b/src/conf/numatune_conf.c @@ -0,0 +1,37 @@ +/* + * numatune_conf.c + * + * Copyright (C) 2014 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + * + * Author: Martin Kletzander <mkletzan@redhat.com> + */ + +#include <config.h> + +#include "numatune_conf.h" + +VIR_ENUM_IMPL(virDomainNumatuneMemMode, + VIR_DOMAIN_NUMATUNE_MEM_LAST, + "strict", + "preferred", + "interleave"); + +VIR_ENUM_IMPL(virNumaTuneMemPlacementMode, + VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_LAST, + "default", + "static", + "auto"); diff --git a/src/conf/numatune_conf.h b/src/conf/numatune_conf.h new file mode 100644 index 0000000..6bdfdc0 --- /dev/null +++ b/src/conf/numatune_conf.h @@ -0,0 +1,54 @@ +/* + * numatune_conf.h + * + * Copyright (C) 2014 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + * + * Author: Martin Kletzander <mkletzan@redhat.com> + */ + +#ifndef __NUMATUNE_CONF_H__ +# define __NUMATUNE_CONF_H__ + +# include "internal.h" +# include "virutil.h" +# include "virbitmap.h" + +typedef enum { + VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_DEFAULT = 0, + VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC, + VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO, + + VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_LAST +} virDomainNumaTuneMemPlacementMode; + +VIR_ENUM_DECL(virNumaTuneMemPlacementMode) + +VIR_ENUM_DECL(virDomainNumatuneMemMode) + +typedef struct _virNumaTuneDef virNumaTuneDef; +typedef virNumaTuneDef *virNumaTuneDefPtr; +struct _virNumaTuneDef { + struct { + virBitmapPtr nodemask; + int mode; /* enum virDomainNumatuneMemMode */ + int placement_mode; /* enum virNumaTuneMemPlacementMode */ + } memory; /* pinning for all the memory */ + + /* Future NUMA tuning related stuff should go here. */ +}; + +#endif /* __NUMATUNE_CONF_H__ */ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a184268..7e0f90b 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -606,6 +606,13 @@ virNodeDeviceObjRemove; virNodeDeviceObjUnlock; +# conf/numatune_conf.h +virDomainNumatuneMemModeTypeFromString; +virDomainNumatuneMemModeTypeToString; +virNumaTuneMemPlacementModeTypeFromString; +virNumaTuneMemPlacementModeTypeToString; + + # conf/nwfilter_conf.h virNWFilterCallbackDriversLock; virNWFilterCallbackDriversUnlock; @@ -1680,8 +1687,6 @@ virNodeSuspendGetTargetMask; # util/virnuma.h -virDomainNumatuneMemModeTypeFromString; -virDomainNumatuneMemModeTypeToString; virNumaGetAutoPlacementAdvice; virNumaGetDistances; virNumaGetMaxNode; @@ -1691,8 +1696,7 @@ virNumaGetPages; virNumaIsAvailable; virNumaNodeIsAvailable; virNumaSetupMemoryPolicy; -virNumaTuneMemPlacementModeTypeFromString; -virNumaTuneMemPlacementModeTypeToString; + # util/virobject.h virClassForObject; diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 8271e28..c5fef53 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -37,6 +37,7 @@ #include "vircommand.h" #include "virbitmap.h" #include "virnodesuspend.h" +#include "virnuma.h" #include "qemu_monitor.h" #include "virstring.h" #include "qemu_hostdev.h" diff --git a/src/util/virnuma.c b/src/util/virnuma.c index 207b804..de3f86a 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -1,7 +1,7 @@ /* * virnuma.c: helper APIs for managing numa * - * Copyright (C) 2011-2013 Red Hat, Inc. + * Copyright (C) 2011-2014 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -51,17 +51,6 @@ VIR_LOG_INIT("util.numa"); -VIR_ENUM_IMPL(virDomainNumatuneMemMode, - VIR_DOMAIN_NUMATUNE_MEM_LAST, - "strict", - "preferred", - "interleave"); - -VIR_ENUM_IMPL(virNumaTuneMemPlacementMode, - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_LAST, - "default", - "static", - "auto"); #if HAVE_NUMAD char * diff --git a/src/util/virnuma.h b/src/util/virnuma.h index fbeb7a9..21e8125 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -1,7 +1,7 @@ /* * virnuma.h: helper APIs for managing numa * - * Copyright (C) 2011-2013 Red Hat, Inc. + * Copyright (C) 2011-2014 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -23,32 +23,10 @@ # define __VIR_NUMA_H__ # include "internal.h" +# include "numatune_conf.h" # include "virbitmap.h" # include "virutil.h" -typedef enum { - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_DEFAULT = 0, - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC, - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO, - - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_LAST -} virNumaTuneMemPlacementMode; - -VIR_ENUM_DECL(virNumaTuneMemPlacementMode) - -VIR_ENUM_DECL(virDomainNumatuneMemMode) - -typedef struct _virNumaTuneDef virNumaTuneDef; -typedef virNumaTuneDef *virNumaTuneDefPtr; -struct _virNumaTuneDef { - struct { - virBitmapPtr nodemask; - int mode; - int placement_mode; /* enum virNumaTuneMemPlacementMode */ - } memory; - - /* Future NUMA tuning related stuff should go here. */ -}; char *virNumaGetAutoPlacementAdvice(unsigned short vcups, unsigned long long balloon); -- 2.0.0

Since there was already public virDomainNumatune*, I changed the private virNumaTune to match the same, so all the uses are unified and public API is kept: s/vir\(Domain\)\?Numa[tT]une/virDomainNumatune/g then shrunk long lines, and mainly functions, that were created after that: sed -i 's/virDomainNumatuneMemPlacementMode/virDomainNumatunePlacement/g' And to cope with the enum name, I haad to change the constants as well: s/VIR_NUMA_TUNE_MEM_PLACEMENT_MODE/VIR_DOMAIN_NUMATUNE_PLACEMENT/g Last thing I did was at least a little shortening of already long name: s/virDomainNumatuneDef/virDomainNumatune/g Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/conf/domain_conf.c | 20 ++++++++++---------- src/conf/domain_conf.h | 2 +- src/conf/numatune_conf.c | 4 ++-- src/conf/numatune_conf.h | 20 ++++++++++---------- src/libvirt_private.syms | 4 ++-- src/lxc/lxc_cgroup.c | 4 ++-- src/lxc/lxc_controller.c | 2 +- src/lxc/lxc_native.c | 2 +- src/qemu/qemu_cgroup.c | 4 ++-- src/qemu/qemu_driver.c | 10 +++++----- src/qemu/qemu_process.c | 2 +- src/util/virnuma.c | 8 ++++---- src/util/virnuma.h | 2 +- 13 files changed, 42 insertions(+), 42 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 315ea6a..f83050d 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -11773,7 +11773,7 @@ virDomainDefParseXML(xmlDocPtr xml, int placement_mode = 0; if (placement) { if ((placement_mode = - virNumaTuneMemPlacementModeTypeFromString(placement)) < 0) { + virDomainNumatunePlacementTypeFromString(placement)) < 0) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Unsupported memory placement " "mode '%s'"), placement); @@ -11783,18 +11783,18 @@ virDomainDefParseXML(xmlDocPtr xml, VIR_FREE(placement); } else if (def->numatune.memory.nodemask) { /* Defaults to "static" if nodeset is specified. */ - placement_mode = VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC; + placement_mode = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; } else { /* Defaults to "placement" of <vcpu> if nodeset is * not specified. */ if (def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_STATIC) - placement_mode = VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC; + placement_mode = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; else - placement_mode = VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO; + placement_mode = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO; } - if (placement_mode == VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC && + if (placement_mode == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC && !def->numatune.memory.nodemask) { virReportError(VIR_ERR_XML_ERROR, "%s", _("nodeset for NUMA memory tuning must be set " @@ -11803,7 +11803,7 @@ virDomainDefParseXML(xmlDocPtr xml, } /* Ignore 'nodeset' if 'placement' is 'auto' finally */ - if (placement_mode == VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) { + if (placement_mode == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) { virBitmapFree(def->numatune.memory.nodemask); def->numatune.memory.nodemask = NULL; } @@ -11811,7 +11811,7 @@ virDomainDefParseXML(xmlDocPtr xml, /* Copy 'placement' of <numatune> to <vcpu> if its 'placement' * is not specified and 'placement' of <numatune> is specified. */ - if (placement_mode == VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO && + if (placement_mode == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO && !def->cpumask) def->placement_mode = VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO; @@ -11830,7 +11830,7 @@ virDomainDefParseXML(xmlDocPtr xml, * and 'placement' of <vcpu> is 'auto'. */ if (def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) { - def->numatune.memory.placement_mode = VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO; + def->numatune.memory.placement_mode = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO; def->numatune.memory.mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; } } @@ -17452,13 +17452,13 @@ virDomainDefFormatInternal(virDomainDefPtr def, virBufferAsprintf(buf, "<memory mode='%s' ", mode); if (def->numatune.memory.placement_mode == - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) { + VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) { if (!(nodemask = virBitmapFormat(def->numatune.memory.nodemask))) goto error; virBufferAsprintf(buf, "nodeset='%s'/>\n", nodemask); VIR_FREE(nodemask); } else if (def->numatune.memory.placement_mode) { - placement = virNumaTuneMemPlacementModeTypeToString(def->numatune.memory.placement_mode); + placement = virDomainNumatunePlacementTypeToString(def->numatune.memory.placement_mode); virBufferAsprintf(buf, "placement='%s'/>\n", placement); } virBufferAdjustIndent(buf, -2); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index fc7680c..b0ac6b4 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1891,7 +1891,7 @@ struct _virDomainDef { virDomainVcpuPinDefPtr emulatorpin; } cputune; - virNumaTuneDef numatune; + virDomainNumatune numatune; virDomainResourceDefPtr resource; virDomainIdMapDef idmap; diff --git a/src/conf/numatune_conf.c b/src/conf/numatune_conf.c index e9be040..14300f0 100644 --- a/src/conf/numatune_conf.c +++ b/src/conf/numatune_conf.c @@ -30,8 +30,8 @@ VIR_ENUM_IMPL(virDomainNumatuneMemMode, "preferred", "interleave"); -VIR_ENUM_IMPL(virNumaTuneMemPlacementMode, - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_LAST, +VIR_ENUM_IMPL(virDomainNumatunePlacement, + VIR_DOMAIN_NUMATUNE_PLACEMENT_LAST, "default", "static", "auto"); diff --git a/src/conf/numatune_conf.h b/src/conf/numatune_conf.h index 6bdfdc0..baac7bf 100644 --- a/src/conf/numatune_conf.h +++ b/src/conf/numatune_conf.h @@ -28,24 +28,24 @@ # include "virbitmap.h" typedef enum { - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_DEFAULT = 0, - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC, - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO, + VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT = 0, + VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC, + VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO, - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_LAST -} virDomainNumaTuneMemPlacementMode; + VIR_DOMAIN_NUMATUNE_PLACEMENT_LAST +} virDomainNumatunePlacement; -VIR_ENUM_DECL(virNumaTuneMemPlacementMode) +VIR_ENUM_DECL(virDomainNumatunePlacement) VIR_ENUM_DECL(virDomainNumatuneMemMode) -typedef struct _virNumaTuneDef virNumaTuneDef; -typedef virNumaTuneDef *virNumaTuneDefPtr; -struct _virNumaTuneDef { +typedef struct _virDomainNumatune virDomainNumatune; +typedef virDomainNumatune *virDomainNumatunePtr; +struct _virDomainNumatune { struct { virBitmapPtr nodemask; int mode; /* enum virDomainNumatuneMemMode */ - int placement_mode; /* enum virNumaTuneMemPlacementMode */ + int placement_mode; /* enum virDomainNumatunePlacement */ } memory; /* pinning for all the memory */ /* Future NUMA tuning related stuff should go here. */ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 7e0f90b..cf8bb91 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -609,8 +609,8 @@ virNodeDeviceObjUnlock; # conf/numatune_conf.h virDomainNumatuneMemModeTypeFromString; virDomainNumatuneMemModeTypeToString; -virNumaTuneMemPlacementModeTypeFromString; -virNumaTuneMemPlacementModeTypeToString; +virDomainNumatunePlacementTypeFromString; +virDomainNumatunePlacementTypeToString; # conf/nwfilter_conf.h diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 00ff807..b584e33 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -81,10 +81,10 @@ static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, if ((def->numatune.memory.nodemask || (def->numatune.memory.placement_mode == - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO)) && + VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)) && def->numatune.memory.mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { if (def->numatune.memory.placement_mode == - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) + VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) mask = virBitmapFormat(nodemask); else mask = virBitmapFormat(def->numatune.memory.nodemask); diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 4aa7e02..03b1d31 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -656,7 +656,7 @@ static int virLXCControllerGetNumadAdvice(virLXCControllerPtr ctrl, if ((ctrl->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || (ctrl->def->numatune.memory.placement_mode == - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO)) { + VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)) { nodeset = virNumaGetAutoPlacementAdvice(ctrl->def->vcpus, ctrl->def->mem.cur_balloon); if (!nodeset) diff --git a/src/lxc/lxc_native.c b/src/lxc/lxc_native.c index f4c4556..30f7e55 100644 --- a/src/lxc/lxc_native.c +++ b/src/lxc/lxc_native.c @@ -720,7 +720,7 @@ lxcSetCpusetTune(virDomainDefPtr def, virConfPtr properties) if ((value = virConfGetValue(properties, "lxc.cgroup.cpuset.mems")) && value->str) { - def->numatune.memory.placement_mode = VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC; + def->numatune.memory.placement_mode = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; def->numatune.memory.mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; if (virBitmapParse(value->str, 0, &def->numatune.memory.nodemask, VIR_DOMAIN_CPUMASK_LEN) < 0) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 79f5f55..a24975b 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -603,11 +603,11 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm, if ((vm->def->numatune.memory.nodemask || (vm->def->numatune.memory.placement_mode == - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO)) && + VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)) && vm->def->numatune.memory.mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { if (vm->def->numatune.memory.placement_mode == - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) + VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) mem_mask = virBitmapFormat(nodemask); else mem_mask = virBitmapFormat(vm->def->numatune.memory.nodemask); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 1652744..85c3684 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -8754,8 +8754,8 @@ qemuDomainSetNumaParameters(virDomainPtr dom, if (STREQ(param->field, VIR_DOMAIN_NUMA_MODE)) { int mode = param->value.i; - if (mode >= VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_LAST || - mode < VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_DEFAULT) + if (mode >= VIR_DOMAIN_NUMATUNE_PLACEMENT_LAST || + mode < VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT) { virReportError(VIR_ERR_INVALID_ARG, _("unsupported numa_mode: '%d'"), mode); @@ -8791,7 +8791,7 @@ qemuDomainSetNumaParameters(virDomainPtr dom, virBitmapFree(vm->def->numatune.memory.nodemask); vm->def->numatune.memory.placement_mode = - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC; + VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; vm->def->numatune.memory.nodemask = virBitmapNewCopy(nodeset); } @@ -8800,7 +8800,7 @@ qemuDomainSetNumaParameters(virDomainPtr dom, persistentDef->numatune.memory.nodemask = nodeset; persistentDef->numatune.memory.placement_mode = - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC; + VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; nodeset = NULL; } virBitmapFree(nodeset); @@ -8810,7 +8810,7 @@ qemuDomainSetNumaParameters(virDomainPtr dom, if (flags & VIR_DOMAIN_AFFECT_CONFIG) { if (!persistentDef->numatune.memory.placement_mode) persistentDef->numatune.memory.placement_mode = - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO; + VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO; if (virDomainSaveConfig(cfg->configDir, persistentDef) < 0) goto cleanup; } diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index ccc571b..d8bb679 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3901,7 +3901,7 @@ int qemuProcessStart(virConnectPtr conn, if ((vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || (vm->def->numatune.memory.placement_mode == - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO)) { + VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)) { nodeset = virNumaGetAutoPlacementAdvice(vm->def->vcpus, vm->def->mem.max_balloon); if (!nodeset) diff --git a/src/util/virnuma.c b/src/util/virnuma.c index de3f86a..bc5180a 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -87,7 +87,7 @@ virNumaGetAutoPlacementAdvice(unsigned short vcpus ATTRIBUTE_UNUSED, #if WITH_NUMACTL int -virNumaSetupMemoryPolicy(virNumaTuneDef numatune, +virNumaSetupMemoryPolicy(virDomainNumatune numatune, virBitmapPtr nodemask) { nodemask_t mask; @@ -100,13 +100,13 @@ virNumaSetupMemoryPolicy(virNumaTuneDef numatune, virBitmapPtr tmp_nodemask = NULL; if (numatune.memory.placement_mode == - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) { + VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) { if (!numatune.memory.nodemask) return 0; VIR_DEBUG("Set NUMA memory policy with specified nodeset"); tmp_nodemask = numatune.memory.nodemask; } else if (numatune.memory.placement_mode == - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) { + VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) { VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); tmp_nodemask = nodemask; } else { @@ -327,7 +327,7 @@ virNumaGetNodeCPUs(int node, #else int -virNumaSetupMemoryPolicy(virNumaTuneDef numatune, +virNumaSetupMemoryPolicy(virDomainNumatune numatune, virBitmapPtr nodemask ATTRIBUTE_UNUSED) { if (numatune.memory.nodemask) { diff --git a/src/util/virnuma.h b/src/util/virnuma.h index 21e8125..d2ab6af 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -31,7 +31,7 @@ char *virNumaGetAutoPlacementAdvice(unsigned short vcups, unsigned long long balloon); -int virNumaSetupMemoryPolicy(virNumaTuneDef numatune, +int virNumaSetupMemoryPolicy(virDomainNumatune numatune, virBitmapPtr nodemask); bool virNumaIsAvailable(void); -- 2.0.0

There were numerous places where numatune configuration (and thus domain config as well) was changed in different ways. On some places this even resulted in persistent domain definition not to be stable (it would change with daemon's restart). In order to uniformly change how numatune config is dealt with, all the internals are now accessible directly only in numatune_conf.c and outside this file accessors must be used. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- po/POTFILES.in | 1 + src/conf/domain_conf.c | 159 ++--------- src/conf/domain_conf.h | 8 +- src/conf/numatune_conf.c | 318 +++++++++++++++++++++ src/conf/numatune_conf.h | 72 ++++- src/libvirt_private.syms | 11 + src/lxc/lxc_cgroup.c | 19 +- src/lxc/lxc_controller.c | 5 +- src/lxc/lxc_native.c | 15 +- src/parallels/parallels_driver.c | 7 +- src/qemu/qemu_cgroup.c | 23 +- src/qemu/qemu_driver.c | 84 +++--- src/qemu/qemu_process.c | 8 +- src/util/virnuma.c | 48 ++-- src/util/virnuma.h | 2 +- .../qemuxml2argv-numatune-auto-prefer.xml | 29 ++ .../qemuxml2xmlout-numatune-auto-prefer.xml | 29 ++ tests/qemuxml2xmltest.c | 2 + 18 files changed, 555 insertions(+), 285 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-auto-prefer.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-auto-prefer.xml diff --git a/po/POTFILES.in b/po/POTFILES.in index fd4b56e..d10e892 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -25,6 +25,7 @@ src/conf/netdev_vlan_conf.c src/conf/netdev_vport_profile_conf.c src/conf/network_conf.c src/conf/node_device_conf.c +src/conf/numatune_conf.c src/conf/nwfilter_conf.c src/conf/nwfilter_params.c src/conf/object_event.c diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index f83050d..d34d94c 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -2097,7 +2097,7 @@ void virDomainDefFree(virDomainDefPtr def) virDomainVcpuPinDefFree(def->cputune.emulatorpin); - virBitmapFree(def->numatune.memory.nodemask); + virDomainNumatuneFree(def->numatune); virSysinfoDefFree(def->sysinfo); @@ -11256,7 +11256,6 @@ virDomainDefParseXML(xmlDocPtr xml, unsigned long count; bool uuid_generated = false; virHashTablePtr bootHash = NULL; - xmlNodePtr cur; bool usb_none = false; bool usb_other = false; bool usb_master = false; @@ -11718,123 +11717,8 @@ virDomainDefParseXML(xmlDocPtr xml, } } - /* Extract numatune if exists. */ - if ((n = virXPathNodeSet("./numatune", ctxt, &nodes)) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("cannot extract numatune nodes")); + if (virDomainNumatuneParseXML(def, ctxt) < 0) goto error; - } - - if (n > 1) { - virReportError(VIR_ERR_XML_ERROR, "%s", - _("only one numatune is supported")); - VIR_FREE(nodes); - goto error; - } - - if (n) { - cur = nodes[0]->children; - while (cur != NULL) { - if (cur->type == XML_ELEMENT_NODE) { - if (xmlStrEqual(cur->name, BAD_CAST "memory")) { - char *mode = NULL; - char *placement = NULL; - char *nodeset = NULL; - - mode = virXMLPropString(cur, "mode"); - if (mode) { - if ((def->numatune.memory.mode = - virDomainNumatuneMemModeTypeFromString(mode)) < 0) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Unsupported NUMA memory " - "tuning mode '%s'"), - mode); - VIR_FREE(mode); - goto error; - } - VIR_FREE(mode); - } else { - def->numatune.memory.mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; - } - - nodeset = virXMLPropString(cur, "nodeset"); - if (nodeset) { - if (virBitmapParse(nodeset, - 0, - &def->numatune.memory.nodemask, - VIR_DOMAIN_CPUMASK_LEN) < 0) { - VIR_FREE(nodeset); - goto error; - } - VIR_FREE(nodeset); - } - - placement = virXMLPropString(cur, "placement"); - int placement_mode = 0; - if (placement) { - if ((placement_mode = - virDomainNumatunePlacementTypeFromString(placement)) < 0) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Unsupported memory placement " - "mode '%s'"), placement); - VIR_FREE(placement); - goto error; - } - VIR_FREE(placement); - } else if (def->numatune.memory.nodemask) { - /* Defaults to "static" if nodeset is specified. */ - placement_mode = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; - } else { - /* Defaults to "placement" of <vcpu> if nodeset is - * not specified. - */ - if (def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_STATIC) - placement_mode = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; - else - placement_mode = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO; - } - - if (placement_mode == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC && - !def->numatune.memory.nodemask) { - virReportError(VIR_ERR_XML_ERROR, "%s", - _("nodeset for NUMA memory tuning must be set " - "if 'placement' is 'static'")); - goto error; - } - - /* Ignore 'nodeset' if 'placement' is 'auto' finally */ - if (placement_mode == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) { - virBitmapFree(def->numatune.memory.nodemask); - def->numatune.memory.nodemask = NULL; - } - - /* Copy 'placement' of <numatune> to <vcpu> if its 'placement' - * is not specified and 'placement' of <numatune> is specified. - */ - if (placement_mode == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO && - !def->cpumask) - def->placement_mode = VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO; - - def->numatune.memory.placement_mode = placement_mode; - } else { - virReportError(VIR_ERR_XML_ERROR, - _("unsupported XML element %s"), - (const char *)cur->name); - goto error; - } - } - cur = cur->next; - } - } else { - /* Defaults NUMA memory placement mode to 'auto' if no <numatune> - * and 'placement' of <vcpu> is 'auto'. - */ - if (def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) { - def->numatune.memory.placement_mode = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO; - def->numatune.memory.mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; - } - } - VIR_FREE(nodes); if ((n = virXPathNodeSet("./resource", ctxt, &nodes)) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -17440,30 +17324,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, def->cputune.emulator_period || def->cputune.emulator_quota) virBufferAddLit(buf, "</cputune>\n"); - if (def->numatune.memory.nodemask || - def->numatune.memory.placement_mode) { - const char *mode; - char *nodemask = NULL; - const char *placement; - - virBufferAddLit(buf, "<numatune>\n"); - virBufferAdjustIndent(buf, 2); - mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode); - virBufferAsprintf(buf, "<memory mode='%s' ", mode); - - if (def->numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) { - if (!(nodemask = virBitmapFormat(def->numatune.memory.nodemask))) - goto error; - virBufferAsprintf(buf, "nodeset='%s'/>\n", nodemask); - VIR_FREE(nodemask); - } else if (def->numatune.memory.placement_mode) { - placement = virDomainNumatunePlacementTypeToString(def->numatune.memory.placement_mode); - virBufferAsprintf(buf, "placement='%s'/>\n", placement); - } - virBufferAdjustIndent(buf, -2); - virBufferAddLit(buf, "</numatune>\n"); - } + if (virDomainNumatuneFormatXML(buf, def->numatune) < 0) + goto error; if (def->resource) virDomainResourceDefFormat(buf, def->resource); @@ -19760,3 +19622,16 @@ virDomainObjSetMetadata(virDomainObjPtr vm, return 0; } + + +bool +virDomainDefNeedsPlacementAdvice(virDomainDefPtr def) +{ + if (def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) + return true; + + if (virDomainNumatuneHasPlacementAuto(def->numatune)) + return true; + + return false; +} diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index b0ac6b4..9dc57d9 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -67,6 +67,9 @@ typedef virDomainFSDef *virDomainFSDefPtr; typedef struct _virDomainNetDef virDomainNetDef; typedef virDomainNetDef *virDomainNetDefPtr; +typedef struct _virDomainNumatune virDomainNumatune; +typedef virDomainNumatune *virDomainNumatunePtr; + typedef struct _virDomainInputDef virDomainInputDef; typedef virDomainInputDef *virDomainInputDefPtr; @@ -1891,7 +1894,7 @@ struct _virDomainDef { virDomainVcpuPinDefPtr emulatorpin; } cputune; - virDomainNumatune numatune; + virDomainNumatunePtr numatune; virDomainResourceDefPtr resource; virDomainIdMapDef idmap; @@ -2711,4 +2714,7 @@ int virDomainObjSetMetadata(virDomainObjPtr vm, const char *configDir, unsigned int flags); +bool virDomainDefNeedsPlacementAdvice(virDomainDefPtr def) + ATTRIBUTE_NONNULL(1); + #endif /* __DOMAIN_CONF_H */ diff --git a/src/conf/numatune_conf.c b/src/conf/numatune_conf.c index 14300f0..6ce1e2d 100644 --- a/src/conf/numatune_conf.c +++ b/src/conf/numatune_conf.c @@ -24,6 +24,12 @@ #include "numatune_conf.h" +#include "domain_conf.h" +#include "viralloc.h" +#include "virstring.h" + +#define VIR_FROM_THIS VIR_FROM_DOMAIN + VIR_ENUM_IMPL(virDomainNumatuneMemMode, VIR_DOMAIN_NUMATUNE_MEM_LAST, "strict", @@ -35,3 +41,315 @@ VIR_ENUM_IMPL(virDomainNumatunePlacement, "default", "static", "auto"); + +struct _virDomainNumatune { + struct { + virBitmapPtr nodeset; + virDomainNumatuneMemMode mode; + virDomainNumatunePlacement placement; + } memory; /* pinning for all the memory */ + + /* Future NUMA tuning related stuff should go here. */ +}; + + +int +virDomainNumatuneParseXML(virDomainDefPtr def, + xmlXPathContextPtr ctxt) +{ + char *tmp = NULL; + int mode = -1; + int n = 0; + int placement = -1; + int ret = -1; + virBitmapPtr nodeset = NULL; + xmlNodePtr node = NULL; + + if (virXPathInt("count(./numatune)", ctxt, &n) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot extract numatune nodes")); + goto cleanup; + } else if (n > 1) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("only one numatune is supported")); + goto cleanup; + } + + node = virXPathNode("./numatune/memory[1]", ctxt); + + if (def->numatune) { + virDomainNumatuneFree(def->numatune); + def->numatune = NULL; + } + + if (!node && def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) + return 0; + + if (!node) { + /* We know that def->placement_mode is "auto" if we're here */ + if (virDomainNumatuneSet(def, VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO, + -1, NULL) < 0) + goto cleanup; + return 0; + } + + tmp = virXMLPropString(node, "mode"); + if (tmp) { + mode = virDomainNumatuneMemModeTypeFromString(tmp); + if (mode < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Unsupported NUMA memory tuning mode '%s'"), + tmp); + goto cleanup; + } + } + VIR_FREE(tmp); + + tmp = virXMLPropString(node, "placement"); + if (tmp) { + placement = virDomainNumatunePlacementTypeFromString(tmp); + if (placement < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Unsupported NUMA memory placement mode '%s'"), + tmp); + goto cleanup; + } + } + VIR_FREE(tmp); + + tmp = virXMLPropString(node, "nodeset"); + if (tmp && virBitmapParse(tmp, 0, &nodeset, VIR_DOMAIN_CPUMASK_LEN) < 0) + goto cleanup; + VIR_FREE(tmp); + + if (virDomainNumatuneSet(def, placement, mode, nodeset) < 0) + goto cleanup; + + if (!n) { + ret = 0; + goto cleanup; + } + + ret = 0; + cleanup: + virBitmapFree(nodeset); + VIR_FREE(tmp); + return ret; +} + +int +virDomainNumatuneFormatXML(virBufferPtr buf, + virDomainNumatunePtr numatune) +{ + const char *tmp = NULL; + char *nodeset = NULL; + + if (!numatune) + return 0; + + virBufferAddLit(buf, "<numatune>\n"); + virBufferAdjustIndent(buf, 2); + + tmp = virDomainNumatuneMemModeTypeToString(numatune->memory.mode); + virBufferAsprintf(buf, "<memory mode='%s' ", tmp); + + if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) { + if (!(nodeset = virBitmapFormat(numatune->memory.nodeset))) + return -1; + virBufferAsprintf(buf, "nodeset='%s'/>\n", nodeset); + VIR_FREE(nodeset); + } else if (numatune->memory.placement) { + tmp = virDomainNumatunePlacementTypeToString(numatune->memory.placement); + virBufferAsprintf(buf, "placement='%s'/>\n", tmp); + } + + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</numatune>\n"); + return 0; +} + +void +virDomainNumatuneFree(virDomainNumatunePtr numatune) +{ + if (!numatune) + return; + + virBitmapFree(numatune->memory.nodeset); + + VIR_FREE(numatune); +} + +virDomainNumatuneMemMode +virDomainNumatuneGetMode(virDomainNumatunePtr numatune) +{ + return numatune ? numatune->memory.mode : 0; +} + +virBitmapPtr +virDomainNumatuneGetNodeset(virDomainNumatunePtr numatune, + virBitmapPtr auto_nodeset) +{ + if (!numatune) + return NULL; + + if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) + return auto_nodeset; + + /* + * This weird logic has the same meaning as switch with + * auto/static/default, but can be more readably changed later. + */ + if (numatune->memory.placement != VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) + return NULL; + + return numatune->memory.nodeset; +} + +char * +virDomainNumatuneFormatNodeset(virDomainNumatunePtr numatune, + virBitmapPtr auto_nodeset) +{ + return virBitmapFormat(virDomainNumatuneGetNodeset(numatune, + auto_nodeset)); +} + +int +virDomainNumatuneMaybeFormatNodeset(virDomainNumatunePtr numatune, + virBitmapPtr auto_nodeset, + char **mask) +{ + *mask = NULL; + + if (!numatune) + return 0; + + if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO && + !auto_nodeset) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Advice from numad is needed in case of " + "automatic numa placement")); + return -1; + } + + *mask = virDomainNumatuneFormatNodeset(numatune, auto_nodeset); + if (!*mask) + return -1; + + return 0; +} + +int +virDomainNumatuneSet(virDomainDefPtr def, + int placement, + int mode, + virBitmapPtr nodeset) +{ + bool create = !def->numatune; /* Whether we are creating new struct */ + int ret = -1; + virDomainNumatunePtr numatune = NULL; + + /* No need to do anything in this case */ + if (mode == -1 && placement == -1 && !nodeset) + return 0; + + /* Range checks */ + if (mode != -1 && + (mode < 0 || mode >= VIR_DOMAIN_NUMATUNE_MEM_LAST)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Unsupported numatune mode '%d'"), + mode); + goto cleanup; + } + if (placement != -1 && + (placement < 0 || placement >= VIR_DOMAIN_NUMATUNE_PLACEMENT_LAST)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Unsupported numatune placement '%d'"), + mode); + goto cleanup; + } + + if (create && VIR_ALLOC(def->numatune) < 0) + goto cleanup; + numatune = def->numatune; + + if (create) { + /* Defaults for new struct */ + if (mode == -1) + mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; + + if (placement == -1) + placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT; + } + + if (mode != -1) + numatune->memory.mode = mode; + if (nodeset) { + virBitmapFree(numatune->memory.nodeset); + numatune->memory.nodeset = virBitmapNewCopy(nodeset); + if (!numatune->memory.nodeset) + goto cleanup; + if (placement == -1) + placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; + } + + if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT) { + if (numatune->memory.nodeset || + def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_STATIC) + placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; + else + placement = VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO; + } + + if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC && + !numatune->memory.nodeset) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nodeset for NUMA memory tuning must be set " + "if 'placement' is 'static'")); + goto cleanup; + } + + if (placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) { + virBitmapFree(numatune->memory.nodeset); + numatune->memory.nodeset = NULL; + if (!def->cpumask) + def->placement_mode = VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO; + } + + if (placement != -1) + numatune->memory.placement = placement; + + ret = 0; + cleanup: + return ret; +} + +bool +virDomainNumatuneEquals(virDomainNumatunePtr n1, + virDomainNumatunePtr n2) +{ + if (!n1 && !n2) + return true; + + if (!n1 || !n2) + return false; + + if (n1->memory.mode != n2->memory.mode) + return false; + + if (n1->memory.placement != n2->memory.placement) + return false; + + return virBitmapEqual(n1->memory.nodeset, n2->memory.nodeset); +} + +bool +virDomainNumatuneHasPlacementAuto(virDomainNumatunePtr numatune) +{ + if (!numatune) + return false; + + if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) + return true; + + return false; +} diff --git a/src/conf/numatune_conf.h b/src/conf/numatune_conf.h index baac7bf..888cff1 100644 --- a/src/conf/numatune_conf.h +++ b/src/conf/numatune_conf.h @@ -23,9 +23,25 @@ #ifndef __NUMATUNE_CONF_H__ # define __NUMATUNE_CONF_H__ +# include <libxml/xpath.h> + # include "internal.h" # include "virutil.h" # include "virbitmap.h" +# include "virbuffer.h" + +/* + * Since numatune configuration is closely bound to the whole config, + * and because we don't have separate domain_conf headers for + * typedefs, structs and functions, we need to have a forward + * declaration here for virDomainDef due to circular dependencies. + */ +typedef struct _virDomainDef virDomainDef; +typedef virDomainDef *virDomainDefPtr; + + +typedef struct _virDomainNumatune virDomainNumatune; +typedef virDomainNumatune *virDomainNumatunePtr; typedef enum { VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT = 0, @@ -36,19 +52,51 @@ typedef enum { } virDomainNumatunePlacement; VIR_ENUM_DECL(virDomainNumatunePlacement) - VIR_ENUM_DECL(virDomainNumatuneMemMode) -typedef struct _virDomainNumatune virDomainNumatune; -typedef virDomainNumatune *virDomainNumatunePtr; -struct _virDomainNumatune { - struct { - virBitmapPtr nodemask; - int mode; /* enum virDomainNumatuneMemMode */ - int placement_mode; /* enum virDomainNumatunePlacement */ - } memory; /* pinning for all the memory */ - - /* Future NUMA tuning related stuff should go here. */ -}; + +void virDomainNumatuneFree(virDomainNumatunePtr numatune); + +/* + * XML Parse/Format functions + */ +int virDomainNumatuneParseXML(virDomainDefPtr def, xmlXPathContextPtr ctxt) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + +int virDomainNumatuneFormatXML(virBufferPtr buf, virDomainNumatunePtr numatune) + ATTRIBUTE_NONNULL(1); + +/* + * Getters + */ +virDomainNumatuneMemMode virDomainNumatuneGetMode(virDomainNumatunePtr numatune); + +virBitmapPtr virDomainNumatuneGetNodeset(virDomainNumatunePtr numatune, + virBitmapPtr auto_nodeset); + +/* + * Formatters + */ +char *virDomainNumatuneFormatNodeset(virDomainNumatunePtr numatune, + virBitmapPtr auto_nodeset); + +int virDomainNumatuneMaybeFormatNodeset(virDomainNumatunePtr numatune, + virBitmapPtr auto_nodeset, + char **mask); + +/* + * Setters + */ +int virDomainNumatuneSet(virDomainDefPtr def, int placement, + int mode, virBitmapPtr nodeset) + ATTRIBUTE_NONNULL(1); + +/* + * Other accessors + */ +bool virDomainNumatuneEquals(virDomainNumatunePtr n1, + virDomainNumatunePtr n2); + +bool virDomainNumatuneHasPlacementAuto(virDomainNumatunePtr numatune); #endif /* __NUMATUNE_CONF_H__ */ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index cf8bb91..0494d9d 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -200,6 +200,7 @@ virDomainDefGetDefaultEmulator; virDomainDefGetSecurityLabelDef; virDomainDefMaybeAddController; virDomainDefMaybeAddInput; +virDomainDefNeedsPlacementAdvice; virDomainDefNew; virDomainDefParseFile; virDomainDefParseNode; @@ -607,10 +608,20 @@ virNodeDeviceObjUnlock; # conf/numatune_conf.h +virDomainNumatuneEquals; +virDomainNumatuneFormatNodeset; +virDomainNumatuneFormatXML; +virDomainNumatuneFree; +virDomainNumatuneGetMode; +virDomainNumatuneGetNodeset; +virDomainNumatuneHasPlacementAuto; +virDomainNumatuneMaybeFormatNodeset; virDomainNumatuneMemModeTypeFromString; virDomainNumatuneMemModeTypeToString; +virDomainNumatuneParseXML; virDomainNumatunePlacementTypeFromString; virDomainNumatunePlacementTypeToString; +virDomainNumatuneSet; # conf/nwfilter_conf.h diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index b584e33..cc93823 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -79,22 +79,11 @@ static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, goto cleanup; } - if ((def->numatune.memory.nodemask || - (def->numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)) && - def->numatune.memory.mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { - if (def->numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) - mask = virBitmapFormat(nodemask); - else - mask = virBitmapFormat(def->numatune.memory.nodemask); - - if (!mask) - return -1; + if (virDomainNumatuneMaybeFormatNodeset(def->numatune, nodemask, &mask) < 0) + goto cleanup; - if (virCgroupSetCpusetMems(cgroup, mask) < 0) - goto cleanup; - } + if (mask && virCgroupSetCpusetMems(cgroup, mask) < 0) + goto cleanup; ret = 0; cleanup: diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 03b1d31..fd93f47 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -653,10 +653,7 @@ static int virLXCControllerGetNumadAdvice(virLXCControllerPtr ctrl, /* Get the advisory nodeset from numad if 'placement' of * either <vcpu> or <numatune> is 'auto'. */ - if ((ctrl->def->placement_mode == - VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || - (ctrl->def->numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)) { + if (virDomainDefNeedsPlacementAdvice(ctrl->def)) { nodeset = virNumaGetAutoPlacementAdvice(ctrl->def->vcpus, ctrl->def->mem.cur_balloon); if (!nodeset) diff --git a/src/lxc/lxc_native.c b/src/lxc/lxc_native.c index 30f7e55..bb15a36 100644 --- a/src/lxc/lxc_native.c +++ b/src/lxc/lxc_native.c @@ -1,6 +1,7 @@ /* * lxc_native.c: LXC native configuration import * + * Copyright (c) 2014 Red Hat, Inc. * Copyright (c) 2013 SUSE LINUX Products GmbH, Nuernberg, Germany. * * This library is free software; you can redistribute it and/or @@ -708,6 +709,7 @@ static int lxcSetCpusetTune(virDomainDefPtr def, virConfPtr properties) { virConfValuePtr value; + virBitmapPtr nodeset = NULL; if ((value = virConfGetValue(properties, "lxc.cgroup.cpuset.cpus")) && value->str) { @@ -719,12 +721,15 @@ lxcSetCpusetTune(virDomainDefPtr def, virConfPtr properties) } if ((value = virConfGetValue(properties, "lxc.cgroup.cpuset.mems")) && - value->str) { - def->numatune.memory.placement_mode = VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; - def->numatune.memory.mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; - if (virBitmapParse(value->str, 0, &def->numatune.memory.nodemask, - VIR_DOMAIN_CPUMASK_LEN) < 0) + value->str) { + if (virBitmapParse(value->str, 0, &nodeset, VIR_DOMAIN_CPUMASK_LEN) < 0) return -1; + if (virDomainNumatuneSet(def, VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC, + VIR_DOMAIN_NUMATUNE_MEM_STRICT, nodeset) < 0) { + virBitmapFree(nodeset); + return -1; + } + virBitmapFree(nodeset); } return 0; diff --git a/src/parallels/parallels_driver.c b/src/parallels/parallels_driver.c index 43568d1..a503dea 100644 --- a/src/parallels/parallels_driver.c +++ b/src/parallels/parallels_driver.c @@ -2078,12 +2078,7 @@ parallelsApplyChanges(virDomainObjPtr dom, virDomainDefPtr new) return -1; } - if (old->numatune.memory.mode != new->numatune.memory.mode || - old->numatune.memory.placement_mode != new->numatune.memory.placement_mode || - ((old->numatune.memory.nodemask != NULL || new->numatune.memory.nodemask != NULL) && - (old->numatune.memory.nodemask == NULL || new->numatune.memory.nodemask == NULL || - !virBitmapEqual(old->numatune.memory.nodemask, new->numatune.memory.nodemask)))){ - + if (!virDomainNumatuneEquals(old->numatune, new->numatune)) { virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", _("numa parameters are not supported " "by parallels driver")); diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index a24975b..dd393ee 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -601,23 +601,14 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm, if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) return 0; - if ((vm->def->numatune.memory.nodemask || - (vm->def->numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)) && - vm->def->numatune.memory.mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { - - if (vm->def->numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) - mem_mask = virBitmapFormat(nodemask); - else - mem_mask = virBitmapFormat(vm->def->numatune.memory.nodemask); - - if (!mem_mask) - goto cleanup; + if (virDomainNumatuneMaybeFormatNodeset(vm->def->numatune, + nodemask, + &mem_mask) < 0) + goto cleanup; - if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) - goto cleanup; - } + if (mem_mask && + virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) + goto cleanup; if (vm->def->cpumask || (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)) { diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 85c3684..e166ca4 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -8637,7 +8637,8 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm, size_t i = 0; int ret = -1; - if (vm->def->numatune.memory.mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + if (virDomainNumatuneGetMode(vm->def->numatune) != + VIR_DOMAIN_NUMATUNE_MEM_STRICT) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("change of nodeset for running domain " "requires strict numa mode")); @@ -8712,6 +8713,8 @@ qemuDomainSetNumaParameters(virDomainPtr dom, virQEMUDriverConfigPtr cfg = NULL; virCapsPtr caps = NULL; qemuDomainObjPrivatePtr priv; + virBitmapPtr nodeset = NULL; + int mode = -1; virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG, -1); @@ -8752,65 +8755,47 @@ qemuDomainSetNumaParameters(virDomainPtr dom, virTypedParameterPtr param = ¶ms[i]; if (STREQ(param->field, VIR_DOMAIN_NUMA_MODE)) { - int mode = param->value.i; + mode = param->value.i; - if (mode >= VIR_DOMAIN_NUMATUNE_PLACEMENT_LAST || - mode < VIR_DOMAIN_NUMATUNE_PLACEMENT_DEFAULT) - { + if (mode < 0 || mode >= VIR_DOMAIN_NUMATUNE_MEM_LAST) { virReportError(VIR_ERR_INVALID_ARG, - _("unsupported numa_mode: '%d'"), mode); + _("unsupported numatune mode: '%d'"), mode); goto cleanup; } - if ((flags & VIR_DOMAIN_AFFECT_LIVE) && - vm->def->numatune.memory.mode != mode) { - virReportError(VIR_ERR_OPERATION_INVALID, "%s", - _("can't change numa mode for running domain")); - goto cleanup; - } - - if (flags & VIR_DOMAIN_AFFECT_CONFIG) - persistentDef->numatune.memory.mode = mode; - } else if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) { - virBitmapPtr nodeset = NULL; - if (virBitmapParse(param->value.s, 0, &nodeset, - VIR_DOMAIN_CPUMASK_LEN) < 0) { + VIR_DOMAIN_CPUMASK_LEN) < 0) goto cleanup; - } - if (flags & VIR_DOMAIN_AFFECT_LIVE) { - if (qemuDomainSetNumaParamsLive(vm, caps, nodeset) < 0) { - virBitmapFree(nodeset); - goto cleanup; - } - - /* update vm->def here so that dumpxml can read the new - * values from vm->def. */ - virBitmapFree(vm->def->numatune.memory.nodemask); - - vm->def->numatune.memory.placement_mode = - VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; - vm->def->numatune.memory.nodemask = virBitmapNewCopy(nodeset); + if (virBitmapIsAllClear(nodeset)) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("Invalid nodeset for numatune")); + goto cleanup; } + } + } - if (flags & VIR_DOMAIN_AFFECT_CONFIG) { - virBitmapFree(persistentDef->numatune.memory.nodemask); - - persistentDef->numatune.memory.nodemask = nodeset; - persistentDef->numatune.memory.placement_mode = - VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC; - nodeset = NULL; - } - virBitmapFree(nodeset); + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + if (mode != -1 && + virDomainNumatuneGetMode(vm->def->numatune) != mode) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("can't change numatune mode for running domain")); + goto cleanup; } + + if (nodeset && + qemuDomainSetNumaParamsLive(vm, caps, nodeset) < 0) + goto cleanup; + + if (virDomainNumatuneSet(vm->def, -1, mode, nodeset) < 0) + goto cleanup; } if (flags & VIR_DOMAIN_AFFECT_CONFIG) { - if (!persistentDef->numatune.memory.placement_mode) - persistentDef->numatune.memory.placement_mode = - VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO; + if (virDomainNumatuneSet(persistentDef, -1, mode, nodeset) < 0) + goto cleanup; + if (virDomainSaveConfig(cfg->configDir, persistentDef) < 0) goto cleanup; } @@ -8818,6 +8803,7 @@ qemuDomainSetNumaParameters(virDomainPtr dom, ret = 0; cleanup: + virBitmapFree(nodeset); if (vm) virObjectUnlock(vm); virObjectUnref(caps); @@ -8886,15 +8872,17 @@ qemuDomainGetNumaParameters(virDomainPtr dom, if (virTypedParameterAssign(param, VIR_DOMAIN_NUMA_MODE, VIR_TYPED_PARAM_INT, 0) < 0) goto cleanup; + if (flags & VIR_DOMAIN_AFFECT_CONFIG) - param->value.i = persistentDef->numatune.memory.mode; + param->value.i = virDomainNumatuneGetMode(persistentDef->numatune); else - param->value.i = vm->def->numatune.memory.mode; + param->value.i = virDomainNumatuneGetMode(vm->def->numatune); break; case 1: /* fill numa nodeset here */ if (flags & VIR_DOMAIN_AFFECT_CONFIG) { - nodeset = virBitmapFormat(persistentDef->numatune.memory.nodemask); + nodeset = virDomainNumatuneFormatNodeset(persistentDef->numatune, + NULL); if (!nodeset) goto cleanup; } else { diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index d8bb679..4c57f15 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3898,10 +3898,7 @@ int qemuProcessStart(virConnectPtr conn, /* Get the advisory nodeset from numad if 'placement' of * either <vcpu> or <numatune> is 'auto'. */ - if ((vm->def->placement_mode == - VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) || - (vm->def->numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO)) { + if (virDomainDefNeedsPlacementAdvice(vm->def)) { nodeset = virNumaGetAutoPlacementAdvice(vm->def->vcpus, vm->def->mem.max_balloon); if (!nodeset) @@ -3909,8 +3906,7 @@ int qemuProcessStart(virConnectPtr conn, VIR_DEBUG("Nodeset returned from numad: %s", nodeset); - if (virBitmapParse(nodeset, 0, &nodemask, - VIR_DOMAIN_CPUMASK_LEN) < 0) + if (virBitmapParse(nodeset, 0, &nodemask, VIR_DOMAIN_CPUMASK_LEN) < 0) goto cleanup; } hookData.nodemask = nodemask; diff --git a/src/util/virnuma.c b/src/util/virnuma.c index bc5180a..087f679 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -87,11 +87,10 @@ virNumaGetAutoPlacementAdvice(unsigned short vcpus ATTRIBUTE_UNUSED, #if WITH_NUMACTL int -virNumaSetupMemoryPolicy(virDomainNumatune numatune, +virNumaSetupMemoryPolicy(virDomainNumatunePtr numatune, virBitmapPtr nodemask) { nodemask_t mask; - int mode = -1; int node = -1; int ret = -1; int bit = 0; @@ -99,19 +98,9 @@ virNumaSetupMemoryPolicy(virDomainNumatune numatune, int maxnode = 0; virBitmapPtr tmp_nodemask = NULL; - if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) { - if (!numatune.memory.nodemask) - return 0; - VIR_DEBUG("Set NUMA memory policy with specified nodeset"); - tmp_nodemask = numatune.memory.nodemask; - } else if (numatune.memory.placement_mode == - VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) { - VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad"); - tmp_nodemask = nodemask; - } else { + tmp_nodemask = virDomainNumatuneGetNodeset(numatune, nodemask); + if (!tmp_nodemask) return 0; - } if (numa_available() < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -134,13 +123,15 @@ virNumaSetupMemoryPolicy(virDomainNumatune numatune, nodemask_set(&mask, bit); } - mode = numatune.memory.mode; - - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + switch (virDomainNumatuneGetMode(numatune)) { + case VIR_DOMAIN_NUMATUNE_MEM_STRICT: numa_set_bind_policy(1); numa_set_membind(&mask); numa_set_bind_policy(0); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { + break; + + case VIR_DOMAIN_NUMATUNE_MEM_PREFERRED: + { int nnodes = 0; for (i = 0; i < NUMA_NUM_NODES; i++) { if (nodemask_isset(&mask, i)) { @@ -158,17 +149,16 @@ virNumaSetupMemoryPolicy(virDomainNumatune numatune, numa_set_bind_policy(0); numa_set_preferred(node); - } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { - numa_set_interleave_mask(&mask); - } else { - /* XXX: Shouldn't go here, as we already do checking when - * parsing domain XML. - */ - virReportError(VIR_ERR_XML_ERROR, - "%s", _("Invalid mode for memory NUMA tuning.")); - goto cleanup; } + break; + case VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE: + numa_set_interleave_mask(&mask); + break; + + case VIR_DOMAIN_NUMATUNE_MEM_LAST: + break; + } ret = 0; cleanup: @@ -327,10 +317,10 @@ virNumaGetNodeCPUs(int node, #else int -virNumaSetupMemoryPolicy(virDomainNumatune numatune, +virNumaSetupMemoryPolicy(virDomainNumatunePtr numatune, virBitmapPtr nodemask ATTRIBUTE_UNUSED) { - if (numatune.memory.nodemask) { + if (virDomainNumatuneGetNodeset(numatune, NULL)) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("libvirt is compiled without NUMA tuning support")); diff --git a/src/util/virnuma.h b/src/util/virnuma.h index d2ab6af..13ebec6 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -31,7 +31,7 @@ char *virNumaGetAutoPlacementAdvice(unsigned short vcups, unsigned long long balloon); -int virNumaSetupMemoryPolicy(virDomainNumatune numatune, +int virNumaSetupMemoryPolicy(virDomainNumatunePtr numatune, virBitmapPtr nodemask); bool virNumaIsAvailable(void); diff --git a/tests/qemuxml2argvdata/qemuxml2argv-numatune-auto-prefer.xml b/tests/qemuxml2argvdata/qemuxml2argv-numatune-auto-prefer.xml new file mode 100644 index 0000000..63f0d1f --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-numatune-auto-prefer.xml @@ -0,0 +1,29 @@ +<domain type='qemu'> + <name>QEMUGuest</name> + <uuid>9f4b6512-e73a-4a25-93e8-5307802821ce</uuid> + <memory unit='KiB'>65536</memory> + <currentMemory unit='KiB'>65536</currentMemory> + <vcpu placement='auto'>1</vcpu> + <numatune> + <memory mode='preferred'/> + </numatune> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0' memory='65536'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/kvm</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-auto-prefer.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-auto-prefer.xml new file mode 100644 index 0000000..19761b4 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-auto-prefer.xml @@ -0,0 +1,29 @@ +<domain type='qemu'> + <name>QEMUGuest</name> + <uuid>9f4b6512-e73a-4a25-93e8-5307802821ce</uuid> + <memory unit='KiB'>65536</memory> + <currentMemory unit='KiB'>65536</currentMemory> + <vcpu placement='auto'>1</vcpu> + <numatune> + <memory mode='preferred' placement='auto'/> + </numatune> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0' memory='65536'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/kvm</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 27281c8..b650f54 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -372,6 +372,8 @@ mymain(void) DO_TEST_DIFFERENT("cpu-numa1"); DO_TEST_DIFFERENT("cpu-numa2"); + DO_TEST_DIFFERENT("numatune-auto-prefer"); + virObjectUnref(driver.caps); virObjectUnref(driver.xmlopt); -- 2.0.0

Martin Kletzander wrote:
There were numerous places where numatune configuration (and thus domain config as well) was changed in different ways. On some places this even resulted in persistent domain definition not to be stable (it would change with daemon's restart).
In order to uniformly change how numatune config is dealt with, all the internals are now accessible directly only in numatune_conf.c and outside this file accessors must be used.
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- po/POTFILES.in | 1 + src/conf/domain_conf.c | 159 ++--------- src/conf/domain_conf.h | 8 +- src/conf/numatune_conf.c | 318 +++++++++++++++++++++ src/conf/numatune_conf.h | 72 ++++- src/libvirt_private.syms | 11 + src/lxc/lxc_cgroup.c | 19 +- src/lxc/lxc_controller.c | 5 +- src/lxc/lxc_native.c | 15 +- src/parallels/parallels_driver.c | 7 +- src/qemu/qemu_cgroup.c | 23 +- src/qemu/qemu_driver.c | 84 +++--- src/qemu/qemu_process.c | 8 +- src/util/virnuma.c | 48 ++-- src/util/virnuma.h | 2 +- .../qemuxml2argv-numatune-auto-prefer.xml | 29 ++ .../qemuxml2xmlout-numatune-auto-prefer.xml | 29 ++ tests/qemuxml2xmltest.c | 2 + 18 files changed, 555 insertions(+), 285 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-auto-prefer.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-auto-prefer.xml
Looks like this breaks build with clang: gmake[3]: Entering directory `/usr/home/novel/code/libvirt/src' CC util/libvirt_util_la-virclosecallbacks.lo In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:70:35: error: redefinition of typedef 'virDomainNumatune' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef struct _virDomainNumatune virDomainNumatune; ^ ../src/conf/numatune_conf.h:43:35: note: previous definition is here typedef struct _virDomainNumatune virDomainNumatune; ^ In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:71:28: error: redefinition of typedef 'virDomainNumatunePtr' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef virDomainNumatune *virDomainNumatunePtr; ^ ../src/conf/numatune_conf.h:44:28: note: previous definition is here typedef virDomainNumatune *virDomainNumatunePtr; ^ In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:1857:30: error: redefinition of typedef 'virDomainDef' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef struct _virDomainDef virDomainDef; ^ ../src/conf/numatune_conf.h:39:30: note: previous definition is here typedef struct _virDomainDef virDomainDef; ^ In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:1858:23: error: redefinition of typedef 'virDomainDefPtr' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef virDomainDef *virDomainDefPtr; ^ ../src/conf/numatune_conf.h:40:23: note: previous definition is here typedef virDomainDef *virDomainDefPtr; ^ 4 errors generated. Should we probably drop the repeating definitions from domain_conf.h as we're including numatune_conf.h anyway? Roman Bogorodskiy

Roman Bogorodskiy wrote:
Martin Kletzander wrote:
There were numerous places where numatune configuration (and thus domain config as well) was changed in different ways. On some places this even resulted in persistent domain definition not to be stable (it would change with daemon's restart).
In order to uniformly change how numatune config is dealt with, all the internals are now accessible directly only in numatune_conf.c and outside this file accessors must be used.
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- po/POTFILES.in | 1 + src/conf/domain_conf.c | 159 ++--------- src/conf/domain_conf.h | 8 +- src/conf/numatune_conf.c | 318 +++++++++++++++++++++ src/conf/numatune_conf.h | 72 ++++- src/libvirt_private.syms | 11 + src/lxc/lxc_cgroup.c | 19 +- src/lxc/lxc_controller.c | 5 +- src/lxc/lxc_native.c | 15 +- src/parallels/parallels_driver.c | 7 +- src/qemu/qemu_cgroup.c | 23 +- src/qemu/qemu_driver.c | 84 +++--- src/qemu/qemu_process.c | 8 +- src/util/virnuma.c | 48 ++-- src/util/virnuma.h | 2 +- .../qemuxml2argv-numatune-auto-prefer.xml | 29 ++ .../qemuxml2xmlout-numatune-auto-prefer.xml | 29 ++ tests/qemuxml2xmltest.c | 2 + 18 files changed, 555 insertions(+), 285 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-auto-prefer.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-auto-prefer.xml
Looks like this breaks build with clang:
gmake[3]: Entering directory `/usr/home/novel/code/libvirt/src' CC util/libvirt_util_la-virclosecallbacks.lo In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:70:35: error: redefinition of typedef 'virDomainNumatune' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef struct _virDomainNumatune virDomainNumatune; ^ ../src/conf/numatune_conf.h:43:35: note: previous definition is here typedef struct _virDomainNumatune virDomainNumatune; ^ In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:71:28: error: redefinition of typedef 'virDomainNumatunePtr' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef virDomainNumatune *virDomainNumatunePtr; ^ ../src/conf/numatune_conf.h:44:28: note: previous definition is here typedef virDomainNumatune *virDomainNumatunePtr; ^ In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:1857:30: error: redefinition of typedef 'virDomainDef' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef struct _virDomainDef virDomainDef; ^ ../src/conf/numatune_conf.h:39:30: note: previous definition is here typedef struct _virDomainDef virDomainDef; ^ In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:1858:23: error: redefinition of typedef 'virDomainDefPtr' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef virDomainDef *virDomainDefPtr; ^ ../src/conf/numatune_conf.h:40:23: note: previous definition is here typedef virDomainDef *virDomainDefPtr; ^ 4 errors generated.
Should we probably drop the repeating definitions from domain_conf.h as we're including numatune_conf.h anyway?
I got it fixed by the following diff: diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 4c9b7e8..e4d7988 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -67,9 +67,6 @@ typedef virDomainFSDef *virDomainFSDefPtr; typedef struct _virDomainNetDef virDomainNetDef; typedef virDomainNetDef *virDomainNetDefPtr; -typedef struct _virDomainNumatune virDomainNumatune; -typedef virDomainNumatune *virDomainNumatunePtr; - typedef struct _virDomainInputDef virDomainInputDef; typedef virDomainInputDef *virDomainInputDefPtr; @@ -1854,8 +1851,6 @@ struct _virDomainResourceDef { * NB: if adding to this struct, virDomainDefCheckABIStability * may well need an update */ -typedef struct _virDomainDef virDomainDef; -typedef virDomainDef *virDomainDefPtr; struct _virDomainDef { int virtType; int id; I didn't check it beyond build and check/syntax-check though. Anyway, it doesn't look quite clean to have typedefs in numatune_conf.h for the struct declared in domain_conf.h. Roman Bogorodskiy

On 07/17/2014 10:57 AM, Roman Bogorodskiy wrote:
Roman Bogorodskiy wrote:
Looks like this breaks build with clang:
gmake[3]: Entering directory `/usr/home/novel/code/libvirt/src' CC util/libvirt_util_la-virclosecallbacks.lo In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:70:35: error: redefinition of typedef 'virDomainNumatune' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef struct _virDomainNumatune virDomainNumatune; ^ ../src/conf/numatune_conf.h:43:35: note: previous definition is here typedef struct _virDomainNumatune virDomainNumatune; ^
I got it fixed by the following diff:
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 4c9b7e8..e4d7988 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -67,9 +67,6 @@ typedef virDomainFSDef *virDomainFSDefPtr; typedef struct _virDomainNetDef virDomainNetDef; typedef virDomainNetDef *virDomainNetDefPtr;
-typedef struct _virDomainNumatune virDomainNumatune; -typedef virDomainNumatune *virDomainNumatunePtr; - typedef struct _virDomainInputDef virDomainInputDef; typedef virDomainInputDef *virDomainInputDefPtr;
ACK to this hunk.
@@ -1854,8 +1851,6 @@ struct _virDomainResourceDef { * NB: if adding to this struct, virDomainDefCheckABIStability * may well need an update */ -typedef struct _virDomainDef virDomainDef; -typedef virDomainDef *virDomainDefPtr; struct _virDomainDef {
But this hunk feels fishy. Why does numatune_conf.h need virDomainDef? It's a leaky abstraction - virDomainNumatuneNodeParseXML is accessing def->cpu directly instead of limiting itself to just def->numatune. Also, virDomainNumatuneParseXML is accessing def->placement_mode, which argues that placement_mode should be part of def->numatune rather than an independent variable. Yes, this hunk solves the compiler fix, but it means that we are just cementing that we didn't abstract things into a single object. That is, my ideal setup would be that numatune has access to all the pieces that it reads/modifies as parameters, but not access the entire virDomainDef, and then we don't have a circular referencing situation and don't need numatune_conf.h to be the source of our typedef declaration of virDomainDef.
I didn't check it beyond build and check/syntax-check though. Anyway, it doesn't look quite clean to have typedefs in numatune_conf.h for the struct declared in domain_conf.h.
I'd be fine with your patch going in as a stop-gap compilation fix, even if I still think that we could restructure the code better to make numatune_conf.h self-contained and move the typedef for virDomainDef back to domain_conf.h. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

Eric Blake wrote:
On 07/17/2014 10:57 AM, Roman Bogorodskiy wrote:
Roman Bogorodskiy wrote:
Looks like this breaks build with clang:
gmake[3]: Entering directory `/usr/home/novel/code/libvirt/src' CC util/libvirt_util_la-virclosecallbacks.lo In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:70:35: error: redefinition of typedef 'virDomainNumatune' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef struct _virDomainNumatune virDomainNumatune; ^ ../src/conf/numatune_conf.h:43:35: note: previous definition is here typedef struct _virDomainNumatune virDomainNumatune; ^
I got it fixed by the following diff:
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 4c9b7e8..e4d7988 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -67,9 +67,6 @@ typedef virDomainFSDef *virDomainFSDefPtr; typedef struct _virDomainNetDef virDomainNetDef; typedef virDomainNetDef *virDomainNetDefPtr;
-typedef struct _virDomainNumatune virDomainNumatune; -typedef virDomainNumatune *virDomainNumatunePtr; - typedef struct _virDomainInputDef virDomainInputDef; typedef virDomainInputDef *virDomainInputDefPtr;
ACK to this hunk.
@@ -1854,8 +1851,6 @@ struct _virDomainResourceDef { * NB: if adding to this struct, virDomainDefCheckABIStability * may well need an update */ -typedef struct _virDomainDef virDomainDef; -typedef virDomainDef *virDomainDefPtr; struct _virDomainDef {
But this hunk feels fishy. Why does numatune_conf.h need virDomainDef? It's a leaky abstraction - virDomainNumatuneNodeParseXML is accessing def->cpu directly instead of limiting itself to just def->numatune. Also, virDomainNumatuneParseXML is accessing def->placement_mode, which argues that placement_mode should be part of def->numatune rather than an independent variable.
Yes, this hunk solves the compiler fix, but it means that we are just cementing that we didn't abstract things into a single object. That is, my ideal setup would be that numatune has access to all the pieces that it reads/modifies as parameters, but not access the entire virDomainDef, and then we don't have a circular referencing situation and don't need numatune_conf.h to be the source of our typedef declaration of virDomainDef.
I didn't check it beyond build and check/syntax-check though. Anyway, it doesn't look quite clean to have typedefs in numatune_conf.h for the struct declared in domain_conf.h.
I'd be fine with your patch going in as a stop-gap compilation fix, even if I still think that we could restructure the code better to make numatune_conf.h self-contained and move the typedef for virDomainDef back to domain_conf.h.
Good, so I pushed the patch. Roman Bogorodskiy

On Thu, Jul 17, 2014 at 10:02:45PM +0400, Roman Bogorodskiy wrote:
Eric Blake wrote:
On 07/17/2014 10:57 AM, Roman Bogorodskiy wrote:
Roman Bogorodskiy wrote:
Looks like this breaks build with clang:
gmake[3]: Entering directory `/usr/home/novel/code/libvirt/src' CC util/libvirt_util_la-virclosecallbacks.lo In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:70:35: error: redefinition of typedef 'virDomainNumatune' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef struct _virDomainNumatune virDomainNumatune; ^ ../src/conf/numatune_conf.h:43:35: note: previous definition is here typedef struct _virDomainNumatune virDomainNumatune; ^
I got it fixed by the following diff:
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 4c9b7e8..e4d7988 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -67,9 +67,6 @@ typedef virDomainFSDef *virDomainFSDefPtr; typedef struct _virDomainNetDef virDomainNetDef; typedef virDomainNetDef *virDomainNetDefPtr;
-typedef struct _virDomainNumatune virDomainNumatune; -typedef virDomainNumatune *virDomainNumatunePtr; - typedef struct _virDomainInputDef virDomainInputDef; typedef virDomainInputDef *virDomainInputDefPtr;
ACK to this hunk.
@@ -1854,8 +1851,6 @@ struct _virDomainResourceDef { * NB: if adding to this struct, virDomainDefCheckABIStability * may well need an update */ -typedef struct _virDomainDef virDomainDef; -typedef virDomainDef *virDomainDefPtr; struct _virDomainDef {
But this hunk feels fishy. Why does numatune_conf.h need virDomainDef? It's a leaky abstraction - virDomainNumatuneNodeParseXML is accessing def->cpu directly instead of limiting itself to just def->numatune. Also, virDomainNumatuneParseXML is accessing def->placement_mode, which argues that placement_mode should be part of def->numatune rather than an independent variable.
I tried abstracting as much as possible, and the numatune parsing code was really a mess. Now that it's abstracted a bit, I could rework it so that it doesn't require virDomainDef at all. It also needs to change something in the virDomainDef, but that could be some kind of output of the function.
Yes, this hunk solves the compiler fix, but it means that we are just cementing that we didn't abstract things into a single object. That is, my ideal setup would be that numatune has access to all the pieces that it reads/modifies as parameters, but not access the entire virDomainDef, and then we don't have a circular referencing situation and don't need numatune_conf.h to be the source of our typedef declaration of virDomainDef.
I didn't check it beyond build and check/syntax-check though. Anyway, it doesn't look quite clean to have typedefs in numatune_conf.h for the struct declared in domain_conf.h.
It wasn't problem with newer gcc since the pre-definition is the same as the actual typedef. And because all the gcc versions me and Michal compiled this on were newer, we didn't hit the bug and therefore I thought circular dependencies can be solved like this, so I haven't try that hard to abstract the whole numatune all the way :).
I'd be fine with your patch going in as a stop-gap compilation fix, even if I still think that we could restructure the code better to make numatune_conf.h self-contained and move the typedef for virDomainDef back to domain_conf.h.
Good, so I pushed the patch.
And I'm reworking it so it looks and works as we want. Martin

Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- docs/formatdomain.html.in | 15 ++ docs/schemas/domaincommon.rng | 17 ++ src/conf/numatune_conf.c | 187 +++++++++++++++++++-- .../qemuxml2argv-numatune-memnode-no-memory.xml | 30 ++++ .../qemuxml2argv-numatune-memnode-nocpu.xml | 25 +++ .../qemuxml2argv-numatune-memnode.xml | 33 ++++ .../qemuxml2argv-numatune-memnodes-problematic.xml | 31 ++++ tests/qemuxml2argvtest.c | 2 + .../qemuxml2xmlout-numatune-memnode.xml | 33 ++++ tests/qemuxml2xmltest.c | 2 + 10 files changed, 362 insertions(+), 13 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-nocpu.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnodes-problematic.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-memnode.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 9f1082b..1301639 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -709,6 +709,8 @@ ... <numatune> <memory mode="strict" nodeset="1-4,^3"/> + <memnode cellid="0" mode="strict" nodeset="1"/> + <memnode cellid="2" mode="preferred" nodeset="2"/> </numatune> ... </domain> @@ -745,6 +747,19 @@ <span class='since'>Since 0.9.3</span> </dd> + <dt><code>memnode</code></dt> + <dd> + Optional <code>memnode</code> elements can specify memory allocation + policies per each guest NUMA node. For those nodes having no + corresponding <code>memnode</code> element, the default from + element <code>memory</code> will be used. Attribute <code>cellid</code> + addresses guest NUMA node for which the settings are applied. + Attributes <code>mode</code> and <code>nodeset</code> have the same + meaning and syntax as in <code>memory</code> element. + + This setting is not compatible with automatic placement. + <span class='since'>QEMU Since 1.2.7</span> + </dd> </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 155a33e..0b31261 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -789,6 +789,23 @@ </choice> </element> </optional> + <zeroOrMore> + <element name="memnode"> + <attribute name="cellid"> + <ref name="unsignedInt"/> + </attribute> + <attribute name="mode"> + <choice> + <value>strict</value> + <value>preferred</value> + <value>interleave</value> + </choice> + </attribute> + <attribute name='nodeset'> + <ref name='cpuset'/> + </attribute> + </element> + </zeroOrMore> </element> </define> diff --git a/src/conf/numatune_conf.c b/src/conf/numatune_conf.c index 6ce1e2d..a39c028 100644 --- a/src/conf/numatune_conf.c +++ b/src/conf/numatune_conf.c @@ -42,17 +42,140 @@ VIR_ENUM_IMPL(virDomainNumatunePlacement, "static", "auto"); +typedef struct _virDomainNumatuneNode virDomainNumatuneNode; +typedef virDomainNumatuneNode *virDomainNumatuneNodePtr; + struct _virDomainNumatune { struct { + bool specified; virBitmapPtr nodeset; virDomainNumatuneMemMode mode; virDomainNumatunePlacement placement; } memory; /* pinning for all the memory */ + struct _virDomainNumatuneNode { + virBitmapPtr nodeset; + virDomainNumatuneMemMode mode; + } *mem_nodes; /* fine tuning per guest node */ + size_t nmem_nodes; + /* Future NUMA tuning related stuff should go here. */ }; +static int +virDomainNumatuneNodeParseXML(virDomainDefPtr def, + xmlXPathContextPtr ctxt) +{ + char *tmp = NULL; + int n = 0;; + int ret = -1; + size_t i = 0; + xmlNodePtr *nodes = NULL; + + if ((n = virXPathNodeSet("./numatune/memnode", ctxt, &nodes)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot extract memnode nodes")); + goto cleanup; + } + + if (!n) + return 0; + + if (def->numatune && def->numatune->memory.specified && + def->numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Per-node binding is not compatible with " + "automatic NUMA placement.")); + goto cleanup; + } + + if (!def->cpu || !def->cpu->ncells) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Element 'memnode' is invalid without " + "any guest NUMA cells")); + goto cleanup; + } + + if (!def->numatune && VIR_ALLOC(def->numatune) < 0) + goto cleanup; + + VIR_FREE(def->numatune->mem_nodes); + if (VIR_ALLOC_N(def->numatune->mem_nodes, def->cpu->ncells) < 0) + goto cleanup; + + def->numatune->nmem_nodes = def->cpu->ncells; + + for (i = 0; i < n; i++) { + int mode = 0; + unsigned int cellid = 0; + virDomainNumatuneNodePtr mem_node = NULL; + xmlNodePtr cur_node = nodes[i]; + + tmp = virXMLPropString(cur_node, "cellid"); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing required cellid attribute " + "in memnode element")); + goto cleanup; + } + if (virStrToLong_uip(tmp, NULL, 10, &cellid) < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("Invalid cellid attribute in memnode element: %s"), + tmp); + goto cleanup; + } + VIR_FREE(tmp); + + if (cellid >= def->numatune->nmem_nodes) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Argument 'cellid' in memnode element must " + "correspond to existing guest's NUMA cell")); + goto cleanup; + } + + mem_node = &def->numatune->mem_nodes[cellid]; + + if (mem_node->nodeset) { + virReportError(VIR_ERR_XML_ERROR, + _("Multiple memnode elements with cellid %u"), + cellid); + goto cleanup; + } + + tmp = virXMLPropString(cur_node, "mode"); + if (!tmp) { + mem_node->mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; + } else { + if ((mode = virDomainNumatuneMemModeTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid mode attribute in memnode element")); + goto cleanup; + } + VIR_FREE(tmp); + mem_node->mode = mode; + } + + tmp = virXMLPropString(cur_node, "nodeset"); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing required nodeset attribute " + "in memnode element")); + goto cleanup; + } + if (virBitmapParse(tmp, 0, &mem_node->nodeset, + VIR_DOMAIN_CPUMASK_LEN) < 0) + goto cleanup; + VIR_FREE(tmp); + } + + ret = 0; + cleanup: + VIR_FREE(nodes); + VIR_FREE(tmp); + return ret; +} + int virDomainNumatuneParseXML(virDomainDefPtr def, xmlXPathContextPtr ctxt) @@ -82,8 +205,11 @@ virDomainNumatuneParseXML(virDomainDefPtr def, def->numatune = NULL; } - if (!node && def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) + if (!node && def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) { + if (virDomainNumatuneNodeParseXML(def, ctxt) < 0) + goto cleanup; return 0; + } if (!node) { /* We know that def->placement_mode is "auto" if we're here */ @@ -125,10 +251,9 @@ virDomainNumatuneParseXML(virDomainDefPtr def, if (virDomainNumatuneSet(def, placement, mode, nodeset) < 0) goto cleanup; - if (!n) { - ret = 0; + if (virDomainNumatuneNodeParseXML(def, ctxt) < 0) goto cleanup; - } + ret = 0; cleanup: @@ -143,6 +268,7 @@ virDomainNumatuneFormatXML(virBufferPtr buf, { const char *tmp = NULL; char *nodeset = NULL; + size_t i = 0; if (!numatune) return 0; @@ -150,17 +276,36 @@ virDomainNumatuneFormatXML(virBufferPtr buf, virBufferAddLit(buf, "<numatune>\n"); virBufferAdjustIndent(buf, 2); - tmp = virDomainNumatuneMemModeTypeToString(numatune->memory.mode); - virBufferAsprintf(buf, "<memory mode='%s' ", tmp); + if (numatune->memory.specified) { + tmp = virDomainNumatuneMemModeTypeToString(numatune->memory.mode); + virBufferAsprintf(buf, "<memory mode='%s' ", tmp); + + if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) { + if (!(nodeset = virBitmapFormat(numatune->memory.nodeset))) + return -1; + virBufferAsprintf(buf, "nodeset='%s'/>\n", nodeset); + VIR_FREE(nodeset); + } else if (numatune->memory.placement) { + tmp = virDomainNumatunePlacementTypeToString(numatune->memory.placement); + virBufferAsprintf(buf, "placement='%s'/>\n", tmp); + } + } + + for (i = 0; i < numatune->nmem_nodes; i++) { + virDomainNumatuneNodePtr mem_node = &numatune->mem_nodes[i]; - if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) { - if (!(nodeset = virBitmapFormat(numatune->memory.nodeset))) + if (!mem_node->nodeset) + continue; + + if (!(nodeset = virBitmapFormat(mem_node->nodeset))) return -1; - virBufferAsprintf(buf, "nodeset='%s'/>\n", nodeset); + + virBufferAsprintf(buf, + "<memnode cellid='%zu' mode='%s' nodeset='%s'/>\n", + i, + virDomainNumatuneMemModeTypeToString(mem_node->mode), + nodeset); VIR_FREE(nodeset); - } else if (numatune->memory.placement) { - tmp = virDomainNumatunePlacementTypeToString(numatune->memory.placement); - virBufferAsprintf(buf, "placement='%s'/>\n", tmp); } virBufferAdjustIndent(buf, -2); @@ -171,10 +316,15 @@ virDomainNumatuneFormatXML(virBufferPtr buf, void virDomainNumatuneFree(virDomainNumatunePtr numatune) { + size_t i = 0; + if (!numatune) return; virBitmapFree(numatune->memory.nodeset); + for (i = 0; i < numatune->nmem_nodes; i++) + virBitmapFree(numatune->mem_nodes[i].nodeset); + VIR_FREE(numatune->mem_nodes); VIR_FREE(numatune); } @@ -182,7 +332,7 @@ virDomainNumatuneFree(virDomainNumatunePtr numatune) virDomainNumatuneMemMode virDomainNumatuneGetMode(virDomainNumatunePtr numatune) { - return numatune ? numatune->memory.mode : 0; + return (numatune && numatune->memory.specified) ? numatune->memory.mode : 0; } virBitmapPtr @@ -318,6 +468,8 @@ virDomainNumatuneSet(virDomainDefPtr def, if (placement != -1) numatune->memory.placement = placement; + numatune->memory.specified = true; + ret = 0; cleanup: return ret; @@ -333,6 +485,12 @@ virDomainNumatuneEquals(virDomainNumatunePtr n1, if (!n1 || !n2) return false; + if (!n1->memory.specified && !n2->memory.specified) + return true; + + if (!n1->memory.specified || !n2->memory.specified) + return false; + if (n1->memory.mode != n2->memory.mode) return false; @@ -348,6 +506,9 @@ virDomainNumatuneHasPlacementAuto(virDomainNumatunePtr numatune) if (!numatune) return false; + if (!numatune->memory.specified) + return false; + if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) return true; diff --git a/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.xml b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.xml new file mode 100644 index 0000000..4b2efa2 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.xml @@ -0,0 +1,30 @@ +<domain type='qemu'> + <name>QEMUGuest</name> + <uuid>9f4b6512-e73a-4a25-93e8-5307802821ce</uuid> + <memory unit='KiB'>65536</memory> + <currentMemory unit='KiB'>65536</currentMemory> + <vcpu placement='static'>2</vcpu> + <numatune> + <memnode cellid='0' mode='preferred' nodeset='3'/> + </numatune> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0' memory='32768'/> + <cell id='1' cpus='1' memory='32768'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/kvm</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-nocpu.xml b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-nocpu.xml new file mode 100644 index 0000000..7b0e248 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-nocpu.xml @@ -0,0 +1,25 @@ +<domain type='kvm'> + <name>QEMUGuest</name> + <uuid>9f4b6512-e73a-4a25-93e8-5307802821ce</uuid> + <memory unit='KiB'>65536</memory> + <currentMemory unit='KiB'>65536</currentMemory> + <vcpu placement='static'>1</vcpu> + <numatune> + <memory mode='strict' nodeset='0,2'/> + <memnode cellid='0' mode='interleave' nodeset='3'/> + </numatune> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/kvm</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml new file mode 100644 index 0000000..49b328c --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml @@ -0,0 +1,33 @@ +<domain type='qemu'> + <name>QEMUGuest</name> + <uuid>9f4b6512-e73a-4a25-93e8-5307802821ce</uuid> + <memory unit='KiB'>24682468</memory> + <currentMemory unit='KiB'>24682468</currentMemory> + <vcpu placement='static'>32</vcpu> + <numatune> + <memory mode='strict' nodeset='0-7'/> + <memnode cellid='0' mode='preferred' nodeset='3'/> + <memnode cellid='2' mode='strict' nodeset='1-2,5-7,^6'/> + </numatune> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0' memory='20002'/> + <cell id='1' cpus='1-27,29' memory='660066'/> + <cell id='2' cpus='28-31,^29' memory='24002400'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/kvm</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnodes-problematic.xml b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnodes-problematic.xml new file mode 100644 index 0000000..bb4e4af --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnodes-problematic.xml @@ -0,0 +1,31 @@ +<domain type='qemu'> + <name>QEMUGuest</name> + <uuid>9f4b6512-e73a-4a25-93e8-5307802821ce</uuid> + <memory unit='KiB'>65536</memory> + <currentMemory unit='KiB'>65536</currentMemory> + <vcpu placement='auto'>2</vcpu> + <numatune> + <memory placement='auto'/> + <memnode cellid='0' mode='strict' nodeset='3'/> + </numatune> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0' memory='32768'/> + <cell id='1' cpus='1' memory='32768'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/kvm</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 414afda..308a376 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -1196,6 +1196,8 @@ mymain(void) DO_TEST("cputune-zero-shares", QEMU_CAPS_NAME); DO_TEST("numatune-memory", NONE); DO_TEST("numatune-auto-nodeset-invalid", NONE); + DO_TEST_PARSE_ERROR("numatune-memnode-nocpu", NONE); + DO_TEST_PARSE_ERROR("numatune-memnodes-problematic", NONE); DO_TEST("numad", NONE); DO_TEST("numad-auto-vcpu-static-numatune", NONE); DO_TEST("numad-auto-memory-vcpu-cpuset", NONE); diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-memnode.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-memnode.xml new file mode 100644 index 0000000..82b5f61 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-memnode.xml @@ -0,0 +1,33 @@ +<domain type='qemu'> + <name>QEMUGuest</name> + <uuid>9f4b6512-e73a-4a25-93e8-5307802821ce</uuid> + <memory unit='KiB'>24682468</memory> + <currentMemory unit='KiB'>24682468</currentMemory> + <vcpu placement='static'>32</vcpu> + <numatune> + <memory mode='strict' nodeset='0-7'/> + <memnode cellid='0' mode='preferred' nodeset='3'/> + <memnode cellid='2' mode='strict' nodeset='1-2,5,7'/> + </numatune> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0' memory='20002'/> + <cell id='1' cpus='1-27,29' memory='660066'/> + <cell id='2' cpus='28-31,^29' memory='24002400'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/kvm</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index b650f54..7398f16 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -373,6 +373,8 @@ mymain(void) DO_TEST_DIFFERENT("cpu-numa2"); DO_TEST_DIFFERENT("numatune-auto-prefer"); + DO_TEST_DIFFERENT("numatune-memnode"); + DO_TEST("numatune-memnode-no-memory"); virObjectUnref(driver.caps); virObjectUnref(driver.xmlopt); -- 2.0.0

On 07/16/2014 08:42 AM, Martin Kletzander wrote:
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- docs/formatdomain.html.in | 15 ++ docs/schemas/domaincommon.rng | 17 ++ src/conf/numatune_conf.c | 187 +++++++++++++++++++-- .../qemuxml2argv-numatune-memnode-no-memory.xml | 30 ++++ .../qemuxml2argv-numatune-memnode-nocpu.xml | 25 +++ .../qemuxml2argv-numatune-memnode.xml | 33 ++++ .../qemuxml2argv-numatune-memnodes-problematic.xml | 31 ++++ tests/qemuxml2argvtest.c | 2 + .../qemuxml2xmlout-numatune-memnode.xml | 33 ++++ tests/qemuxml2xmltest.c | 2 + 10 files changed, 362 insertions(+), 13 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-nocpu.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnodes-problematic.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-memnode.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 9f1082b..1301639 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -709,6 +709,8 @@ ... <numatune> <memory mode="strict" nodeset="1-4,^3"/> + <memnode cellid="0" mode="strict" nodeset="1"/> + <memnode cellid="2" mode="preferred" nodeset="2"/>
Ah, the ability to skip cellid's (for the cells that inherit the default policies from <memory> makes it essential to have ids for correlation in the earlier patch in this series.
</numatune> ... </domain> @@ -745,6 +747,19 @@
<span class='since'>Since 0.9.3</span> </dd> + <dt><code>memnode</code></dt> + <dd> + Optional <code>memnode</code> elements can specify memory allocation + policies per each guest NUMA node. For those nodes having no + corresponding <code>memnode</code> element, the default from + element <code>memory</code> will be used. Attribute <code>cellid</code> + addresses guest NUMA node for which the settings are applied. + Attributes <code>mode</code> and <code>nodeset</code> have the same + meaning and syntax as in <code>memory</code> element. + + This setting is not compatible with automatic placement. + <span class='since'>QEMU Since 1.2.7</span>
Again, schema documentation looks okay to me.
+++ b/docs/schemas/domaincommon.rng @@ -789,6 +789,23 @@ </choice> </element> </optional> + <zeroOrMore> + <element name="memnode"> + <attribute name="cellid"> + <ref name="unsignedInt"/> + </attribute> + <attribute name="mode"> + <choice> + <value>strict</value> + <value>preferred</value> + <value>interleave</value> + </choice> + </attribute> + <attribute name='nodeset'> + <ref name='cpuset'/> + </attribute> + </element> + </zeroOrMore> </element>
Missing an <interleave> here (<memory> and <memnode> should be swappable with one another). -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Wed, Jul 16, 2014 at 12:21:24PM -0600, Eric Blake wrote:
On 07/16/2014 08:42 AM, Martin Kletzander wrote:
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- [...] +++ b/docs/schemas/domaincommon.rng @@ -789,6 +789,23 @@ </choice> </element> </optional> + <zeroOrMore> + <element name="memnode"> + <attribute name="cellid"> + <ref name="unsignedInt"/> + </attribute> + <attribute name="mode"> + <choice> + <value>strict</value> + <value>preferred</value> + <value>interleave</value> + </choice> + </attribute> + <attribute name='nodeset'> + <ref name='cpuset'/> + </attribute> + </element> + </zeroOrMore> </element>
Missing an <interleave> here (<memory> and <memnode> should be swappable with one another).
Oh! My bad. So sorry :-( Good point, is this OK to push as trivial (git diff -w): diff --git i/docs/schemas/domaincommon.rng w/docs/schemas/domaincommon.rng index a0ea300..fb5bdb3 100644 --- i/docs/schemas/domaincommon.rng +++ w/docs/schemas/domaincommon.rng @@ -790,6 +790,7 @@ </element> </optional> <zeroOrMore> + <interleave> <element name="memnode"> <attribute name="cellid"> <ref name="unsignedInt"/> @@ -805,6 +806,7 @@ <ref name='cpuset'/> </attribute> </element> + </interleave> </zeroOrMore> </element> </define> -- Martin

On 07/16/2014 12:33 PM, Martin Kletzander wrote:
On Wed, Jul 16, 2014 at 12:21:24PM -0600, Eric Blake wrote:
On 07/16/2014 08:42 AM, Martin Kletzander wrote:
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- [...]
Missing an <interleave> here (<memory> and <memnode> should be swappable with one another).
Oh! My bad. So sorry :-(
Good point, is this OK to push as trivial (git diff -w):
Count this as my ACK :)
diff --git i/docs/schemas/domaincommon.rng w/docs/schemas/domaincommon.rng index a0ea300..fb5bdb3 100644 --- i/docs/schemas/domaincommon.rng +++ w/docs/schemas/domaincommon.rng @@ -790,6 +790,7 @@ </element> </optional> <zeroOrMore> + <interleave> <element name="memnode">
I'm assuming the odd spacing here is due to pasting into the email body, not how it actually looked in the diff. That, and diff -w already plays games with spacing. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Wed, Jul 16, 2014 at 12:43:07PM -0600, Eric Blake wrote:
On 07/16/2014 12:33 PM, Martin Kletzander wrote:
On Wed, Jul 16, 2014 at 12:21:24PM -0600, Eric Blake wrote:
On 07/16/2014 08:42 AM, Martin Kletzander wrote:
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- [...]
Missing an <interleave> here (<memory> and <memnode> should be swappable with one another).
Oh! My bad. So sorry :-(
Good point, is this OK to push as trivial (git diff -w):
Count this as my ACK :)
I pushed it then, thank you.
diff --git i/docs/schemas/domaincommon.rng w/docs/schemas/domaincommon.rng index a0ea300..fb5bdb3 100644 --- i/docs/schemas/domaincommon.rng +++ w/docs/schemas/domaincommon.rng @@ -790,6 +790,7 @@ </element> </optional> <zeroOrMore> + <interleave> <element name="memnode">
I'm assuming the odd spacing here is due to pasting into the email body, not how it actually looked in the diff. That, and diff -w already plays games with spacing.
diff -w looked OK when I pasted it in the mail body, but somewhere on the way it got smudged. Martin

On 07/16/2014 02:43 PM, Martin Kletzander wrote:
Good point, is this OK to push as trivial (git diff -w):
Count this as my ACK :)
I pushed it then, thank you.
Not my day. I was so focused on the 'diff -w' aspect that I completely overlooked another aspect. The patch is wrong:
diff --git i/docs/schemas/domaincommon.rng w/docs/schemas/domaincommon.rng index a0ea300..fb5bdb3 100644 --- i/docs/schemas/domaincommon.rng +++ w/docs/schemas/domaincommon.rng @@ -790,6 +790,7 @@ </element> </optional> <zeroOrMore> + <interleave>
<interleave> makes no difference here. As the <zeroOrMore> has only one <element> child, there is nothing to be interleaved. I meant for it to go one level higher, outside the <zeroOrMore>, where it can also interleave with <memory>.
<element name="memnode">
I'm assuming the odd spacing here is due to pasting into the email body, not how it actually looked in the diff. That, and diff -w already plays games with spacing.
diff -w looked OK when I pasted it in the mail body, but somewhere on the way it got smudged.
As penance, I'm proposing this followup: diff --git i/docs/schemas/domaincommon.rng w/docs/schemas/domaincommon.rng index fb5bdb3..2caeef9 100644 --- i/docs/schemas/domaincommon.rng +++ w/docs/schemas/domaincommon.rng @@ -759,6 +759,7 @@ <!-- All the NUMA related tunables would go in the numatune --> <define name="numatune"> <element name="numatune"> + <interleave> <optional> <element name="memory"> <optional> @@ -790,7 +791,6 @@ </element> </optional> <zeroOrMore> - <interleave> <element name="memnode"> <attribute name="cellid"> <ref name="unsignedInt"/> @@ -806,8 +806,8 @@ <ref name='cpuset'/> </attribute> </element> - </interleave> </zeroOrMore> + </interleave> </element> </define> -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 07/16/2014 02:53 PM, Eric Blake wrote:
<zeroOrMore> + <interleave>
<interleave> makes no difference here. As the <zeroOrMore> has only one <element> child, there is nothing to be interleaved. I meant for it to go one level higher, outside the <zeroOrMore>, where it can also interleave with <memory>.
<element name="memnode">
I'm assuming the odd spacing here is due to pasting into the email body, not how it actually looked in the diff. That, and diff -w already plays games with spacing.
diff -w looked OK when I pasted it in the mail body, but somewhere on the way it got smudged.
As penance, I'm proposing this followup:
and also squashing in this change to the testsuite, to expose the difference: diff --git a/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml index 49b328c..440413b 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml +++ b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml @@ -5,8 +5,8 @@ <currentMemory unit='KiB'>24682468</currentMemory> <vcpu placement='static'>32</vcpu> <numatune> - <memory mode='strict' nodeset='0-7'/> <memnode cellid='0' mode='preferred' nodeset='3'/> + <memory mode='strict' nodeset='0-7'/> <memnode cellid='2' mode='strict' nodeset='1-2,5-7,^6'/> </numatune> <os> -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Wed, Jul 16, 2014 at 02:53:09PM -0600, Eric Blake wrote:
On 07/16/2014 02:43 PM, Martin Kletzander wrote:
Good point, is this OK to push as trivial (git diff -w):
Count this as my ACK :)
I pushed it then, thank you.
Not my day. I was so focused on the 'diff -w' aspect that I completely overlooked another aspect. The patch is wrong:
diff --git i/docs/schemas/domaincommon.rng w/docs/schemas/domaincommon.rng index a0ea300..fb5bdb3 100644 --- i/docs/schemas/domaincommon.rng +++ w/docs/schemas/domaincommon.rng @@ -790,6 +790,7 @@ </element> </optional> <zeroOrMore> + <interleave>
<interleave> makes no difference here. As the <zeroOrMore> has only one <element> child, there is nothing to be interleaved. I meant for it to go one level higher, outside the <zeroOrMore>, where it can also interleave with <memory>.
<element name="memnode">
I'm assuming the odd spacing here is due to pasting into the email body, not how it actually looked in the diff. That, and diff -w already plays games with spacing.
diff -w looked OK when I pasted it in the mail body, but somewhere on the way it got smudged.
As penance, I'm proposing this followup:
diff --git i/docs/schemas/domaincommon.rng w/docs/schemas/domaincommon.rng index fb5bdb3..2caeef9 100644 --- i/docs/schemas/domaincommon.rng +++ w/docs/schemas/domaincommon.rng @@ -759,6 +759,7 @@ <!-- All the NUMA related tunables would go in the numatune --> <define name="numatune"> <element name="numatune"> + <interleave> <optional> <element name="memory"> <optional> @@ -790,7 +791,6 @@ </element> </optional> <zeroOrMore> - <interleave> <element name="memnode"> <attribute name="cellid"> <ref name="unsignedInt"/> @@ -806,8 +806,8 @@ <ref name='cpuset'/> </attribute> </element> - </interleave> </zeroOrMore> + </interleave> </element> </define>
Seeing this diff I see what I did wrong. Completely wrong to be accurate. It wasn't my day either, hopefully today will be better. ACK from me. Martin

On 07/16/2014 09:38 PM, Martin Kletzander wrote:
<interleave> makes no difference here. As the <zeroOrMore> has only one <element> child, there is nothing to be interleaved. I meant for it to go one level higher, outside the <zeroOrMore>, where it can also interleave with <memory>.
As penance, I'm proposing this followup:
Seeing this diff I see what I did wrong. Completely wrong to be accurate. It wasn't my day either, hopefully today will be better.
ACK from me.
Pushed, including the testsuite enhancement. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/conf/numatune_conf.c | 111 ++++++++++++++++++++++++++++++++++++++++------- src/conf/numatune_conf.h | 14 ++++-- src/libvirt_private.syms | 1 + src/lxc/lxc_cgroup.c | 3 +- src/qemu/qemu_cgroup.c | 2 +- src/qemu/qemu_driver.c | 10 ++--- src/util/virnuma.c | 6 +-- 7 files changed, 117 insertions(+), 30 deletions(-) diff --git a/src/conf/numatune_conf.c b/src/conf/numatune_conf.c index a39c028..82418aa 100644 --- a/src/conf/numatune_conf.c +++ b/src/conf/numatune_conf.c @@ -63,6 +63,18 @@ struct _virDomainNumatune { }; +static inline bool +virDomainNumatuneNodeSpecified(virDomainNumatunePtr numatune, + int cellid) +{ + if (numatune && + cellid >= 0 && + cellid < numatune->nmem_nodes) + return numatune->mem_nodes[cellid].nodeset; + + return false; +} + static int virDomainNumatuneNodeParseXML(virDomainDefPtr def, xmlXPathContextPtr ctxt) @@ -330,26 +342,37 @@ virDomainNumatuneFree(virDomainNumatunePtr numatune) } virDomainNumatuneMemMode -virDomainNumatuneGetMode(virDomainNumatunePtr numatune) +virDomainNumatuneGetMode(virDomainNumatunePtr numatune, + int cellid) { - return (numatune && numatune->memory.specified) ? numatune->memory.mode : 0; + if (!numatune) + return 0; + + if (virDomainNumatuneNodeSpecified(numatune, cellid)) + return numatune->mem_nodes[cellid].mode; + + if (numatune->memory.specified) + return numatune->memory.mode; + + return 0; } virBitmapPtr virDomainNumatuneGetNodeset(virDomainNumatunePtr numatune, - virBitmapPtr auto_nodeset) + virBitmapPtr auto_nodeset, + int cellid) { if (!numatune) return NULL; - if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) + if (numatune->memory.specified && + numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO) return auto_nodeset; - /* - * This weird logic has the same meaning as switch with - * auto/static/default, but can be more readably changed later. - */ - if (numatune->memory.placement != VIR_DOMAIN_NUMATUNE_PLACEMENT_STATIC) + if (virDomainNumatuneNodeSpecified(numatune, cellid)) + return numatune->mem_nodes[cellid].nodeset; + + if (!numatune->memory.specified) return NULL; return numatune->memory.nodeset; @@ -357,23 +380,31 @@ virDomainNumatuneGetNodeset(virDomainNumatunePtr numatune, char * virDomainNumatuneFormatNodeset(virDomainNumatunePtr numatune, - virBitmapPtr auto_nodeset) + virBitmapPtr auto_nodeset, + int cellid) { return virBitmapFormat(virDomainNumatuneGetNodeset(numatune, - auto_nodeset)); + auto_nodeset, + cellid)); } int virDomainNumatuneMaybeFormatNodeset(virDomainNumatunePtr numatune, virBitmapPtr auto_nodeset, - char **mask) + char **mask, + int cellid) { *mask = NULL; if (!numatune) return 0; - if (numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO && + if (!virDomainNumatuneNodeSpecified(numatune, cellid) && + !numatune->memory.specified) + return 0; + + if (numatune->memory.specified && + numatune->memory.placement == VIR_DOMAIN_NUMATUNE_PLACEMENT_AUTO && !auto_nodeset) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Advice from numad is needed in case of " @@ -381,7 +412,7 @@ virDomainNumatuneMaybeFormatNodeset(virDomainNumatunePtr numatune, return -1; } - *mask = virDomainNumatuneFormatNodeset(numatune, auto_nodeset); + *mask = virDomainNumatuneFormatNodeset(numatune, auto_nodeset, cellid); if (!*mask) return -1; @@ -475,6 +506,35 @@ virDomainNumatuneSet(virDomainDefPtr def, return ret; } +static bool +virDomainNumatuneNodesEqual(virDomainNumatunePtr n1, + virDomainNumatunePtr n2) +{ + size_t i = 0; + + if (n1->nmem_nodes != n2->nmem_nodes) + return false; + + for (i = 0; i < n1->nmem_nodes; i++) { + virDomainNumatuneNodePtr nd1 = &n1->mem_nodes[i]; + virDomainNumatuneNodePtr nd2 = &n2->mem_nodes[i]; + + if (!nd1->nodeset && !nd2->nodeset) + continue; + + if (!nd1->nodeset || !nd2->nodeset) + return false; + + if (nd1->mode != nd2->mode) + return false; + + if (!virBitmapEqual(nd1->nodeset, nd2->nodeset)) + return false; + } + + return true; +} + bool virDomainNumatuneEquals(virDomainNumatunePtr n1, virDomainNumatunePtr n2) @@ -486,7 +546,7 @@ virDomainNumatuneEquals(virDomainNumatunePtr n1, return false; if (!n1->memory.specified && !n2->memory.specified) - return true; + return virDomainNumatuneNodesEqual(n1, n2); if (!n1->memory.specified || !n2->memory.specified) return false; @@ -497,7 +557,10 @@ virDomainNumatuneEquals(virDomainNumatunePtr n1, if (n1->memory.placement != n2->memory.placement) return false; - return virBitmapEqual(n1->memory.nodeset, n2->memory.nodeset); + if (!virBitmapEqual(n1->memory.nodeset, n2->memory.nodeset)) + return false; + + return virDomainNumatuneNodesEqual(n1, n2); } bool @@ -514,3 +577,19 @@ virDomainNumatuneHasPlacementAuto(virDomainNumatunePtr numatune) return false; } + +bool +virDomainNumatuneHasPerNodeBinding(virDomainNumatunePtr numatune) +{ + size_t i = 0; + + if (!numatune) + return false; + + for (i = 0; i < numatune->nmem_nodes; i++) { + if (numatune->mem_nodes[i].nodeset) + return true; + } + + return false; +} diff --git a/src/conf/numatune_conf.h b/src/conf/numatune_conf.h index 888cff1..c86118f 100644 --- a/src/conf/numatune_conf.h +++ b/src/conf/numatune_conf.h @@ -69,20 +69,24 @@ int virDomainNumatuneFormatXML(virBufferPtr buf, virDomainNumatunePtr numatune) /* * Getters */ -virDomainNumatuneMemMode virDomainNumatuneGetMode(virDomainNumatunePtr numatune); +virDomainNumatuneMemMode virDomainNumatuneGetMode(virDomainNumatunePtr numatune, + int cellid); virBitmapPtr virDomainNumatuneGetNodeset(virDomainNumatunePtr numatune, - virBitmapPtr auto_nodeset); + virBitmapPtr auto_nodeset, + int cellid); /* * Formatters */ char *virDomainNumatuneFormatNodeset(virDomainNumatunePtr numatune, - virBitmapPtr auto_nodeset); + virBitmapPtr auto_nodeset, + int cellid); int virDomainNumatuneMaybeFormatNodeset(virDomainNumatunePtr numatune, virBitmapPtr auto_nodeset, - char **mask); + char **mask, + int cellid); /* * Setters @@ -99,4 +103,6 @@ bool virDomainNumatuneEquals(virDomainNumatunePtr n1, bool virDomainNumatuneHasPlacementAuto(virDomainNumatunePtr numatune); +bool virDomainNumatuneHasPerNodeBinding(virDomainNumatunePtr numatune); + #endif /* __NUMATUNE_CONF_H__ */ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 0494d9d..8d3671c 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -614,6 +614,7 @@ virDomainNumatuneFormatXML; virDomainNumatuneFree; virDomainNumatuneGetMode; virDomainNumatuneGetNodeset; +virDomainNumatuneHasPerNodeBinding; virDomainNumatuneHasPlacementAuto; virDomainNumatuneMaybeFormatNodeset; virDomainNumatuneMemModeTypeFromString; diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index cc93823..395ea05 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -79,7 +79,8 @@ static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, goto cleanup; } - if (virDomainNumatuneMaybeFormatNodeset(def->numatune, nodemask, &mask) < 0) + if (virDomainNumatuneMaybeFormatNodeset(def->numatune, nodemask, + &mask, -1) < 0) goto cleanup; if (mask && virCgroupSetCpusetMems(cgroup, mask) < 0) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index dd393ee..40fe448 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -603,7 +603,7 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm, if (virDomainNumatuneMaybeFormatNodeset(vm->def->numatune, nodemask, - &mem_mask) < 0) + &mem_mask, -1) < 0) goto cleanup; if (mem_mask && diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e166ca4..33541d3 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -8637,7 +8637,7 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm, size_t i = 0; int ret = -1; - if (virDomainNumatuneGetMode(vm->def->numatune) != + if (virDomainNumatuneGetMode(vm->def->numatune, -1) != VIR_DOMAIN_NUMATUNE_MEM_STRICT) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("change of nodeset for running domain " @@ -8778,7 +8778,7 @@ qemuDomainSetNumaParameters(virDomainPtr dom, if (flags & VIR_DOMAIN_AFFECT_LIVE) { if (mode != -1 && - virDomainNumatuneGetMode(vm->def->numatune) != mode) { + virDomainNumatuneGetMode(vm->def->numatune, -1) != mode) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("can't change numatune mode for running domain")); goto cleanup; @@ -8874,15 +8874,15 @@ qemuDomainGetNumaParameters(virDomainPtr dom, goto cleanup; if (flags & VIR_DOMAIN_AFFECT_CONFIG) - param->value.i = virDomainNumatuneGetMode(persistentDef->numatune); + param->value.i = virDomainNumatuneGetMode(persistentDef->numatune, -1); else - param->value.i = virDomainNumatuneGetMode(vm->def->numatune); + param->value.i = virDomainNumatuneGetMode(vm->def->numatune, -1); break; case 1: /* fill numa nodeset here */ if (flags & VIR_DOMAIN_AFFECT_CONFIG) { nodeset = virDomainNumatuneFormatNodeset(persistentDef->numatune, - NULL); + NULL, -1); if (!nodeset) goto cleanup; } else { diff --git a/src/util/virnuma.c b/src/util/virnuma.c index 087f679..46f48d2 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -98,7 +98,7 @@ virNumaSetupMemoryPolicy(virDomainNumatunePtr numatune, int maxnode = 0; virBitmapPtr tmp_nodemask = NULL; - tmp_nodemask = virDomainNumatuneGetNodeset(numatune, nodemask); + tmp_nodemask = virDomainNumatuneGetNodeset(numatune, nodemask, -1); if (!tmp_nodemask) return 0; @@ -123,7 +123,7 @@ virNumaSetupMemoryPolicy(virDomainNumatunePtr numatune, nodemask_set(&mask, bit); } - switch (virDomainNumatuneGetMode(numatune)) { + switch (virDomainNumatuneGetMode(numatune, -1)) { case VIR_DOMAIN_NUMATUNE_MEM_STRICT: numa_set_bind_policy(1); numa_set_membind(&mask); @@ -320,7 +320,7 @@ int virNumaSetupMemoryPolicy(virDomainNumatunePtr numatune, virBitmapPtr nodemask ATTRIBUTE_UNUSED) { - if (virDomainNumatuneGetNodeset(numatune, NULL)) { + if (virDomainNumatuneGetNodeset(numatune, NULL, -1)) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("libvirt is compiled without NUMA tuning support")); -- 2.0.0

That can be lately achieved with by having .param == NULL in the virQEMUCapsCommandLineProps struct. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_capabilities.c | 10 ++++++++-- src/qemu/qemu_monitor.c | 6 ++++-- src/qemu/qemu_monitor.h | 3 ++- src/qemu/qemu_monitor_json.c | 8 +++++++- src/qemu/qemu_monitor_json.h | 3 ++- tests/qemumonitorjsontest.c | 20 +++++++++++++++++--- 6 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index c5fef53..a8d4648 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -2429,6 +2429,7 @@ static int virQEMUCapsProbeQMPCommandLine(virQEMUCapsPtr qemuCaps, qemuMonitorPtr mon) { + bool found = false; int nvalues; char **values; size_t i, j; @@ -2436,10 +2437,15 @@ virQEMUCapsProbeQMPCommandLine(virQEMUCapsPtr qemuCaps, for (i = 0; i < ARRAY_CARDINALITY(virQEMUCapsCommandLine); i++) { if ((nvalues = qemuMonitorGetCommandLineOptionParameters(mon, virQEMUCapsCommandLine[i].option, - &values)) < 0) + &values, + &found)) < 0) return -1; + + if (found && !virQEMUCapsCommandLine[i].param) + virQEMUCapsSet(qemuCaps, virQEMUCapsCommandLine[i].flag); + for (j = 0; j < nvalues; j++) { - if (STREQ(virQEMUCapsCommandLine[i].param, values[j])) { + if (STREQ_NULLABLE(virQEMUCapsCommandLine[i].param, values[j])) { virQEMUCapsSet(qemuCaps, virQEMUCapsCommandLine[i].flag); break; } diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index db3dd73..3d9f87b 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -3659,7 +3659,8 @@ int qemuMonitorGetEvents(qemuMonitorPtr mon, int qemuMonitorGetCommandLineOptionParameters(qemuMonitorPtr mon, const char *option, - char ***params) + char ***params, + bool *found) { VIR_DEBUG("mon=%p option=%s params=%p", mon, option, params); @@ -3675,7 +3676,8 @@ qemuMonitorGetCommandLineOptionParameters(qemuMonitorPtr mon, return -1; } - return qemuMonitorJSONGetCommandLineOptionParameters(mon, option, params); + return qemuMonitorJSONGetCommandLineOptionParameters(mon, option, + params, found); } diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h index 8a23267..c3695f2 100644 --- a/src/qemu/qemu_monitor.h +++ b/src/qemu/qemu_monitor.h @@ -748,7 +748,8 @@ int qemuMonitorGetEvents(qemuMonitorPtr mon, char ***events); int qemuMonitorGetCommandLineOptionParameters(qemuMonitorPtr mon, const char *option, - char ***params); + char ***params, + bool *found); int qemuMonitorGetKVMState(qemuMonitorPtr mon, bool *enabled, diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index bef6a14..a62c02f 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -4504,7 +4504,8 @@ int qemuMonitorJSONGetEvents(qemuMonitorPtr mon, int qemuMonitorJSONGetCommandLineOptionParameters(qemuMonitorPtr mon, const char *option, - char ***params) + char ***params, + bool *found) { int ret; virJSONValuePtr cmd = NULL; @@ -4516,6 +4517,8 @@ qemuMonitorJSONGetCommandLineOptionParameters(qemuMonitorPtr mon, size_t i; *params = NULL; + if (found) + *found = false; /* query-command-line-options has fixed output for a given qemu * binary; but since callers want to query parameters for one @@ -4576,6 +4579,9 @@ qemuMonitorJSONGetCommandLineOptionParameters(qemuMonitorPtr mon, goto cleanup; } + if (found) + *found = true; + if ((n = virJSONValueArraySize(data)) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("query-command-line-options parameter data was not " diff --git a/src/qemu/qemu_monitor_json.h b/src/qemu/qemu_monitor_json.h index 385211c..5f6c846 100644 --- a/src/qemu/qemu_monitor_json.h +++ b/src/qemu/qemu_monitor_json.h @@ -332,7 +332,8 @@ int qemuMonitorJSONGetEvents(qemuMonitorPtr mon, ATTRIBUTE_NONNULL(2); int qemuMonitorJSONGetCommandLineOptionParameters(qemuMonitorPtr mon, const char *option, - char ***params) + char ***params, + bool *found) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(3); int qemuMonitorJSONGetKVMState(qemuMonitorPtr mon, diff --git a/tests/qemumonitorjsontest.c b/tests/qemumonitorjsontest.c index 6debe13..baee80a 100644 --- a/tests/qemumonitorjsontest.c +++ b/tests/qemumonitorjsontest.c @@ -583,6 +583,7 @@ testQemuMonitorJSONGetCommandLineOptionParameters(const void *data) int ret = -1; char **params = NULL; int nparams = 0; + bool found = false; if (!test) return -1; @@ -604,7 +605,8 @@ testQemuMonitorJSONGetCommandLineOptionParameters(const void *data) /* present with params */ if ((nparams = qemuMonitorGetCommandLineOptionParameters(qemuMonitorTestGetMonitor(test), "option-rom", - ¶ms)) < 0) + ¶ms, + NULL)) < 0) goto cleanup; if (nparams != 2) { @@ -634,7 +636,8 @@ testQemuMonitorJSONGetCommandLineOptionParameters(const void *data) /* present but empty */ if ((nparams = qemuMonitorGetCommandLineOptionParameters(qemuMonitorTestGetMonitor(test), "acpi", - ¶ms)) < 0) + ¶ms, + &found)) < 0) goto cleanup; if (nparams != 0) { @@ -642,6 +645,11 @@ testQemuMonitorJSONGetCommandLineOptionParameters(const void *data) "nparams was %d, expected 0", nparams); goto cleanup; } + if (!found) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + "found was false, expected true"); + goto cleanup; + } if (params && params[0]) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", "unexpected array contents"); @@ -654,7 +662,8 @@ testQemuMonitorJSONGetCommandLineOptionParameters(const void *data) /* no such option */ if ((nparams = qemuMonitorGetCommandLineOptionParameters(qemuMonitorTestGetMonitor(test), "foobar", - ¶ms)) < 0) + ¶ms, + &found)) < 0) goto cleanup; if (nparams != 0) { @@ -662,6 +671,11 @@ testQemuMonitorJSONGetCommandLineOptionParameters(const void *data) "nparams was %d, expected 0", nparams); goto cleanup; } + if (found) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + "found was true, expected false"); + goto cleanup; + } if (params && params[0]) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", "unexpected array contents"); -- 2.0.0

The numa patch series in qemu adds "memory-backend-ram" object type by which we can tell whether we can use such objects. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_capabilities.c | 3 +++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 4 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index a8d4648..c1a8947 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -260,6 +260,8 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST, "msg-timestamp", "active-commit", "change-backing-file", + + "memory-backend-ram", /* 170 */ ); @@ -1477,6 +1479,7 @@ struct virQEMUCapsStringFlags virQEMUCapsObjectTypes[] = { { "ich9-intel-hda", QEMU_CAPS_DEVICE_ICH9_INTEL_HDA }, { "pvpanic", QEMU_CAPS_DEVICE_PANIC }, { "usb-kbd", QEMU_CAPS_DEVICE_USB_KBD }, + { "memory-backend-ram", QEMU_CAPS_OBJECT_MEMORY_RAM }, }; static struct virQEMUCapsStringFlags virQEMUCapsObjectPropsVirtioBlk[] = { diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 99cf9ed..c661d5a 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -209,6 +209,7 @@ typedef enum { QEMU_CAPS_MSG_TIMESTAMP = 167, /* -msg timestamp */ QEMU_CAPS_ACTIVE_COMMIT = 168, /* block-commit works without 'top' */ QEMU_CAPS_CHANGE_BACKING_FILE = 169, /* change name of backing file in metadata */ + QEMU_CAPS_OBJECT_MEMORY_RAM = 170, /* -object memory-backend-ram */ QEMU_CAPS_LAST, /* this must always be the last item */ } virQEMUCapsFlags; -- 2.0.0

When qemu switched to using OptsVisitor for -numa parameter, it did two things in the same patch. One of them is that the numa parameter is now visible in "query-command-line-options", the second one is that it enabled using disjoint cpu ranges for -numa specification. This will be used in later patch. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + tests/qemucapabilitiesdata/caps_1.6.50-1.caps | 1 + tests/qemucapabilitiesdata/caps_1.6.50-1.replies | 5 +++++ 4 files changed, 9 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index c1a8947..07306e5 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -262,6 +262,7 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST, "change-backing-file", "memory-backend-ram", /* 170 */ + "numa", ); @@ -2426,6 +2427,7 @@ static struct virQEMUCapsCommandLineProps virQEMUCapsCommandLine[] = { { "boot-opts", "reboot-timeout", QEMU_CAPS_REBOOT_TIMEOUT }, { "spice", "disable-agent-file-xfer", QEMU_CAPS_SPICE_FILE_XFER_DISABLE }, { "msg", "timestamp", QEMU_CAPS_MSG_TIMESTAMP }, + { "numa", NULL, QEMU_CAPS_NUMA }, }; static int diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index c661d5a..4332633 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -210,6 +210,7 @@ typedef enum { QEMU_CAPS_ACTIVE_COMMIT = 168, /* block-commit works without 'top' */ QEMU_CAPS_CHANGE_BACKING_FILE = 169, /* change name of backing file in metadata */ QEMU_CAPS_OBJECT_MEMORY_RAM = 170, /* -object memory-backend-ram */ + QEMU_CAPS_NUMA = 171, /* newer -numa handling with disjoint cpu ranges */ QEMU_CAPS_LAST, /* this must always be the last item */ } virQEMUCapsFlags; diff --git a/tests/qemucapabilitiesdata/caps_1.6.50-1.caps b/tests/qemucapabilitiesdata/caps_1.6.50-1.caps index 32bccdb..4b9f693 100644 --- a/tests/qemucapabilitiesdata/caps_1.6.50-1.caps +++ b/tests/qemucapabilitiesdata/caps_1.6.50-1.caps @@ -142,4 +142,5 @@ <flag name='usb-kbd'/> <flag name='host-pci-multidomain'/> <flag name='msg-timestamp'/> + <flag name='numa'/> </qemuCaps> diff --git a/tests/qemucapabilitiesdata/caps_1.6.50-1.replies b/tests/qemucapabilitiesdata/caps_1.6.50-1.replies index a60542a..ec7451f 100644 --- a/tests/qemucapabilitiesdata/caps_1.6.50-1.replies +++ b/tests/qemucapabilitiesdata/caps_1.6.50-1.replies @@ -2389,6 +2389,11 @@ { "parameters": [ ], + "option": "numa" + }, + { + "parameters": [ + ], "option": "device" }, { -- 2.0.0

Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_command.c | 26 ++++++++++---------- .../qemuxml2argv-cpu-numa-disjoint.args | 6 +++++ .../qemuxml2argv-cpu-numa-disjoint.xml | 28 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 ++ tests/qemuxml2xmltest.c | 1 + 5 files changed, 51 insertions(+), 12 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.xml diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index bf0f56e..f1dbb34 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -6376,11 +6376,13 @@ qemuBuildSmpArgStr(const virDomainDef *def, } static int -qemuBuildNumaArgStr(const virDomainDef *def, virCommandPtr cmd) +qemuBuildNumaArgStr(const virDomainDef *def, + virCommandPtr cmd, + virQEMUCapsPtr qemuCaps) { size_t i; virBuffer buf = VIR_BUFFER_INITIALIZER; - char *cpumask = NULL; + char *cpumask = NULL, *tmpmask = NULL, *next = NULL; int ret = -1; for (i = 0; i < def->cpu->ncells; i++) { @@ -6392,7 +6394,8 @@ qemuBuildNumaArgStr(const virDomainDef *def, virCommandPtr cmd) if (!(cpumask = virBitmapFormat(def->cpu->cells[i].cpumask))) goto cleanup; - if (strchr(cpumask, ',')) { + if (strchr(cpumask, ',') && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("disjoint NUMA cpu ranges are not supported " "with this QEMU")); @@ -6401,15 +6404,14 @@ qemuBuildNumaArgStr(const virDomainDef *def, virCommandPtr cmd) virCommandAddArg(cmd, "-numa"); virBufferAsprintf(&buf, "node,nodeid=%zu", i); - virBufferAddLit(&buf, ",cpus="); - /* Up through qemu 1.4, -numa does not accept a cpus - * argument any more complex than start-stop. - * - * XXX For qemu 1.5, the syntax has not yet been decided; - * but when it is, we need a capability bit and - * translation of our cpumask into the qemu syntax. */ - virBufferAdd(&buf, cpumask, -1); + for (tmpmask = cpumask; tmpmask; tmpmask = next) { + if ((next = strchr(tmpmask, ','))) + *(next++) = '\0'; + virBufferAddLit(&buf, ",cpus="); + virBufferAdd(&buf, tmpmask, -1); + } + virBufferAsprintf(&buf, ",mem=%d", cellmem); virCommandAddArgBuffer(cmd, &buf); @@ -7240,7 +7242,7 @@ qemuBuildCommandLine(virConnectPtr conn, VIR_FREE(smp); if (def->cpu && def->cpu->ncells) - if (qemuBuildNumaArgStr(def, cmd) < 0) + if (qemuBuildNumaArgStr(def, cmd, qemuCaps) < 0) goto error; if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_UUID)) diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.args b/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.args new file mode 100644 index 0000000..073e84b --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.args @@ -0,0 +1,6 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ +/usr/bin/qemu -S -M pc \ +-m 214 -smp 16 -numa node,nodeid=0,cpus=0-3,cpus=8-11,mem=107 \ +-numa node,nodeid=1,cpus=4-7,cpus=12-15,mem=107 -nographic -monitor \ +unix:/tmp/test-monitor,server,nowait -no-acpi -boot n -usb -net none -serial none \ +-parallel none diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.xml b/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.xml new file mode 100644 index 0000000..474a238 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.xml @@ -0,0 +1,28 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>219100</memory> + <currentMemory unit='KiB'>219100</currentMemory> + <vcpu placement='static'>16</vcpu> + <os> + <type arch='x86_64' machine='pc'>hvm</type> + <boot dev='network'/> + </os> + <cpu> + <topology sockets='2' cores='4' threads='2'/> + <numa> + <cell id='0' cpus='0-3,8-11' memory='109550'/> + <cell id='1' cpus='4-7,12-15' memory='109550'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <controller type='usb' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 308a376..56d3fd5 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -1178,6 +1178,8 @@ mymain(void) DO_TEST("cpu-numa1", NONE); DO_TEST("cpu-numa2", QEMU_CAPS_SMP_TOPOLOGY); DO_TEST_PARSE_ERROR("cpu-numa3", NONE); + DO_TEST_FAILURE("cpu-numa-disjoint", NONE); + DO_TEST("cpu-numa-disjoint", QEMU_CAPS_NUMA); DO_TEST("cpu-host-model", NONE); skipLegacyCPUs = true; DO_TEST("cpu-host-model-fallback", NONE); diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 7398f16..ac073ac 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -371,6 +371,7 @@ mymain(void) DO_TEST_DIFFERENT("cpu-numa1"); DO_TEST_DIFFERENT("cpu-numa2"); + DO_TEST("cpu-numa-disjoint"); DO_TEST_DIFFERENT("numatune-auto-prefer"); DO_TEST_DIFFERENT("numatune-memnode"); -- 2.0.0

Currently, we only bind the whole QEMU domain to memory nodes specified in nodemask altogether. That, however, doesn't make much sense when one wants to control from where the memory for particular guest nodes should be allocated. QEMU allows us to do that by specifying 'host-nodes' parameter for the 'memory-backend-ram' object, so let's use that. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_command.c | 59 +++++++++++++++++++++- .../qemuxml2argv-numatune-memnode-no-memory.args | 8 +++ .../qemuxml2argv-numatune-memnode.args | 11 ++++ tests/qemuxml2argvtest.c | 7 +++ 4 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.args diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index f1dbb34..6235a74 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -150,6 +150,11 @@ VIR_ENUM_IMPL(qemuDomainFSDriver, VIR_DOMAIN_FS_DRIVER_TYPE_LAST, NULL, NULL); +VIR_ENUM_DECL(qemuNumaPolicy) +VIR_ENUM_IMPL(qemuNumaPolicy, VIR_DOMAIN_NUMATUNE_MEM_LAST, + "bind", + "preferred", + "interleave"); /** * qemuPhysIfaceConnect: @@ -6383,13 +6388,23 @@ qemuBuildNumaArgStr(const virDomainDef *def, size_t i; virBuffer buf = VIR_BUFFER_INITIALIZER; char *cpumask = NULL, *tmpmask = NULL, *next = NULL; + char *nodemask = NULL; int ret = -1; + if (virDomainNumatuneHasPerNodeBinding(def->numatune) && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Per-node memory binding is not supported " + "with this QEMU")); + goto cleanup; + } + for (i = 0; i < def->cpu->ncells; i++) { int cellmem = VIR_DIV_UP(def->cpu->cells[i].mem, 1024); def->cpu->cells[i].mem = cellmem * 1024; VIR_FREE(cpumask); + VIR_FREE(nodemask); if (!(cpumask = virBitmapFormat(def->cpu->cells[i].cpumask))) goto cleanup; @@ -6402,6 +6417,43 @@ qemuBuildNumaArgStr(const virDomainDef *def, goto cleanup; } + if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM)) { + virDomainNumatuneMemMode mode; + const char *policy = NULL; + + mode = virDomainNumatuneGetMode(def->numatune, i); + policy = qemuNumaPolicyTypeToString(mode); + + virBufferAsprintf(&buf, "memory-backend-ram,size=%dM,id=ram-node%zu", + cellmem, i); + + if (virDomainNumatuneMaybeFormatNodeset(def->numatune, NULL, + &nodemask, i) < 0) + goto cleanup; + + if (nodemask) { + if (strchr(nodemask, ',') && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("disjoint NUMA node ranges are not supported " + "with this QEMU")); + goto cleanup; + } + + for (tmpmask = nodemask; tmpmask; tmpmask = next) { + if ((next = strchr(tmpmask, ','))) + *(next++) = '\0'; + virBufferAddLit(&buf, ",host-nodes="); + virBufferAdd(&buf, tmpmask, -1); + } + + virBufferAsprintf(&buf, ",policy=%s", policy); + } + + virCommandAddArg(cmd, "-object"); + virCommandAddArgBuffer(cmd, &buf); + } + virCommandAddArg(cmd, "-numa"); virBufferAsprintf(&buf, "node,nodeid=%zu", i); @@ -6412,7 +6464,11 @@ qemuBuildNumaArgStr(const virDomainDef *def, virBufferAdd(&buf, tmpmask, -1); } - virBufferAsprintf(&buf, ",mem=%d", cellmem); + if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM)) { + virBufferAsprintf(&buf, ",memdev=ram-node%zu", i); + } else { + virBufferAsprintf(&buf, ",mem=%d", cellmem); + } virCommandAddArgBuffer(cmd, &buf); } @@ -6420,6 +6476,7 @@ qemuBuildNumaArgStr(const virDomainDef *def, cleanup: VIR_FREE(cpumask); + VIR_FREE(nodemask); virBufferFreeAndReset(&buf); return ret; } diff --git a/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.args b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.args new file mode 100644 index 0000000..b0e274c --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.args @@ -0,0 +1,8 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ +/usr/bin/kvm -S -M pc -m 64 -smp 2 \ +-object memory-backend-ram,size=32M,id=ram-node0,host-nodes=3,policy=preferred \ +-numa node,nodeid=0,cpus=0,memdev=ram-node0 \ +-object memory-backend-ram,size=32M,id=ram-node1 \ +-numa node,nodeid=1,cpus=1,memdev=ram-node1 \ +-nographic -monitor unix:/tmp/test-monitor,server,nowait \ +-no-acpi -boot c -usb -net none -serial none -parallel none diff --git a/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.args b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.args new file mode 100644 index 0000000..e4beb98 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.args @@ -0,0 +1,11 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ +/usr/bin/kvm -S -M pc -m 24104 -smp 32 \ +-object memory-backend-ram,size=20M,id=ram-node0,host-nodes=3,policy=preferred \ +-numa node,nodeid=0,cpus=0,memdev=ram-node0 \ +-object memory-backend-ram,size=645M,id=ram-node1,host-nodes=0-7,policy=bind \ +-numa node,nodeid=1,cpus=1-27,cpus=29,memdev=ram-node1 \ +-object memory-backend-ram,size=23440M,id=ram-node2,\ +host-nodes=1-2,host-nodes=5,host-nodes=7,policy=bind \ +-numa node,nodeid=2,cpus=28,cpus=30-31,memdev=ram-node2 \ +-nographic -monitor unix:/tmp/test-monitor,server,nowait \ +-no-acpi -boot c -usb -net none -serial none -parallel none diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 56d3fd5..fb96d80 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -1196,7 +1196,14 @@ mymain(void) DO_TEST("blkiotune-device", QEMU_CAPS_NAME); DO_TEST("cputune", QEMU_CAPS_NAME); DO_TEST("cputune-zero-shares", QEMU_CAPS_NAME); + DO_TEST("numatune-memory", NONE); + DO_TEST("numatune-memnode", QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM); + DO_TEST_FAILURE("numatune-memnode", NONE); + + DO_TEST("numatune-memnode-no-memory", QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM); + DO_TEST_FAILURE("numatune-memnode-no-memory", NONE); + DO_TEST("numatune-auto-nodeset-invalid", NONE); DO_TEST_PARSE_ERROR("numatune-memnode-nocpu", NONE); DO_TEST_PARSE_ERROR("numatune-memnodes-problematic", NONE); -- 2.0.0

Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_cgroup.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 40fe448..e95ad17 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -589,13 +589,11 @@ qemuSetupDevicesCgroup(virQEMUDriverPtr driver, static int -qemuSetupCpusetCgroup(virDomainObjPtr vm, - virBitmapPtr nodemask, - virCapsPtr caps) +qemuSetupCpusetMems(virDomainObjPtr vm, + virBitmapPtr nodemask) { qemuDomainObjPrivatePtr priv = vm->privateData; char *mem_mask = NULL; - char *cpu_mask = NULL; int ret = -1; if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) @@ -610,6 +608,28 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm, virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) goto cleanup; + ret = 0; + cleanup: + VIR_FREE(mem_mask); + return ret; +} + + +static int +qemuSetupCpusetCgroup(virDomainObjPtr vm, + virBitmapPtr nodemask, + virCapsPtr caps) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + char *cpu_mask = NULL; + int ret = -1; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + if (qemuSetupCpusetMems(vm, nodemask) < 0) + goto cleanup; + if (vm->def->cpumask || (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)) { @@ -632,7 +652,6 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm, ret = 0; cleanup: - VIR_FREE(mem_mask); VIR_FREE(cpu_mask); return ret; } -- 2.0.0

When domain is started with numatune memory mode strict and the nodeset does not include host NUMA node with DMA and DMA32 zones, KVM initialization fails. This is because cgroup restrict even kernel allocations. We are already doing numa_set_membind() which does the same thing, only it does not restrict kernel allocations. This patch leaves the userspace numa_set_membind() in place and moves the cpuset.mems setting after the point where monitor comes up, but before vcpu and emulator sub-groups are created. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- Notes: Another approach would be not using cgroups for this at all; it should still work as expected. src/qemu/qemu_cgroup.c | 10 +++++++--- src/qemu/qemu_cgroup.h | 4 +++- src/qemu/qemu_process.c | 4 ++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index e95ad17..62a8f81 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -627,9 +627,6 @@ qemuSetupCpusetCgroup(virDomainObjPtr vm, if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) return 0; - if (qemuSetupCpusetMems(vm, nodemask) < 0) - goto cleanup; - if (vm->def->cpumask || (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)) { @@ -826,6 +823,13 @@ qemuSetupCgroup(virQEMUDriverPtr driver, } int +qemuSetupCgroupPostInit(virDomainObjPtr vm, + virBitmapPtr nodemask) +{ + return qemuSetupCpusetMems(vm, nodemask); +} + +int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, unsigned long long period, long long quota) diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 732860e..7394969 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -1,7 +1,7 @@ /* * qemu_cgroup.h: QEMU cgroup management * - * Copyright (C) 2006-2007, 2009-2013 Red Hat, Inc. + * Copyright (C) 2006-2007, 2009-2014 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -47,6 +47,8 @@ int qemuConnectCgroup(virQEMUDriverPtr driver, int qemuSetupCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm, virBitmapPtr nodemask); +int qemuSetupCgroupPostInit(virDomainObjPtr vm, + virBitmapPtr nodemask); int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, unsigned long long period, long long quota); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 4c57f15..8a6b384 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4155,6 +4155,10 @@ int qemuProcessStart(virConnectPtr conn, if (!qemuProcessVerifyGuestCPU(driver, vm)) goto cleanup; + VIR_DEBUG("Setting up post-init cgroup restrictions"); + if (qemuSetupCgroupPostInit(vm, nodemask) < 0) + goto cleanup; + VIR_DEBUG("Detecting VCPU PIDs"); if (qemuProcessDetectVcpuPIDs(driver, vm) < 0) goto cleanup; -- 2.0.0

On 16.07.2014 16:42, Martin Kletzander wrote:
v3 of https://www.redhat.com/archives/libvir-list/2014-July/msg00372.html
v3: - Michal's suggestions worked in - rebased on current master
Martin Kletzander (16): qemu: purely a code movement qemu: remove useless error check conf: purely a code movement conf, schema: add 'id' field for cells numatune: create new module for numatune numatune: unify numatune struct and enum names numatune: Encapsulate numatune configuration in order to unify results conf, schema: add support for memnode elements numatune: add support for per-node memory bindings in private APIs qemu: allow qmp probing for cmdline options without params qemu: memory-backend-ram capability probing qemu: newer -numa parameter capability probing qemu: enable disjoint numa cpu ranges qemu: pass numa node binding preferences to qemu qemu: split out cpuset.mems setting qemu: leave restricting cpuset.mems after initialization
docs/formatdomain.html.in | 32 +- docs/schemas/domaincommon.rng | 22 + po/POTFILES.in | 1 + src/Makefile.am | 3 +- src/conf/cpu_conf.c | 41 +- src/conf/cpu_conf.h | 3 +- src/conf/domain_conf.c | 203 ++----- src/conf/domain_conf.h | 10 +- src/conf/numatune_conf.c | 595 +++++++++++++++++++++ src/conf/numatune_conf.h | 108 ++++ src/libvirt_private.syms | 24 +- src/lxc/lxc_cgroup.c | 20 +- src/lxc/lxc_controller.c | 5 +- src/lxc/lxc_native.c | 15 +- src/parallels/parallels_driver.c | 7 +- src/qemu/qemu_capabilities.c | 16 +- src/qemu/qemu_capabilities.h | 2 + src/qemu/qemu_cgroup.c | 52 +- src/qemu/qemu_cgroup.h | 4 +- src/qemu/qemu_command.c | 98 +++- src/qemu/qemu_driver.c | 84 ++- src/qemu/qemu_monitor.c | 6 +- src/qemu/qemu_monitor.h | 3 +- src/qemu/qemu_monitor_json.c | 8 +- src/qemu/qemu_monitor_json.h | 3 +- src/qemu/qemu_process.c | 12 +- src/util/virnuma.c | 61 +-- src/util/virnuma.h | 28 +- tests/qemucapabilitiesdata/caps_1.6.50-1.caps | 1 + tests/qemucapabilitiesdata/caps_1.6.50-1.replies | 5 + tests/qemumonitorjsontest.c | 20 +- .../qemuxml2argv-cpu-numa-disjoint.args | 6 + .../qemuxml2argv-cpu-numa-disjoint.xml | 28 + tests/qemuxml2argvdata/qemuxml2argv-cpu-numa1.xml | 6 +- tests/qemuxml2argvdata/qemuxml2argv-cpu-numa2.xml | 6 +- tests/qemuxml2argvdata/qemuxml2argv-cpu-numa3.xml | 25 + .../qemuxml2argv-numatune-auto-prefer.xml | 29 + .../qemuxml2argv-numatune-memnode-no-memory.args | 8 + .../qemuxml2argv-numatune-memnode-no-memory.xml | 30 ++ .../qemuxml2argv-numatune-memnode-nocpu.xml | 25 + .../qemuxml2argv-numatune-memnode.args | 11 + .../qemuxml2argv-numatune-memnode.xml | 33 ++ .../qemuxml2argv-numatune-memnodes-problematic.xml | 31 ++ tests/qemuxml2argvtest.c | 12 + .../qemuxml2xmlout-cpu-numa1.xml | 28 + .../qemuxml2xmlout-cpu-numa2.xml | 28 + .../qemuxml2xmlout-numatune-auto-prefer.xml | 29 + .../qemuxml2xmlout-numatune-memnode.xml | 33 ++ tests/qemuxml2xmltest.c | 8 + 49 files changed, 1479 insertions(+), 389 deletions(-) create mode 100644 src/conf/numatune_conf.c create mode 100644 src/conf/numatune_conf.h create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa3.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-auto-prefer.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-nocpu.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnodes-problematic.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa1.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa2.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-auto-prefer.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-memnode.xml
-- 2.0.0
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
ACK series. Although, if anybody has objections to the XML schema, speak now! Michal

On Wed, Jul 16, 2014 at 07:48:45PM +0200, Michal Privoznik wrote:
On 16.07.2014 16:42, Martin Kletzander wrote:
v3 of https://www.redhat.com/archives/libvir-list/2014-July/msg00372.html
v3: - Michal's suggestions worked in - rebased on current master
[...] ACK series.
Although, if anybody has objections to the XML schema, speak now!
I pushed it and if anyone has an objection, there's still time till the release comes ;-) Martin

On 07/16/2014 12:21 PM, Martin Kletzander wrote:
On Wed, Jul 16, 2014 at 07:48:45PM +0200, Michal Privoznik wrote:
On 16.07.2014 16:42, Martin Kletzander wrote:
v3 of https://www.redhat.com/archives/libvir-list/2014-July/msg00372.html
v3: - Michal's suggestions worked in - rebased on current master
[...] ACK series.
Although, if anybody has objections to the XML schema, speak now!
I pushed it and if anyone has an objection, there's still time till the release comes ;-)
I just missed you; I pointed out good idea for a followup to the RNG schema in 8/16. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 07/16/2014 04:42 PM, Martin Kletzander wrote:
v3 of https://www.redhat.com/archives/libvir-list/2014-July/msg00372.html
v3: - Michal's suggestions worked in - rebased on current master
Martin Kletzander (16): qemu: purely a code movement qemu: remove useless error check conf: purely a code movement conf, schema: add 'id' field for cells numatune: create new module for numatune numatune: unify numatune struct and enum names numatune: Encapsulate numatune configuration in order to unify results conf, schema: add support for memnode elements numatune: add support for per-node memory bindings in private APIs qemu: allow qmp probing for cmdline options without params qemu: memory-backend-ram capability probing qemu: newer -numa parameter capability probing qemu: enable disjoint numa cpu ranges qemu: pass numa node binding preferences to qemu qemu: split out cpuset.mems setting qemu: leave restricting cpuset.mems after initialization
docs/formatdomain.html.in | 32 +- docs/schemas/domaincommon.rng | 22 + po/POTFILES.in | 1 + src/Makefile.am | 3 +- src/conf/cpu_conf.c | 41 +- src/conf/cpu_conf.h | 3 +- src/conf/domain_conf.c | 203 ++----- src/conf/domain_conf.h | 10 +- src/conf/numatune_conf.c | 595 +++++++++++++++++++++ src/conf/numatune_conf.h | 108 ++++ src/libvirt_private.syms | 24 +- src/lxc/lxc_cgroup.c | 20 +- src/lxc/lxc_controller.c | 5 +- src/lxc/lxc_native.c | 15 +- src/parallels/parallels_driver.c | 7 +- src/qemu/qemu_capabilities.c | 16 +- src/qemu/qemu_capabilities.h | 2 + src/qemu/qemu_cgroup.c | 52 +- src/qemu/qemu_cgroup.h | 4 +- src/qemu/qemu_command.c | 98 +++- src/qemu/qemu_driver.c | 84 ++- src/qemu/qemu_monitor.c | 6 +- src/qemu/qemu_monitor.h | 3 +- src/qemu/qemu_monitor_json.c | 8 +- src/qemu/qemu_monitor_json.h | 3 +- src/qemu/qemu_process.c | 12 +- src/util/virnuma.c | 61 +-- src/util/virnuma.h | 28 +- tests/qemucapabilitiesdata/caps_1.6.50-1.caps | 1 + tests/qemucapabilitiesdata/caps_1.6.50-1.replies | 5 + tests/qemumonitorjsontest.c | 20 +- .../qemuxml2argv-cpu-numa-disjoint.args | 6 + .../qemuxml2argv-cpu-numa-disjoint.xml | 28 + tests/qemuxml2argvdata/qemuxml2argv-cpu-numa1.xml | 6 +- tests/qemuxml2argvdata/qemuxml2argv-cpu-numa2.xml | 6 +- tests/qemuxml2argvdata/qemuxml2argv-cpu-numa3.xml | 25 + .../qemuxml2argv-numatune-auto-prefer.xml | 29 + .../qemuxml2argv-numatune-memnode-no-memory.args | 8 + .../qemuxml2argv-numatune-memnode-no-memory.xml | 30 ++ .../qemuxml2argv-numatune-memnode-nocpu.xml | 25 + .../qemuxml2argv-numatune-memnode.args | 11 + .../qemuxml2argv-numatune-memnode.xml | 33 ++ .../qemuxml2argv-numatune-memnodes-problematic.xml | 31 ++ tests/qemuxml2argvtest.c | 12 + .../qemuxml2xmlout-cpu-numa1.xml | 28 + .../qemuxml2xmlout-cpu-numa2.xml | 28 + .../qemuxml2xmlout-numatune-auto-prefer.xml | 29 + .../qemuxml2xmlout-numatune-memnode.xml | 33 ++ tests/qemuxml2xmltest.c | 8 + 49 files changed, 1479 insertions(+), 389 deletions(-) create mode 100644 src/conf/numatune_conf.c create mode 100644 src/conf/numatune_conf.h create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa-disjoint.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-cpu-numa3.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-auto-prefer.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-nocpu.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-numatune-memnodes-problematic.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa1.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-cpu-numa2.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-auto-prefer.xml create mode 100644 tests/qemuxml2xmloutdata/qemuxml2xmlout-numatune-memnode.xml
-- 2.0.0
This series broke the build for me (clang 3.4.1): In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:70:35: error: redefinition of typedef 'virDomainNumatune' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef struct _virDomainNumatune virDomainNumatune; ^ ../src/conf/numatune_conf.h:43:35: note: previous definition is here typedef struct _virDomainNumatune virDomainNumatune; ^ In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:71:28: error: redefinition of typedef 'virDomainNumatunePtr' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef virDomainNumatune *virDomainNumatunePtr; ^ ../src/conf/numatune_conf.h:44:28: note: previous definition is here typedef virDomainNumatune *virDomainNumatunePtr; ^ In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:1857:30: error: redefinition of typedef 'virDomainDef' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef struct _virDomainDef virDomainDef; ^ ../src/conf/numatune_conf.h:39:30: note: previous definition is here typedef struct _virDomainDef virDomainDef; ^ In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:1858:23: error: redefinition of typedef 'virDomainDefPtr' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef virDomainDef *virDomainDefPtr; ^ ../src/conf/numatune_conf.h:40:23: note: previous definition is here typedef virDomainDef *virDomainDefPtr; ^ 4 errors generated. Jan

On 07/17/2014 01:37 AM, Ján Tomko wrote:
On 07/16/2014 04:42 PM, Martin Kletzander wrote:
v3 of https://www.redhat.com/archives/libvir-list/2014-July/msg00372.html
This series broke the build for me (clang 3.4.1): In file included from util/virclosecallbacks.c:28: In file included from ../src/util/virclosecallbacks.h:28: ../src/conf/domain_conf.h:70:35: error: redefinition of typedef 'virDomainNumatune' is a C11 feature [-Werror,-Wtypedef-redefinition] typedef struct _virDomainNumatune virDomainNumatune; ^ ../src/conf/numatune_conf.h:43:35: note: previous definition is here typedef struct _virDomainNumatune virDomainNumatune;
Roman hit the same; see the sub-thread at 7/16. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org
participants (5)
-
Eric Blake
-
Ján Tomko
-
Martin Kletzander
-
Michal Privoznik
-
Roman Bogorodskiy