[libvirt] [PATCH 0/2] Don't choke on valid NUMA nodesets on daemon restart

Details in the commit message for 2/2 and the bug report referenced therein. Andrea Bolognani (2): tests: Create full host NUMA topology by default qemu: Figure out nodeset bitmap size correctly src/qemu/qemu_domain.c | 11 +++++- tests/qemustatusxml2xmldata/modern-in.xml | 2 +- tests/testutilsqemu.c | 57 +++++++++++++++++++++++++++- tests/vircapstest.c | 62 +------------------------------ 4 files changed, 68 insertions(+), 64 deletions(-) -- 2.14.3

vircapstest has code to add a full host NUMA topology, that is, one that includes all information about nodes and CPUs including IDs; the function used to create a mock virCapsPtr by most of the test suite, however, just fakes it by setting nnumaCell_max to some number. While the latter approach has served us well so far, we're going to need all the information to be filled in soon. In order to do that, we can just move the existing code from vircapstest to testutilsqemu and, with some renaming and trivial tweaking, use it as-is. Interestingly, the NUMA topology generated by the function is rigged up so that the NUMA nodes aren't (necessarily) numbered starting from 0, which is a nice way to spot mistaken assumptions in our codebase. Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- tests/testutilsqemu.c | 57 +++++++++++++++++++++++++++++++++++++++++++++- tests/vircapstest.c | 62 +-------------------------------------------------- 2 files changed, 57 insertions(+), 62 deletions(-) diff --git a/tests/testutilsqemu.c b/tests/testutilsqemu.c index f8182033fc..ec32c4e106 100644 --- a/tests/testutilsqemu.c +++ b/tests/testutilsqemu.c @@ -375,6 +375,56 @@ static int testQemuAddAARCH64Guest(virCapsPtr caps) return -1; } +#define MAX_CELLS 4 +#define MAX_CPUS_IN_CELL 2 +#define MAX_MEM_IN_CELL 2097152 + +/* + * Build NUMA topology with cell id starting from (0 + seq) + * for testing + */ +static int +testQemuBuildNUMATopology(virCapsPtr caps, + int seq) +{ + virCapsHostNUMACellCPUPtr cell_cpus = NULL; + int core_id, cell_id; + int id; + + id = 0; + for (cell_id = 0; cell_id < MAX_CELLS; cell_id++) { + if (VIR_ALLOC_N(cell_cpus, MAX_CPUS_IN_CELL) < 0) + goto error; + + for (core_id = 0; core_id < MAX_CPUS_IN_CELL; core_id++) { + cell_cpus[core_id].id = id + core_id; + cell_cpus[core_id].socket_id = cell_id + seq; + cell_cpus[core_id].core_id = id + core_id; + if (!(cell_cpus[core_id].siblings = + virBitmapNew(MAX_CPUS_IN_CELL))) + goto error; + ignore_value(virBitmapSetBit(cell_cpus[core_id].siblings, id)); + } + id++; + + if (virCapabilitiesAddHostNUMACell(caps, cell_id + seq, + MAX_MEM_IN_CELL, + MAX_CPUS_IN_CELL, cell_cpus, + VIR_ARCH_NONE, NULL, + VIR_ARCH_NONE, NULL) < 0) + goto error; + + cell_cpus = NULL; + } + + return 0; + + error: + virCapabilitiesClearHostNUMACellCPUTopology(cell_cpus, MAX_CPUS_IN_CELL); + VIR_FREE(cell_cpus); + return -1; +} + virCapsPtr testQemuCapsInit(void) { virCapsPtr caps; @@ -400,7 +450,12 @@ virCapsPtr testQemuCapsInit(void) qemuTestSetHostCPU(caps, NULL); - caps->host.nnumaCell_max = 4; + /* + * Build a NUMA topology with cell_id (NUMA node id + * being 3(0 + 3),4(1 + 3), 5 and 6 + */ + if (testQemuBuildNUMATopology(caps, 3) < 0) + goto cleanup; if (testQemuAddI686Guest(caps) < 0) goto cleanup; diff --git a/tests/vircapstest.c b/tests/vircapstest.c index 664b7da143..e9d5e12158 100644 --- a/tests/vircapstest.c +++ b/tests/vircapstest.c @@ -29,62 +29,6 @@ #define VIR_FROM_THIS VIR_FROM_NONE -#define MAX_CELLS 4 -#define MAX_CPUS_IN_CELL 2 -#define MAX_MEM_IN_CELL 2097152 - - -/* - * Build NUMA Toplogy with cell id starting from (0 + seq) - * for testing - */ -static virCapsPtr -buildNUMATopology(int seq) -{ - virCapsPtr caps; - virCapsHostNUMACellCPUPtr cell_cpus = NULL; - int core_id, cell_id; - int id; - - if ((caps = virCapabilitiesNew(VIR_ARCH_X86_64, false, false)) == NULL) - goto error; - - id = 0; - for (cell_id = 0; cell_id < MAX_CELLS; cell_id++) { - if (VIR_ALLOC_N(cell_cpus, MAX_CPUS_IN_CELL) < 0) - goto error; - - for (core_id = 0; core_id < MAX_CPUS_IN_CELL; core_id++) { - cell_cpus[core_id].id = id + core_id; - cell_cpus[core_id].socket_id = cell_id + seq; - cell_cpus[core_id].core_id = id + core_id; - if (!(cell_cpus[core_id].siblings = - virBitmapNew(MAX_CPUS_IN_CELL))) - goto error; - ignore_value(virBitmapSetBit(cell_cpus[core_id].siblings, id)); - } - id++; - - if (virCapabilitiesAddHostNUMACell(caps, cell_id + seq, - MAX_MEM_IN_CELL, - MAX_CPUS_IN_CELL, cell_cpus, - VIR_ARCH_NONE, NULL, - VIR_ARCH_NONE, NULL) < 0) - goto error; - - cell_cpus = NULL; - } - - return caps; - - error: - virCapabilitiesClearHostNUMACellCPUTopology(cell_cpus, MAX_CPUS_IN_CELL); - VIR_FREE(cell_cpus); - virObjectUnref(caps); - return NULL; - -} - static int test_virCapabilitiesGetCpusForNodemask(const void *data ATTRIBUTE_UNUSED) @@ -96,11 +40,7 @@ test_virCapabilitiesGetCpusForNodemask(const void *data ATTRIBUTE_UNUSED) int mask_size = 8; int ret = -1; - /* - * Build a NUMA topology with cell_id (NUMA node id - * being 3(0 + 3),4(1 + 3), 5 and 6 - */ - if (!(caps = buildNUMATopology(3))) + if (!(caps = testQemuCapsInit())) goto error; if (virBitmapParse(nodestr, &nodemask, mask_size) < 0) -- 2.14.3

On Thu, Apr 12, 2018 at 08:47:57 +0200, Andrea Bolognani wrote:
vircapstest has code to add a full host NUMA topology, that is, one that includes all information about nodes and CPUs including IDs; the function used to create a mock virCapsPtr by most of the test suite, however, just fakes it by setting nnumaCell_max to some number.
While the latter approach has served us well so far, we're going to need all the information to be filled in soon. In order to do that, we can just move the existing code from vircapstest to testutilsqemu and, with some renaming and trivial tweaking, use it as-is.
Interestingly, the NUMA topology generated by the function is rigged up so that the NUMA nodes aren't (necessarily) numbered starting from 0, which is a nice way to spot mistaken assumptions in our codebase.
Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- tests/testutilsqemu.c | 57 +++++++++++++++++++++++++++++++++++++++++++++- tests/vircapstest.c | 62 +-------------------------------------------------- 2 files changed, 57 insertions(+), 62 deletions(-)
diff --git a/tests/testutilsqemu.c b/tests/testutilsqemu.c index f8182033fc..ec32c4e106 100644 --- a/tests/testutilsqemu.c +++ b/tests/testutilsqemu.c @@ -375,6 +375,56 @@ static int testQemuAddAARCH64Guest(virCapsPtr caps) return -1; }
+#define MAX_CELLS 4 +#define MAX_CPUS_IN_CELL 2 +#define MAX_MEM_IN_CELL 2097152
Syntax-check is not happy about indentation of these. You probably missed that this whole function is wrapped inside #ifdef WITH_QEMU.
+ +/* + * Build NUMA topology with cell id starting from (0 + seq) + * for testing + */ +static int +testQemuBuildNUMATopology(virCapsPtr caps, + int seq)
Moving of the code looks sane. [...]
diff --git a/tests/vircapstest.c b/tests/vircapstest.c index 664b7da143..e9d5e12158 100644 --- a/tests/vircapstest.c +++ b/tests/vircapstest.c
[...]
@@ -96,11 +40,7 @@ test_virCapabilitiesGetCpusForNodemask(const void *data ATTRIBUTE_UNUSED) int mask_size = 8; int ret = -1;
- /* - * Build a NUMA topology with cell_id (NUMA node id - * being 3(0 + 3),4(1 + 3), 5 and 6 - */ - if (!(caps = buildNUMATopology(3))) + if (!(caps = testQemuCapsInit()))
Since this function is now declared within #ifdef WITH_QEMU but test_virCapabilitiesGetCpusForNodemask isn't you'll get a build failure with --without-qemu Since there are multiple solutions, v2 will be required.

The current private XML parsing code relies on the assumption that NUMA node IDs start from 0 and are densely allocated, neither of which is necessarily the case. Change it so that the bitmap size is dynamically calculated by looking at NUMA node IDs instead, which ensures all nodes will be able to fit and thus the bitmap will be parsed successfully. Update one of the test cases so that it would fail with the previous approach, but passes with the new one. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1490158 Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- src/qemu/qemu_domain.c | 11 ++++++++++- tests/qemustatusxml2xmldata/modern-in.xml | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 100304fd05..b126c69490 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -2248,6 +2248,8 @@ qemuDomainObjPrivateXMLParseAutomaticPlacement(xmlXPathContextPtr ctxt, virCapsPtr caps = NULL; char *nodeset; char *cpuset; + int nodesetSize = 0; + int i; int ret = -1; nodeset = virXPathString("string(./numad/@nodeset)", ctxt); @@ -2259,8 +2261,15 @@ qemuDomainObjPrivateXMLParseAutomaticPlacement(xmlXPathContextPtr ctxt, if (!(caps = virQEMUDriverGetCapabilities(driver, false))) goto cleanup; + /* Figure out how big the nodeset bitmap needs to be. + * This is necessary because NUMA node IDs are not guaranteed to + * start from 0 or be densely allocated */ + for (i = 0; i < caps->host.nnumaCell; i++) { + nodesetSize = MAX(nodesetSize, caps->host.numaCell[i]->num + 1); + } + if (nodeset && - virBitmapParse(nodeset, &priv->autoNodeset, caps->host.nnumaCell_max) < 0) + virBitmapParse(nodeset, &priv->autoNodeset, nodesetSize) < 0) goto cleanup; if (cpuset) { diff --git a/tests/qemustatusxml2xmldata/modern-in.xml b/tests/qemustatusxml2xmldata/modern-in.xml index 2e166e6e67..c1e57618b6 100644 --- a/tests/qemustatusxml2xmldata/modern-in.xml +++ b/tests/qemustatusxml2xmldata/modern-in.xml @@ -252,7 +252,7 @@ <device alias='usb'/> <device alias='ide0-0-0'/> </devices> - <numad nodeset='0' cpuset='0-7'/> + <numad nodeset='6' cpuset='0-7'/> <libDir path='/var/lib/libvirt/qemu/domain-1-upstream'/> <channelTargetDir path='/var/lib/libvirt/qemu/channel/target/domain-1-upstream'/> <chardevStdioLogd/> -- 2.14.3

On Thu, Apr 12, 2018 at 08:47:58 +0200, Andrea Bolognani wrote:
The current private XML parsing code relies on the assumption that NUMA node IDs start from 0 and are densely allocated, neither of which is necessarily the case.
Change it so that the bitmap size is dynamically calculated by looking at NUMA node IDs instead, which ensures all nodes will be able to fit and thus the bitmap will be parsed successfully.
Update one of the test cases so that it would fail with the previous approach, but passes with the new one.
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1490158
While the patch below will fix this case, I'd also like to see that the parsing of the bitmaps is made non-fatal. If the nodesets are missing some of the reported data will be wrong, but not having this is certainly not a deal-breaker so that we should not reconnect to qemu.
Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- src/qemu/qemu_domain.c | 11 ++++++++++- tests/qemustatusxml2xmldata/modern-in.xml | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 100304fd05..b126c69490 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -2248,6 +2248,8 @@ qemuDomainObjPrivateXMLParseAutomaticPlacement(xmlXPathContextPtr ctxt, virCapsPtr caps = NULL; char *nodeset; char *cpuset; + int nodesetSize = 0; + int i; int ret = -1;
nodeset = virXPathString("string(./numad/@nodeset)", ctxt); @@ -2259,8 +2261,15 @@ qemuDomainObjPrivateXMLParseAutomaticPlacement(xmlXPathContextPtr ctxt, if (!(caps = virQEMUDriverGetCapabilities(driver, false))) goto cleanup;
+ /* Figure out how big the nodeset bitmap needs to be. + * This is necessary because NUMA node IDs are not guaranteed to + * start from 0 or be densely allocated */ + for (i = 0; i < caps->host.nnumaCell; i++) { + nodesetSize = MAX(nodesetSize, caps->host.numaCell[i]->num + 1); + }
Syntax-check is moaning about this.
+ if (nodeset && - virBitmapParse(nodeset, &priv->autoNodeset, caps->host.nnumaCell_max) < 0) + virBitmapParse(nodeset, &priv->autoNodeset, nodesetSize) < 0) goto cleanup;
if (cpuset) {
ACK if you actually run syntax-check.

On Thu, 2018-04-19 at 14:11 +0200, Peter Krempa wrote:
On Thu, Apr 12, 2018 at 08:47:58 +0200, Andrea Bolognani wrote:
The current private XML parsing code relies on the assumption that NUMA node IDs start from 0 and are densely allocated, neither of which is necessarily the case.
Change it so that the bitmap size is dynamically calculated by looking at NUMA node IDs instead, which ensures all nodes will be able to fit and thus the bitmap will be parsed successfully.
Update one of the test cases so that it would fail with the previous approach, but passes with the new one.
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1490158
While the patch below will fix this case, I'd also like to see that the parsing of the bitmaps is made non-fatal. If the nodesets are missing some of the reported data will be wrong, but not having this is certainly not a deal-breaker so that we should not reconnect to qemu.
Mh, that's trickier than I initially though, because virBitmapParseSeparator() calls virReportError() itself on parse failure, and changing doesn't sound feasible.
ACK if you actually run syntax-check.
Shame on me for not doing so the first time around :( -- Andrea Bolognani / Red Hat / Virtualization

On Thu, Apr 19, 2018 at 15:45:52 +0200, Andrea Bolognani wrote:
On Thu, 2018-04-19 at 14:11 +0200, Peter Krempa wrote:
On Thu, Apr 12, 2018 at 08:47:58 +0200, Andrea Bolognani wrote:
The current private XML parsing code relies on the assumption that NUMA node IDs start from 0 and are densely allocated, neither of which is necessarily the case.
Change it so that the bitmap size is dynamically calculated by looking at NUMA node IDs instead, which ensures all nodes will be able to fit and thus the bitmap will be parsed successfully.
Update one of the test cases so that it would fail with the previous approach, but passes with the new one.
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1490158
While the patch below will fix this case, I'd also like to see that the parsing of the bitmaps is made non-fatal. If the nodesets are missing some of the reported data will be wrong, but not having this is certainly not a deal-breaker so that we should not reconnect to qemu.
Mh, that's trickier than I initially though, because virBitmapParseSeparator() calls virReportError() itself on parse failure, and changing doesn't sound feasible.
I think logging of the error is okay, we just probably should reset it. At any rate this patch can be pushed as-is ... well after it passes build checks ;)

On Thu, 2018-04-19 at 16:15 +0200, Peter Krempa wrote:
On Thu, Apr 19, 2018 at 15:45:52 +0200, Andrea Bolognani wrote:
Mh, that's trickier than I initially though, because virBitmapParseSeparator() calls virReportError() itself on parse failure, and changing doesn't sound feasible.
I think logging of the error is okay, we just probably should reset it.
At any rate this patch can be pushed as-is ... well after it passes build checks ;)
Are you referring to the syntax-check failure? I posted a v2 that fixes that, but also changes 1/2, without which 2/2 can't work. -- Andrea Bolognani / Red Hat / Virtualization

On Thu, Apr 19, 2018 at 16:37:23 +0200, Andrea Bolognani wrote:
On Thu, 2018-04-19 at 16:15 +0200, Peter Krempa wrote:
On Thu, Apr 19, 2018 at 15:45:52 +0200, Andrea Bolognani wrote:
Mh, that's trickier than I initially though, because virBitmapParseSeparator() calls virReportError() itself on parse failure, and changing doesn't sound feasible.
I think logging of the error is okay, we just probably should reset it.
At any rate this patch can be pushed as-is ... well after it passes build checks ;)
Are you referring to the syntax-check failure? I posted a v2 that fixes that, but also changes 1/2, without which 2/2 can't work.
No, I was referring to making that whole function pass even when the numad hints can't be parsed. I'll do a review of v2 in a bit.
participants (2)
-
Andrea Bolognani
-
Peter Krempa