[libvirt] [PATCH 0/4] qemu: Fix qemuProcessInitCpuAffinity()

See detailed explanation in the commit message for patch 1/4 and the corresponding bug report. Andrea Bolognani (4): util: Introduce virBitmapUnion() fixup? util: Optimize virBitmapUnion() util: Introduce virNumaNodesetToCPUset() qemu: Fix qemuProcessInitCpuAffinity() src/libvirt_private.syms | 2 ++ src/qemu/qemu_process.c | 7 ++++- src/util/virbitmap.c | 32 +++++++++++++++++++++++ src/util/virbitmap.h | 4 +++ src/util/virnuma.c | 55 ++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 2 ++ tests/virbitmaptest.c | 37 +++++++++++++++++++++++++++ 7 files changed, 138 insertions(+), 1 deletion(-) -- 2.21.0

Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virbitmap.c | 26 ++++++++++++++++++++++++++ src/util/virbitmap.h | 4 ++++ tests/virbitmaptest.c | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 909975750c..678d0348a2 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1517,6 +1517,7 @@ virBitmapSubtract; virBitmapToData; virBitmapToDataBuf; virBitmapToString; +virBitmapUnion; # util/virbuffer.h diff --git a/src/util/virbitmap.c b/src/util/virbitmap.c index 91df4b8601..1f0db563ab 100644 --- a/src/util/virbitmap.c +++ b/src/util/virbitmap.c @@ -1260,6 +1260,32 @@ virBitmapIntersect(virBitmapPtr a, } +/** + * virBitmapUnion: + * @a: bitmap, modified to contain result + * @b: other bitmap + * + * Performs union of two bitmaps: a = union(a, b) + * + * Returns 0 on success, <0 on failure. + */ +int +virBitmapUnion(virBitmapPtr a, + virBitmapPtr b) +{ + size_t i; + + for (i = 0; i < b->nbits; i++) { + if (virBitmapIsBitSet(b, i)) { + if (virBitmapSetBitExpand(a, i) < 0) + return -1; + } + } + + return 0; +} + + /** * virBitmapSubtract: * @a: minuend/result diff --git a/src/util/virbitmap.h b/src/util/virbitmap.h index 38dc06412a..49d9910fa7 100644 --- a/src/util/virbitmap.h +++ b/src/util/virbitmap.h @@ -149,6 +149,10 @@ bool virBitmapOverlaps(virBitmapPtr b1, void virBitmapIntersect(virBitmapPtr a, virBitmapPtr b) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); +int virBitmapUnion(virBitmapPtr a, + virBitmapPtr b) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + void virBitmapSubtract(virBitmapPtr a, virBitmapPtr b) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); diff --git a/tests/virbitmaptest.c b/tests/virbitmaptest.c index 2fbafc0a76..f9d3cfd995 100644 --- a/tests/virbitmaptest.c +++ b/tests/virbitmaptest.c @@ -740,6 +740,34 @@ test14(const void *opaque) return ret; } +/* virBitmapUnion() */ +static int +test15(const void *opaque) +{ + const struct testBinaryOpData *data = opaque; + VIR_AUTOPTR(virBitmap) amap = NULL; + VIR_AUTOPTR(virBitmap) bmap = NULL; + VIR_AUTOPTR(virBitmap) resmap = NULL; + + if (!(amap = virBitmapParseUnlimited(data->a)) || + !(bmap = virBitmapParseUnlimited(data->b)) || + !(resmap = virBitmapParseUnlimited(data->res))) { + return -1; + } + + if (virBitmapUnion(amap, bmap) < 0) + return -1; + + if (!virBitmapEqual(amap, resmap)) { + fprintf(stderr, + "\n bitmap union failed: union('%s', '%s') != '%s'\n", + data->a, data->b, data->res); + return -1; + } + + return 0; +} + #define TESTBINARYOP(A, B, RES, FUNC) \ testBinaryOpData.a = A; \ @@ -798,6 +826,15 @@ mymain(void) TESTBINARYOP("0-3", "0,^0", "0-3", test14); TESTBINARYOP("0,2", "1,3", "0,2", test14); + virTestCounterReset("test15-"); + TESTBINARYOP("0-1", "0-1", "0-1", test15); + TESTBINARYOP("0", "1", "0-1", test15); + TESTBINARYOP("0-1", "2-3", "0-3", test15); + TESTBINARYOP("0-3", "1-2", "0-3", test15); + TESTBINARYOP("0,^0", "12345", "12345", test15); + TESTBINARYOP("12345", "0,^0", "12345", test15); + TESTBINARYOP("0,^0", "0,^0", "0,^0", test15); + return ret; } -- 2.21.0

On Fri, May 31, 2019 at 05:21:59PM +0200, Andrea Bolognani wrote:
Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virbitmap.c | 26 ++++++++++++++++++++++++++ src/util/virbitmap.h | 4 ++++ tests/virbitmaptest.c | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+)
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano

The original implementation is extremely straightforward but not too efficient, because it uses the public API instead of poking the innards directly. This second implementation does the latter, and as a consequence can afford to make @b const, which is nice even though most existing virBitmap APIs use non-const pointers even for read-only bitmaps. That said, I'm not entirely sure I got it quite right, so a review from someone who's well versed in the virBitmap internals (*cough* Peter *cough*) would be appreciated, and if my implementation passes muster or can be amended through review comments I'll gladly squash this commit into the previous one. Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- src/util/virbitmap.c | 18 ++++++++++++------ src/util/virbitmap.h | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/util/virbitmap.c b/src/util/virbitmap.c index 1f0db563ab..f2127c053d 100644 --- a/src/util/virbitmap.c +++ b/src/util/virbitmap.c @@ -1271,17 +1271,23 @@ virBitmapIntersect(virBitmapPtr a, */ int virBitmapUnion(virBitmapPtr a, - virBitmapPtr b) + const virBitmap *b) { size_t i; + size_t max; - for (i = 0; i < b->nbits; i++) { - if (virBitmapIsBitSet(b, i)) { - if (virBitmapSetBitExpand(a, i) < 0) - return -1; - } + if (a->nbits < b->nbits && + virBitmapExpand(a, b->nbits) < 0) { + return -1; } + max = a->map_len; + if (max > b->map_len) + max = b->map_len; + + for (i = 0; i < max; i++) + a->map[i] |= b->map[i]; + return 0; } diff --git a/src/util/virbitmap.h b/src/util/virbitmap.h index 49d9910fa7..8696214da8 100644 --- a/src/util/virbitmap.h +++ b/src/util/virbitmap.h @@ -150,7 +150,7 @@ void virBitmapIntersect(virBitmapPtr a, virBitmapPtr b) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); int virBitmapUnion(virBitmapPtr a, - virBitmapPtr b) + const virBitmap *b) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); void virBitmapSubtract(virBitmapPtr a, virBitmapPtr b) -- 2.21.0

On Fri, May 31, 2019 at 05:22:00PM +0200, Andrea Bolognani wrote:
The original implementation is extremely straightforward but not too efficient, because it uses the public API instead of poking the innards directly. This second implementation does the latter, and as a consequence can afford to make @b const, which is nice even though most existing virBitmap APIs use non-const pointers even for read-only bitmaps.
That said, I'm not entirely sure I got it quite right, so a review from someone who's well versed in the virBitmap internals (*cough* Peter *cough*) would be appreciated, and if my implementation passes muster or can be amended through review comments I'll gladly squash this commit into the previous one.
Yes please. Also drop this paragraph. History is written by the winners ;)
Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- src/util/virbitmap.c | 18 ++++++++++++------ src/util/virbitmap.h | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/src/util/virbitmap.c b/src/util/virbitmap.c index 1f0db563ab..f2127c053d 100644 --- a/src/util/virbitmap.c +++ b/src/util/virbitmap.c @@ -1271,17 +1271,23 @@ virBitmapIntersect(virBitmapPtr a, */ int virBitmapUnion(virBitmapPtr a, - virBitmapPtr b) + const virBitmap *b) { size_t i; + size_t max;
- for (i = 0; i < b->nbits; i++) { - if (virBitmapIsBitSet(b, i)) { - if (virBitmapSetBitExpand(a, i) < 0) - return -1; - } + if (a->nbits < b->nbits && + virBitmapExpand(a, b->nbits) < 0) {
After this, 'b' can hold b->nbits and 'a' can hold b->nbits+1. if (b->nbits && a->nbits < b->nbits && virBitmapExpand(a, b->nbits -1) < 0) {
+ return -1; }
+ max = a->map_len; + if (max > b->map_len) + max = b->map_len;
You expanded the 'a' bitmap just a few lines above. So if you can safely go up to b->map_len without accessing invalid memory in a or missing any set bits in b.
+ + for (i = 0; i < max; i++) + a->map[i] |= b->map[i]; + return 0; }
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano

On Mon, 2019-06-03 at 14:10 +0200, Ján Tomko wrote:
On Fri, May 31, 2019 at 05:22:00PM +0200, Andrea Bolognani wrote:
+ if (a->nbits < b->nbits && + virBitmapExpand(a, b->nbits) < 0) {
After this, 'b' can hold b->nbits and 'a' can hold b->nbits+1.
if (b->nbits && a->nbits < b->nbits && virBitmapExpand(a, b->nbits -1) < 0) {
Yeah, you're right, we need to account for the zero-indexing of bits. I mean, it's not like the resulting bitmap would be incorrect either way, but we might end up allocating more memory than it's actually required. The first check seems unnecessary, though: the only case in which the argument to virBitmapExpand() would be incorrect is b->nbits == 0, but we know that both a->nbits and b->nbits are >= 0 and we also just verified that a->nbits < b->nbits, so b->nbits must be >= 1 and the argument to virBitmapExpand() will always be correct. Or am I missing something?
+ max = a->map_len; + if (max > b->map_len) + max = b->map_len;
You expanded the 'a' bitmap just a few lines above. So if you can safely go up to b->map_len without accessing invalid memory in a or missing any set bits in b.
I thought the above would be necessary to cover the case where a is bigger than b, but I realize now that iterating over the smaller b will deal with such a situation just fine. So nice catch, and consider it fixed :) -- Andrea Bolognani / Red Hat / Virtualization

On Mon, Jun 03, 2019 at 04:31:51PM +0200, Andrea Bolognani wrote:
On Mon, 2019-06-03 at 14:10 +0200, Ján Tomko wrote:
On Fri, May 31, 2019 at 05:22:00PM +0200, Andrea Bolognani wrote:
+ if (a->nbits < b->nbits && + virBitmapExpand(a, b->nbits) < 0) {
After this, 'b' can hold b->nbits and 'a' can hold b->nbits+1.
if (b->nbits && a->nbits < b->nbits && virBitmapExpand(a, b->nbits -1) < 0) {
Yeah, you're right, we need to account for the zero-indexing of bits. I mean, it's not like the resulting bitmap would be incorrect either way, but we might end up allocating more memory than it's actually required.
Actually I do think it would be incorrect. The bitmap size is as important as its values - e.g. if the resulting bitmap had 4 set bits, a size of 4 vs 5 would alter the result of virBitmapIsAllSet. It would only be a marginal over-allocation if we were dealing with map_len.
The first check seems unnecessary, though: the only case in which the argument to virBitmapExpand() would be incorrect is b->nbits == 0, but we know that both a->nbits and b->nbits are >= 0 and we also just verified that a->nbits < b->nbits, so b->nbits must be >= 1 and the argument to virBitmapExpand() will always be correct. Or am I missing something?
Right, it will work unless someone changes nbits from size_t to double. Jano

On Mon, 2019-06-03 at 16:43 +0200, Ján Tomko wrote:
On Mon, Jun 03, 2019 at 04:31:51PM +0200, Andrea Bolognani wrote:
On Mon, 2019-06-03 at 14:10 +0200, Ján Tomko wrote:
if (b->nbits && a->nbits < b->nbits && virBitmapExpand(a, b->nbits -1) < 0) {
Yeah, you're right, we need to account for the zero-indexing of bits. I mean, it's not like the resulting bitmap would be incorrect either way, but we might end up allocating more memory than it's actually required.
Actually I do think it would be incorrect. The bitmap size is as important as its values - e.g. if the resulting bitmap had 4 set bits, a size of 4 vs 5 would alter the result of virBitmapIsAllSet.
Oh, right, that's something that I hadn't considered. This is pretty much exactly why I didn't trust my implementation to be correct O:-)
The first check seems unnecessary, though: the only case in which the argument to virBitmapExpand() would be incorrect is b->nbits == 0, but we know that both a->nbits and b->nbits are >= 0 and we also just verified that a->nbits < b->nbits, so b->nbits must be >= 1 and the argument to virBitmapExpand() will always be correct. Or am I missing something?
Right, it will work unless someone changes nbits from size_t to double.
So ACK to drop the first check while adopting your version of that hunk? -- Andrea Bolognani / Red Hat / Virtualization

On Mon, Jun 03, 2019 at 05:04:51PM +0200, Andrea Bolognani wrote:
On Mon, 2019-06-03 at 16:43 +0200, Ján Tomko wrote:
On Mon, Jun 03, 2019 at 04:31:51PM +0200, Andrea Bolognani wrote:
On Mon, 2019-06-03 at 14:10 +0200, Ján Tomko wrote:
if (b->nbits && a->nbits < b->nbits && virBitmapExpand(a, b->nbits -1) < 0) {
Yeah, you're right, we need to account for the zero-indexing of bits. I mean, it's not like the resulting bitmap would be incorrect either way, but we might end up allocating more memory than it's actually required.
Actually I do think it would be incorrect. The bitmap size is as important as its values - e.g. if the resulting bitmap had 4 set bits, a size of 4 vs 5 would alter the result of virBitmapIsAllSet.
Oh, right, that's something that I hadn't considered. This is pretty much exactly why I didn't trust my implementation to be correct O:-)
The first check seems unnecessary, though: the only case in which the argument to virBitmapExpand() would be incorrect is b->nbits == 0, but we know that both a->nbits and b->nbits are >= 0 and we also just verified that a->nbits < b->nbits, so b->nbits must be >= 1 and the argument to virBitmapExpand() will always be correct. Or am I missing something?
Right, it will work unless someone changes nbits from size_t to double.
So ACK to drop the first check while adopting your version of that hunk?
ACK indeed. Or you can choose to apply my R-b tags, unlike someone ;) Jano

This helper converts a set of NUMA node to the set of CPUs they contain. Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virnuma.c | 55 ++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 2 ++ 3 files changed, 58 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 678d0348a2..3553545504 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2556,6 +2556,7 @@ virNumaGetPages; virNumaIsAvailable; virNumaNodeIsAvailable; virNumaNodesetIsAvailable; +virNumaNodesetToCPUset; virNumaSetPagePoolSize; virNumaSetupMemoryPolicy; diff --git a/src/util/virnuma.c b/src/util/virnuma.c index dd3fb7519e..9f9f149f0a 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -297,6 +297,49 @@ virNumaGetNodeCPUs(int node, # undef MASK_CPU_ISSET # undef n_bits +/** + * virNumaNodesetToCPUset: + * @nodeset: bitmap containing a set of NUMA nodes + * @cpuset: return location for a bitmap containing a set of CPUs + * + * Convert a set of NUMA node to the set of CPUs they contain. + * + * Returns 0 on success, <0 on failure. + */ +int +virNumaNodesetToCPUset(virBitmapPtr nodeset, + virBitmapPtr *cpuset) +{ + VIR_AUTOPTR(virBitmap) allNodesCPUs = NULL; + size_t nodesetSize; + size_t i; + + *cpuset = NULL; + + if (!nodeset) + return 0; + + allNodesCPUs = virBitmapNewEmpty(); + nodesetSize = virBitmapSize(nodeset); + + for (i = 0; i < nodesetSize; i++) { + VIR_AUTOPTR(virBitmap) nodeCPUs = NULL; + + if (!virBitmapIsBitSet(nodeset, i)) + continue; + + if (virNumaGetNodeCPUs(i, &nodeCPUs) < 0) + return -1; + + if (virBitmapUnion(allNodesCPUs, nodeCPUs) < 0) + return -1; + } + + VIR_STEAL_PTR(*cpuset, allNodesCPUs); + + return 0; +} + #else /* !WITH_NUMACTL */ int @@ -351,6 +394,18 @@ virNumaGetNodeCPUs(int node ATTRIBUTE_UNUSED, _("NUMA isn't available on this host")); return -1; } + +int +virNumaNodesetToCPUset(virBitmapPtr nodeset, + virBitmapPtr *cpuset) +{ + *cpuset = NULL; + + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("NUMA isn't available on this host")); + return -1; +} + #endif /* !WITH_NUMACTL */ /** diff --git a/src/util/virnuma.h b/src/util/virnuma.h index cd7a3ec11f..8e95011dfa 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -48,6 +48,8 @@ int virNumaGetNodeMemory(int node, unsigned int virNumaGetMaxCPUs(void); int virNumaGetNodeCPUs(int node, virBitmapPtr *cpus) ATTRIBUTE_NOINLINE; +int virNumaNodesetToCPUset(virBitmapPtr nodeset, + virBitmapPtr *cpuset); int virNumaGetPageInfo(int node, unsigned int page_size, -- 2.21.0

On Fri, May 31, 2019 at 05:22:01PM +0200, Andrea Bolognani wrote:
This helper converts a set of NUMA node to the set of CPUs they contain.
Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virnuma.c | 55 ++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 2 ++ 3 files changed, 58 insertions(+)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 678d0348a2..3553545504 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2556,6 +2556,7 @@ virNumaGetPages; virNumaIsAvailable; virNumaNodeIsAvailable; virNumaNodesetIsAvailable; +virNumaNodesetToCPUset; virNumaSetPagePoolSize; virNumaSetupMemoryPolicy;
diff --git a/src/util/virnuma.c b/src/util/virnuma.c index dd3fb7519e..9f9f149f0a 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -297,6 +297,49 @@ virNumaGetNodeCPUs(int node, # undef MASK_CPU_ISSET # undef n_bits
+/** + * virNumaNodesetToCPUset: + * @nodeset: bitmap containing a set of NUMA nodes + * @cpuset: return location for a bitmap containing a set of CPUs + * + * Convert a set of NUMA node to the set of CPUs they contain. + * + * Returns 0 on success, <0 on failure. + */ +int +virNumaNodesetToCPUset(virBitmapPtr nodeset, + virBitmapPtr *cpuset)
I'd prefer the output argument to be first - see VIR_STRDUP. But I have no stats about which style is prevailing.
+{ + VIR_AUTOPTR(virBitmap) allNodesCPUs = NULL; + size_t nodesetSize; + size_t i; + + *cpuset = NULL; + + if (!nodeset) + return 0; + + allNodesCPUs = virBitmapNewEmpty(); + nodesetSize = virBitmapSize(nodeset); + + for (i = 0; i < nodesetSize; i++) { + VIR_AUTOPTR(virBitmap) nodeCPUs = NULL; + + if (!virBitmapIsBitSet(nodeset, i)) + continue; + + if (virNumaGetNodeCPUs(i, &nodeCPUs) < 0) + return -1; + + if (virBitmapUnion(allNodesCPUs, nodeCPUs) < 0) + return -1; + } + + VIR_STEAL_PTR(*cpuset, allNodesCPUs); + + return 0; +} + #else /* !WITH_NUMACTL */
int @@ -351,6 +394,18 @@ virNumaGetNodeCPUs(int node ATTRIBUTE_UNUSED, _("NUMA isn't available on this host")); return -1; } + +int +virNumaNodesetToCPUset(virBitmapPtr nodeset, + virBitmapPtr *cpuset) +{
util/virnuma.c:399:37: error: unused parameter 'nodeset' [-Werror,-Wunused-parameter] virNumaNodesetToCPUset(virBitmapPtr nodeset, ^ 1 error generated.
+ *cpuset = NULL; + + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("NUMA isn't available on this host")); + return -1; +} + #endif /* !WITH_NUMACTL */
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano

On Mon, 2019-06-03 at 14:13 +0200, Ján Tomko wrote:
On Fri, May 31, 2019 at 05:22:01PM +0200, Andrea Bolognani wrote:
+int +virNumaNodesetToCPUset(virBitmapPtr nodeset, + virBitmapPtr *cpuset)
I'd prefer the output argument to be first - see VIR_STRDUP. But I have no stats about which style is prevailing.
The name of the function is virNumaNodesetToCPUset(), so having the cpuset argument first and the nodeset argument second would be very confusing :)
+int +virNumaNodesetToCPUset(virBitmapPtr nodeset, + virBitmapPtr *cpuset) +{
util/virnuma.c:399:37: error: unused parameter 'nodeset' [-Werror,-Wunused-parameter] virNumaNodesetToCPUset(virBitmapPtr nodeset, ^ 1 error generated.
Good catch, compiler! Fixed it locally. -- Andrea Bolognani / Red Hat / Virtualization

Commit f136b83139c6 made some changes in the way we set CPU affinity for QEMU processes, and while doing so unfortunately also introduced a logic bug in that it tried to use a NUMA node map where a CPU map was expected. Because of that, guests using <numatune> would suddenly fail to start: # virsh start guest error: Failed to start domain guest error: cannot set CPU affinity on process 40055: Invalid argument This was particularly easy to trigger on POWER 8 machines, where secondary threads always show up as offline in the host: having <numatune> <memory mode='strict' placement='static' nodeset='1'/> </numatune> in the guest configuration, for example, would result in libvirt trying to set the process affinity so that it would prefer running on CPU 1, but since that's a secondary thread and thus shows up as offline, the operation would fail, and so would starting the guest. Use the newly introduced virNumaNodesetToCPUset() to convert the NUMA node map to a CPU map, which in the example above would be 8-15 with CPU 8 showing up as online in the guest. https://bugzilla.redhat.com/show_bug.cgi?id=1703661 Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- src/qemu/qemu_process.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index fa5909e9be..295ad2e1ff 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2489,11 +2489,16 @@ qemuProcessInitCpuAffinity(virDomainObjPtr vm) if (virDomainNumaGetNodeCount(vm->def->numa) <= 1 && virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + virBitmapPtr nodeset = NULL; + if (virDomainNumatuneMaybeGetNodeset(vm->def->numa, priv->autoNodeset, - &cpumapToSet, + &nodeset, -1) < 0) goto cleanup; + + if (virNumaNodesetToCPUset(nodeset, &cpumapToSet) < 0) + goto cleanup; } else if (vm->def->cputune.emulatorpin) { cpumapToSet = vm->def->cputune.emulatorpin; } else { -- 2.21.0

On Fri, 2019-05-31 at 17:22 +0200, Andrea Bolognani wrote:
Commit f136b83139c6 made some changes in the way we set CPU affinity for QEMU processes, and while doing so unfortunately also introduced a logic bug in that it tried to use a NUMA node map where a CPU map was expected.
Correction: the logic bug was present even in the original implementation of the feature, introduced back in 2012 (!) with commit 0f8e7ae33ace; setting the CPU affinity earlier in the guest startup process, as we're doing since a few releases back, has probably just made the mistake easier to spot. Or maybe nobody was using it on POWER 8 and that's why we never realized something was wrong :) -- Andrea Bolognani / Red Hat / Virtualization

On Fri, May 31, 2019 at 05:22:02PM +0200, Andrea Bolognani wrote:
Commit f136b83139c6 made some changes in the way we set CPU affinity for QEMU processes, and while doing so unfortunately also introduced a logic bug in that it tried to use a NUMA node map where a CPU map was expected.
Because of that, guests using <numatune> would suddenly fail to start:
# virsh start guest error: Failed to start domain guest error: cannot set CPU affinity on process 40055: Invalid argument
This was particularly easy to trigger on POWER 8 machines, where secondary threads always show up as offline in the host: having
<numatune> <memory mode='strict' placement='static' nodeset='1'/> </numatune>
in the guest configuration, for example, would result in libvirt trying to set the process affinity so that it would prefer running on CPU 1, but since that's a secondary thread and thus shows up as offline, the operation would fail, and so would starting the guest.
Use the newly introduced virNumaNodesetToCPUset() to convert the NUMA node map to a CPU map, which in the example above would be 8-15 with CPU 8 showing up as online in the guest.
https://bugzilla.redhat.com/show_bug.cgi?id=1703661
Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- src/qemu/qemu_process.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano

On 5/31/19 11:22 AM, Andrea Bolognani wrote:
Commit f136b83139c6 made some changes in the way we set CPU affinity for QEMU processes, and while doing so unfortunately also introduced a logic bug in that it tried to use a NUMA node map where a CPU map was expected.
Because of that, guests using <numatune> would suddenly fail to start:
# virsh start guest error: Failed to start domain guest error: cannot set CPU affinity on process 40055: Invalid argument
This was particularly easy to trigger on POWER 8 machines, where secondary threads always show up as offline in the host: having
<numatune> <memory mode='strict' placement='static' nodeset='1'/> </numatune>
in the guest configuration, for example, would result in libvirt trying to set the process affinity so that it would prefer running on CPU 1, but since that's a secondary thread and thus shows up as offline, the operation would fail, and so would starting the guest.
Use the newly introduced virNumaNodesetToCPUset() to convert the NUMA node map to a CPU map, which in the example above would be 8-15 with CPU 8 showing up as online in the guest.
https://bugzilla.redhat.com/show_bug.cgi?id=1703661
Signed-off-by: Andrea Bolognani <abologna@redhat.com> --- src/qemu/qemu_process.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index fa5909e9be..295ad2e1ff 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -2489,11 +2489,16 @@ qemuProcessInitCpuAffinity(virDomainObjPtr vm) if (virDomainNumaGetNodeCount(vm->def->numa) <= 1 && virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + virBitmapPtr nodeset = NULL; + if (virDomainNumatuneMaybeGetNodeset(vm->def->numa, priv->autoNodeset, - &cpumapToSet, + &nodeset, -1) < 0) goto cleanup; + + if (virNumaNodesetToCPUset(nodeset, &cpumapToSet) < 0) + goto cleanup;
Coverity complained this morning because virNumaNodesetToCPUset will allocate something into @cpumapToSet which isn't free'd when this code jumps to cleanup. John
} else if (vm->def->cputune.emulatorpin) { cpumapToSet = vm->def->cputune.emulatorpin; } else {

On Tue, 2019-06-04 at 07:03 -0400, John Ferlan wrote:
On 5/31/19 11:22 AM, Andrea Bolognani wrote:
@@ -2489,11 +2489,16 @@ qemuProcessInitCpuAffinity(virDomainObjPtr vm) if (virDomainNumaGetNodeCount(vm->def->numa) <= 1 && virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + virBitmapPtr nodeset = NULL; + if (virDomainNumatuneMaybeGetNodeset(vm->def->numa, priv->autoNodeset, - &cpumapToSet, + &nodeset, -1) < 0) goto cleanup; + + if (virNumaNodesetToCPUset(nodeset, &cpumapToSet) < 0) + goto cleanup;
Coverity complained this morning because virNumaNodesetToCPUset will allocate something into @cpumapToSet which isn't free'd when this code jumps to cleanup.
Nice catch, Coverity! I've just posted a fix (plus a bonus cleanup): https://www.redhat.com/archives/libvir-list/2019-June/msg00076.html -- Andrea Bolognani / Red Hat / Virtualization
participants (3)
-
Andrea Bolognani
-
John Ferlan
-
Ján Tomko