[libvirt] [PATCH 0/6] Fix a few issues introduced by adding vcpu hotplug

Peter Krempa (6): virsh: vcpuinfo: Report vcpu number from the structure rather than it's position qemu: driver: Fix qemuDomainHelperGetVcpus for sparse vcpu topologies doc: clarify documentation for vcpu order conf: Don't validate vcpu count in XML parser qemu: driver: Validate configuration when setting maximum vcpu count conf: Fix build with picky GCC docs/formatdomain.html.in | 7 ++++--- src/conf/domain_conf.c | 13 ++++--------- src/qemu/qemu_driver.c | 23 ++++++++++++++++++----- tools/virsh-domain.c | 3 ++- 4 files changed, 28 insertions(+), 18 deletions(-) -- 2.8.2

virVcpuInfo contains the vcpu number that the data refers to. Report what's returned by the daemon rather than the sequence number as with sparse vcpu topologies they won't match. --- tools/virsh-domain.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index de2a22c..90d2543 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -6315,8 +6315,8 @@ cmdVcpuinfo(vshControl *ctl, const vshCmd *cmd) } for (n = 0; n < ncpus; n++) { - vshPrint(ctl, "%-15s %d\n", _("VCPU:"), n); if (cpuinfo) { + vshPrint(ctl, "%-15s %d\n", _("VCPU:"), cpuinfo[n].number); vshPrint(ctl, "%-15s %d\n", _("CPU:"), cpuinfo[n].cpu); vshPrint(ctl, "%-15s %s\n", _("State:"), virshDomainVcpuStateToString(cpuinfo[n].state)); @@ -6328,6 +6328,7 @@ cmdVcpuinfo(vshControl *ctl, const vshCmd *cmd) vshPrint(ctl, "%-15s %.1lfs\n", _("CPU time:"), cpuUsed); } } else { + vshPrint(ctl, "%-15s %d\n", _("VCPU:"), n); vshPrint(ctl, "%-15s %s\n", _("CPU:"), _("N/A")); vshPrint(ctl, "%-15s %s\n", _("State:"), _("N/A")); vshPrint(ctl, "%-15s %s\n", _("CPU time"), _("N/A")); -- 2.8.2

On Thu, Aug 25, 2016 at 18:42:45 -0400, Peter Krempa wrote:
virVcpuInfo contains the vcpu number that the data refers to. Report what's returned by the daemon rather than the sequence number as with sparse vcpu topologies they won't match. --- tools/virsh-domain.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index de2a22c..90d2543 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -6315,8 +6315,8 @@ cmdVcpuinfo(vshControl *ctl, const vshCmd *cmd) }
for (n = 0; n < ncpus; n++) { - vshPrint(ctl, "%-15s %d\n", _("VCPU:"), n); if (cpuinfo) { + vshPrint(ctl, "%-15s %d\n", _("VCPU:"), cpuinfo[n].number); vshPrint(ctl, "%-15s %d\n", _("CPU:"), cpuinfo[n].cpu); vshPrint(ctl, "%-15s %s\n", _("State:"), virshDomainVcpuStateToString(cpuinfo[n].state)); @@ -6328,6 +6328,7 @@ cmdVcpuinfo(vshControl *ctl, const vshCmd *cmd) vshPrint(ctl, "%-15s %.1lfs\n", _("CPU time:"), cpuUsed); } } else { + vshPrint(ctl, "%-15s %d\n", _("VCPU:"), n); vshPrint(ctl, "%-15s %s\n", _("CPU:"), _("N/A")); vshPrint(ctl, "%-15s %s\n", _("State:"), _("N/A")); vshPrint(ctl, "%-15s %s\n", _("CPU time"), _("N/A"));
ACK Jirka

ce43cca0e refactored the helper to prepare it for sparse topologies but forgot to fix the iterator used to fill the structures. This would result into a weirdly sparse populated array and possible out of bounds access and crash once sparse vcpu topologies were allowed. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1369988 --- src/qemu/qemu_driver.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 97e2ffc..671d1ff 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1477,15 +1477,17 @@ qemuDomainHelperGetVcpus(virDomainObjPtr vm, for (i = 0; i < virDomainDefGetVcpusMax(vm->def) && ncpuinfo < maxinfo; i++) { virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(vm->def, i); pid_t vcpupid = qemuDomainGetVcpuPid(vm, i); + virVcpuInfoPtr vcpuinfo = info + ncpuinfo; if (!vcpu->online) continue; if (info) { - info[i].number = i; - info[i].state = VIR_VCPU_RUNNING; + vcpuinfo->number = i; + vcpuinfo->state = VIR_VCPU_RUNNING; - if (qemuGetProcessInfo(&(info[i].cpuTime), &(info[i].cpu), NULL, + if (qemuGetProcessInfo(&vcpuinfo->cpuTime, + &vcpuinfo->cpu, NULL, vm->pid, vcpupid) < 0) { virReportSystemError(errno, "%s", _("cannot get vCPU placement & pCPU time")); @@ -1494,7 +1496,7 @@ qemuDomainHelperGetVcpus(virDomainObjPtr vm, } if (cpumaps) { - unsigned char *cpumap = VIR_GET_CPUMAP(cpumaps, maplen, i); + unsigned char *cpumap = VIR_GET_CPUMAP(cpumaps, maplen, ncpuinfo); virBitmapPtr map = NULL; if (!(map = virProcessGetAffinity(vcpupid))) @@ -1505,7 +1507,7 @@ qemuDomainHelperGetVcpus(virDomainObjPtr vm, } if (cpuwait) { - if (qemuGetSchedInfo(&(cpuwait[i]), vm->pid, vcpupid) < 0) + if (qemuGetSchedInfo(&(cpuwait[ncpuinfo]), vm->pid, vcpupid) < 0) return -1; } -- 2.8.2

On Thu, Aug 25, 2016 at 18:42:46 -0400, Peter Krempa wrote:
ce43cca0e refactored the helper to prepare it for sparse topologies but forgot to fix the iterator used to fill the structures. This would result into a weirdly sparse populated array and possible out of bounds access and crash once sparse vcpu topologies were allowed.
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1369988 --- src/qemu/qemu_driver.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-)
ACK Jirka

Make it clear that vcpu order is valid for online vcpus only and state that it has to be specified for all vcpus or not provided at all. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1370043 --- docs/formatdomain.html.in | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index f2de8e4..f1dadc6 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -564,11 +564,12 @@ all disabled vcpus must be hotpluggable. Valid values are <code>yes</code> and <code>no</code>. - <code>order</code> allows to specify the order to add the vcpus. For - hypervisors/platforms that require to insert multiple vcpus at once + <code>order</code> allows to specify the order to add the online vcpus. + For hypervisors/platforms that require to insert multiple vcpus at once the order may be be duplicated accross all vcpus that need to be enabled at once. Specifying order is not necessary, vcpus are then - added in an arbitrary order. + added in an arbitrary order. If order info is used, it must be used for + all online vcpus. Note that hypervisors may create hotpluggable vcpus differently from boot vcpus thus special initialization may be necessary. -- 2.8.2

On Thu, Aug 25, 2016 at 18:42:47 -0400, Peter Krempa wrote:
Make it clear that vcpu order is valid for online vcpus only and state that it has to be specified for all vcpus or not provided at all.
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1370043 --- docs/formatdomain.html.in | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index f2de8e4..f1dadc6 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -564,11 +564,12 @@ all disabled vcpus must be hotpluggable. Valid values are <code>yes</code> and <code>no</code>.
- <code>order</code> allows to specify the order to add the vcpus. For - hypervisors/platforms that require to insert multiple vcpus at once + <code>order</code> allows to specify the order to add the online vcpus. + For hypervisors/platforms that require to insert multiple vcpus at once the order may be be duplicated accross all vcpus that need to be enabled at once. Specifying order is not necessary, vcpus are then - added in an arbitrary order. + added in an arbitrary order. If order info is used, it must be used for + all online vcpus.
Note that hypervisors may create hotpluggable vcpus differently from boot vcpus thus special initialization may be necessary.
ACK Jirka

Validating the vcpu count is more intricate and doing it in the XML parser will make previously valid configs (with older qemus) vanish. Now that we have the validation callbacks we can do it in a more appropriate place. This basically reverts commit b54de0830a. Partially resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1370066 --- src/conf/domain_conf.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 17c2f53..ae3eb14 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -16376,15 +16376,6 @@ virDomainDefParseXML(xmlDocPtr xml, if (def->cpu == NULL) goto error; - - if (def->cpu->sockets && - virDomainDefGetVcpusMax(def) > - def->cpu->sockets * def->cpu->cores * def->cpu->threads) { - virReportError(VIR_ERR_XML_DETAIL, "%s", - _("Maximum CPUs greater than topology limit")); - goto error; - } - } if (virDomainNumaDefCPUParseXML(def->numa, ctxt) < 0) -- 2.8.2

On Thu, Aug 25, 2016 at 18:42:48 -0400, Peter Krempa wrote:
Validating the vcpu count is more intricate and doing it in the XML parser will make previously valid configs (with older qemus) vanish.
Now that we have the validation callbacks we can do it in a more appropriate place.
This comment does not make it immediately clear that the existing callback actually contains the check already. ACK Jirka

Setting vcpu count when cpu topology is specified may result into an invalid configuration. Since the topology can't be modified, reject the setting if it doesn't match the requested topology. This will allow fixing the topology in case it was broken. Partially fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1370066 --- src/qemu/qemu_driver.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 671d1ff..5f8c11c 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -4730,6 +4730,17 @@ qemuDomainSetVcpusMax(virQEMUDriverPtr driver, goto cleanup; } + if (persistentDef && persistentDef->cpu && persistentDef->cpu->sockets) { + /* explicitly allow correcting invalid vcpu count */ + if (nvcpus != persistentDef->cpu->sockets * + persistentDef->cpu->cores * + persistentDef->cpu->threads) { + virReportError(VIR_ERR_INVALID_ARG, "%s", + _("CPU topology doesn't match the desired vcpu count")); + goto cleanup; + } + } + if (virDomainDefSetVcpusMax(persistentDef, nvcpus, driver->xmlopt) < 0) goto cleanup; -- 2.8.2

On Thu, Aug 25, 2016 at 18:42:49 -0400, Peter Krempa wrote:
Setting vcpu count when cpu topology is specified may result into an invalid configuration. Since the topology can't be modified, reject the setting if it doesn't match the requested topology. This will allow fixing the topology in case it was broken.
Partially fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1370066 --- src/qemu/qemu_driver.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 671d1ff..5f8c11c 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -4730,6 +4730,17 @@ qemuDomainSetVcpusMax(virQEMUDriverPtr driver, goto cleanup; }
+ if (persistentDef && persistentDef->cpu && persistentDef->cpu->sockets) { + /* explicitly allow correcting invalid vcpu count */
Hmm, this is more confusing than helpful :-)
+ if (nvcpus != persistentDef->cpu->sockets * + persistentDef->cpu->cores * + persistentDef->cpu->threads) { + virReportError(VIR_ERR_INVALID_ARG, "%s", + _("CPU topology doesn't match the desired vcpu count")); + goto cleanup; + } + } + if (virDomainDefSetVcpusMax(persistentDef, nvcpus, driver->xmlopt) < 0) goto cleanup;
ACK Jirka

../../src/conf/domain_conf.c:4425:21: error: potential null pointer dereference [-Werror=null-dereference] switch (vcpu->hotpluggable) { ~~~~^~~~~~~~~~~~~~ --- src/conf/domain_conf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index ae3eb14..61f6dbb 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -4422,6 +4422,10 @@ virDomainVcpuDefPostParse(virDomainDefPtr def) for (i = 0; i < maxvcpus; i++) { vcpu = virDomainDefGetVcpu(def, i); + /* impossible but some compilers don't like it */ + if (!vcpu) + continue; + switch (vcpu->hotpluggable) { case VIR_TRISTATE_BOOL_ABSENT: if (vcpu->online) -- 2.8.2

On Thu, Aug 25, 2016 at 18:42:50 -0400, Peter Krempa wrote:
../../src/conf/domain_conf.c:4425:21: error: potential null pointer dereference [-Werror=null-dereference] switch (vcpu->hotpluggable) { ~~~~^~~~~~~~~~~~~~ --- src/conf/domain_conf.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index ae3eb14..61f6dbb 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -4422,6 +4422,10 @@ virDomainVcpuDefPostParse(virDomainDefPtr def) for (i = 0; i < maxvcpus; i++) { vcpu = virDomainDefGetVcpu(def, i);
+ /* impossible but some compilers don't like it */ + if (!vcpu) + continue; + switch (vcpu->hotpluggable) { case VIR_TRISTATE_BOOL_ABSENT: if (vcpu->online)
ACK Jirka
participants (2)
-
Jiri Denemark
-
Peter Krempa