[libvirt] [PATCHv2]rework the check for the numa cpus

V1 discussion: https://www.redhat.com/archives/libvir-list/2015-April/msg00919.html new for v2: remove 1/2 because no need introduce a new flag to avoid the broken introduced by the new check. We introduce a check for numa cpu total count in 5f7b71, But seems this check cannot work well. There are some scenarioes: 1. one of cpu id is out of maxvcpus, can set success(cpu count = 5 < 10): <vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-3,100' memory='512000' unit='KiB'/> the cpus '100' exceed maxvcpus, this setting is not valid (although qemu do not output error). 2. use the same cpu in 2 cell, can set success(cpu count = 8 < 10): <vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-3' memory='512000' unit='KiB'/> <cell id='1' cpus='0-3' memory='512000' unit='KiB'/> I guess nobody will use this setting if he really want use numa in his vm. (qemu not output error, but we can find some interesting things in vm, all cpus have bind in one numa node) 3. use the same cpu in 2 cell, cannot set success(cpu count = 11 > 10): <vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-6' memory='512000' unit='KiB'/> <cell id='1' cpus='0-3' memory='512000' unit='KiB'/> No need forbid this scenario if scenario 2 is a 'valid' setting. However add new check during parse xml will make vm has broken settings disappear after update libvirtd, so possible solutions: 1. add new check when parse xml, i chose this way in this version. 2. add new check when start vm. I think this is a configuration issue, so no need report it so late. 3. just remove this check and do not add new check... :) Luyao Huang (1): conf: rework the cpu check for vm numa settings src/conf/domain_conf.c | 6 +----- src/conf/numa_conf.c | 37 ++++++++++++++++++++++++++++++------- src/conf/numa_conf.h | 2 +- 3 files changed, 32 insertions(+), 13 deletions(-) -- 1.8.3.1

https://bugzilla.redhat.com/show_bug.cgi?id=1176020 We had a check for the vcpu count total number in <numa> before, however this check is not good enough. There are some examples: 1. one of cpu id is out of maxvcpus, can set success(cpu count = 5 < 10): <vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-3,100' memory='512000' unit='KiB'/> 2. use the same cpu in 2 cell, can set success(cpu count = 8 < 10): <vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-3' memory='512000' unit='KiB'/> <cell id='1' cpus='0-3' memory='512000' unit='KiB'/> 3. use the same cpu in 2 cell, cannot set success(cpu count = 11 > 10): <vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-6' memory='512000' unit='KiB'/> <cell id='1' cpus='0-3' memory='512000' unit='KiB'/> Use a new check for numa cpus, check if use a cpu exceeds maxvcpus and if duplicate use one cpu in more than one cell. Signed-off-by: Luyao Huang <lhuang@redhat.com> --- src/conf/domain_conf.c | 6 +----- src/conf/numa_conf.c | 37 ++++++++++++++++++++++++++++++------- src/conf/numa_conf.h | 2 +- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 479b4c2..a4a2abb 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -14234,12 +14234,8 @@ virDomainDefParseXML(xmlDocPtr xml, if (virDomainNumaDefCPUParseXML(def->numa, ctxt) < 0) goto error; - if (virDomainNumaGetCPUCountTotal(def->numa) > def->maxvcpus) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Number of CPUs in <numa> exceeds the" - " <vcpu> count")); + if (virDomainNumaCheckCPU(def->numa, def->maxvcpus) < 0) goto error; - } if (virDomainNumatuneParseXML(def->numa, def->placement_mode == diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c index 7ad3f66..2b18225 100644 --- a/src/conf/numa_conf.c +++ b/src/conf/numa_conf.c @@ -805,16 +805,39 @@ virDomainNumaDefCPUFormat(virBufferPtr buf, } -unsigned int -virDomainNumaGetCPUCountTotal(virDomainNumaPtr numa) +int +virDomainNumaCheckCPU(virDomainNumaPtr numa, + unsigned short maxvcpus) { - size_t i; - unsigned int ret = 0; + size_t i,j; - for (i = 0; i < numa->nmem_nodes; i++) - ret += virBitmapCountBits(virDomainNumaGetNodeCpumask(numa, i)); + for (i = 0; i < numa->nmem_nodes; i++) { + virBitmapPtr nodeset = NULL; + ssize_t bit = -1; + + nodeset = virDomainNumaGetNodeCpumask(numa, i); + for (j = 0; j < i; j++) { + if (virBitmapOverlaps(virDomainNumaGetNodeCpumask(numa, j), + nodeset)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot binding one vCPU in 2 NUMA cell" + " %zu and %zu"), i, j); + return -1; + } + } - return ret; + while ((bit = virBitmapNextSetBit(nodeset, bit)) >= 0) { + if (bit <= maxvcpus-1) + continue; + + virReportError(VIR_ERR_INTERNAL_ERROR, + _("vcpu '%zu' in <numa> cell '%zu' exceeds the maxvcpus"), + bit, i); + return -1; + } + } + + return 0; } diff --git a/src/conf/numa_conf.h b/src/conf/numa_conf.h index ded6e01..d7713c8 100644 --- a/src/conf/numa_conf.h +++ b/src/conf/numa_conf.h @@ -149,7 +149,7 @@ bool virDomainNumatuneNodeSpecified(virDomainNumaPtr numatune, int virDomainNumaDefCPUParseXML(virDomainNumaPtr def, xmlXPathContextPtr ctxt); int virDomainNumaDefCPUFormat(virBufferPtr buf, virDomainNumaPtr def); -unsigned int virDomainNumaGetCPUCountTotal(virDomainNumaPtr numa); +int virDomainNumaCheckCPU(virDomainNumaPtr numa, unsigned short maxvcpus); #endif /* __NUMA_CONF_H__ */ -- 1.8.3.1

On Wed, Apr 22, 2015 at 20:34:55 +0800, Luyao Huang wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1176020
We had a check for the vcpu count total number in <numa> before, however this check is not good enough. There are some examples:
1. one of cpu id is out of maxvcpus, can set success(cpu count = 5 < 10):
<vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-3,100' memory='512000' unit='KiB'/>
2. use the same cpu in 2 cell, can set success(cpu count = 8 < 10): <vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-3' memory='512000' unit='KiB'/> <cell id='1' cpus='0-3' memory='512000' unit='KiB'/>
3. use the same cpu in 2 cell, cannot set success(cpu count = 11 > 10): <vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-6' memory='512000' unit='KiB'/> <cell id='1' cpus='0-3' memory='512000' unit='KiB'/>
Use a new check for numa cpus, check if use a cpu exceeds maxvcpus and if duplicate use one cpu in more than one cell.
Signed-off-by: Luyao Huang <lhuang@redhat.com> --- src/conf/domain_conf.c | 6 +----- src/conf/numa_conf.c | 37 ++++++++++++++++++++++++++++++------- src/conf/numa_conf.h | 2 +- 3 files changed, 32 insertions(+), 13 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 479b4c2..a4a2abb 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -14234,12 +14234,8 @@ virDomainDefParseXML(xmlDocPtr xml, if (virDomainNumaDefCPUParseXML(def->numa, ctxt) < 0) goto error;
- if (virDomainNumaGetCPUCountTotal(def->numa) > def->maxvcpus) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Number of CPUs in <numa> exceeds the" - " <vcpu> count")); + if (virDomainNumaCheckCPU(def->numa, def->maxvcpus) < 0)
This check could be placed after all the numa nodes are parsed and thus would function correctly when combined with ...
goto error; - }
if (virDomainNumatuneParseXML(def->numa, def->placement_mode == diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c index 7ad3f66..2b18225 100644 --- a/src/conf/numa_conf.c +++ b/src/conf/numa_conf.c @@ -805,16 +805,39 @@ virDomainNumaDefCPUFormat(virBufferPtr buf, }
-unsigned int -virDomainNumaGetCPUCountTotal(virDomainNumaPtr numa) +int +virDomainNumaCheckCPU(virDomainNumaPtr numa, + unsigned short maxvcpus) { - size_t i; - unsigned int ret = 0; + size_t i,j;
- for (i = 0; i < numa->nmem_nodes; i++) - ret += virBitmapCountBits(virDomainNumaGetNodeCpumask(numa, i)); + for (i = 0; i < numa->nmem_nodes; i++) { + virBitmapPtr nodeset = NULL; + ssize_t bit = -1; + + nodeset = virDomainNumaGetNodeCpumask(numa, i); + for (j = 0; j < i; j++) { + if (virBitmapOverlaps(virDomainNumaGetNodeCpumask(numa, j), + nodeset)) {
... this check.
+ virReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot binding one vCPU in 2 NUMA cell" + " %zu and %zu"), i, j);
This error message doesn't look very explanatory. Perhaps "NUMA cells %zu and %zu have overlapping vCPU ids".
+ return -1; + } + }
- return ret; + while ((bit = virBitmapNextSetBit(nodeset, bit)) >= 0) { + if (bit <= maxvcpus-1)
Incorrect spacing around the '-' operator.
+ continue;
This construct more-or-less reimplements virBitmapLastSetBit()
+ + virReportError(VIR_ERR_INTERNAL_ERROR, + _("vcpu '%zu' in <numa> cell '%zu' exceeds the maxvcpus"), + bit, i);
This check looks awkward. I'd go with the virDomainNumaGetCPUCountTotal and add the check for overlapping indexes.
+ return -1; + } + } + + return 0; }
Peter

On 04/22/2015 08:55 PM, Peter Krempa wrote:
On Wed, Apr 22, 2015 at 20:34:55 +0800, Luyao Huang wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1176020
We had a check for the vcpu count total number in <numa> before, however this check is not good enough. There are some examples:
1. one of cpu id is out of maxvcpus, can set success(cpu count = 5 < 10):
<vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-3,100' memory='512000' unit='KiB'/>
2. use the same cpu in 2 cell, can set success(cpu count = 8 < 10): <vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-3' memory='512000' unit='KiB'/> <cell id='1' cpus='0-3' memory='512000' unit='KiB'/>
3. use the same cpu in 2 cell, cannot set success(cpu count = 11 > 10): <vcpu placement='static'>10</vcpu> <cell id='0' cpus='0-6' memory='512000' unit='KiB'/> <cell id='1' cpus='0-3' memory='512000' unit='KiB'/>
Use a new check for numa cpus, check if use a cpu exceeds maxvcpus and if duplicate use one cpu in more than one cell.
Signed-off-by: Luyao Huang <lhuang@redhat.com> --- src/conf/domain_conf.c | 6 +----- src/conf/numa_conf.c | 37 ++++++++++++++++++++++++++++++------- src/conf/numa_conf.h | 2 +- 3 files changed, 32 insertions(+), 13 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 479b4c2..a4a2abb 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -14234,12 +14234,8 @@ virDomainDefParseXML(xmlDocPtr xml, if (virDomainNumaDefCPUParseXML(def->numa, ctxt) < 0) goto error;
- if (virDomainNumaGetCPUCountTotal(def->numa) > def->maxvcpus) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Number of CPUs in <numa> exceeds the" - " <vcpu> count")); + if (virDomainNumaCheckCPU(def->numa, def->maxvcpus) < 0) This check could be placed after all the numa nodes are parsed and thus would function correctly when combined with ...
Indeed
goto error; - }
if (virDomainNumatuneParseXML(def->numa, def->placement_mode == diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c index 7ad3f66..2b18225 100644 --- a/src/conf/numa_conf.c +++ b/src/conf/numa_conf.c @@ -805,16 +805,39 @@ virDomainNumaDefCPUFormat(virBufferPtr buf, }
-unsigned int -virDomainNumaGetCPUCountTotal(virDomainNumaPtr numa) +int +virDomainNumaCheckCPU(virDomainNumaPtr numa, + unsigned short maxvcpus) { - size_t i; - unsigned int ret = 0; + size_t i,j;
- for (i = 0; i < numa->nmem_nodes; i++) - ret += virBitmapCountBits(virDomainNumaGetNodeCpumask(numa, i)); + for (i = 0; i < numa->nmem_nodes; i++) { + virBitmapPtr nodeset = NULL; + ssize_t bit = -1; + + nodeset = virDomainNumaGetNodeCpumask(numa, i); + for (j = 0; j < i; j++) { + if (virBitmapOverlaps(virDomainNumaGetNodeCpumask(numa, j), + nodeset)) {
... this check.
+ virReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot binding one vCPU in 2 NUMA cell" + " %zu and %zu"), i, j); This error message doesn't look very explanatory. Perhaps "NUMA cells %zu and %zu have overlapping vCPU ids".
Good error.
+ return -1; + } + }
- return ret; + while ((bit = virBitmapNextSetBit(nodeset, bit)) >= 0) { + if (bit <= maxvcpus-1) Incorrect spacing around the '-' operator.
right, this should be a typo mistake :)
+ continue; This construct more-or-less reimplements virBitmapLastSetBit()
Yes, i should use virBitmapLastSetBit() in this place.
+ + virReportError(VIR_ERR_INTERNAL_ERROR, + _("vcpu '%zu' in <numa> cell '%zu' exceeds the maxvcpus"), + bit, i); This check looks awkward. I'd go with the virDomainNumaGetCPUCountTotal and add the check for overlapping indexes.
Hmm...okay, i think this function (virDomainNumaGetCPUCountTotal) should be renamed because this function's function will be changed after these fix. Thanks for your quick review and help.
+ return -1; + } + } + + return 0; } Peter
Luyao
participants (2)
-
Luyao Huang
-
Peter Krempa