[libvirt] [PATCH 0/2] Fix virsh nodeset on shutoff domains

This goes on the top of Martin's (ACKed) patch: https://www.redhat.com/archives/libvir-list/2015-May/msg00585.html Michal Privoznik (2): virDomainNumatuneGetMode: Report if numatune was defined qemuDomainGetNumaParameters: Don't report spurious info src/conf/numa_conf.c | 38 ++++++++++++++++++++++-------- src/conf/numa_conf.h | 5 ++-- src/lxc/lxc_cgroup.c | 5 ++-- src/lxc/lxc_controller.c | 30 ++++++++++++------------ src/parallels/parallels_sdk.c | 5 ++-- src/qemu/qemu_cgroup.c | 15 +++++++----- src/qemu/qemu_command.c | 4 ++-- src/qemu/qemu_driver.c | 54 ++++++++++++++++++++++++++++--------------- src/qemu/qemu_process.c | 28 +++++++++++----------- 9 files changed, 113 insertions(+), 71 deletions(-) -- 2.3.6

So far, we are not reporting if numatune was even defined. The value of zero is blindly returned (which maps onto VIR_DOMAIN_NUMATUNE_MEM_STRICT). Unfortunately, we are making decisions based on this value. Instead, we should not inly return the correct value, but report to the caller if the value is valid at all. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- Notes: I know, qemuDomainGetNumaParameters() is not fixed yet, that's what the very next patch does. For better view on this patch use --ignore-space-change. src/conf/numa_conf.c | 38 +++++++++++++++++++++++++++++--------- src/conf/numa_conf.h | 5 +++-- src/lxc/lxc_cgroup.c | 5 +++-- src/lxc/lxc_controller.c | 30 +++++++++++++++--------------- src/parallels/parallels_sdk.c | 5 +++-- src/qemu/qemu_cgroup.c | 15 +++++++++------ src/qemu/qemu_command.c | 4 ++-- src/qemu/qemu_driver.c | 22 ++++++++++++++-------- src/qemu/qemu_process.c | 28 ++++++++++++++-------------- 9 files changed, 92 insertions(+), 60 deletions(-) diff --git a/src/conf/numa_conf.c b/src/conf/numa_conf.c index e4dc2f8..57da215 100644 --- a/src/conf/numa_conf.c +++ b/src/conf/numa_conf.c @@ -351,20 +351,40 @@ virDomainNumaFree(virDomainNumaPtr numa) VIR_FREE(numa); } -virDomainNumatuneMemMode -virDomainNumatuneGetMode(virDomainNumaPtr numatune, - int cellid) +/** + * virDomainNumatuneGetMode: + * @numatune: pointer to numatune definition + * @cellid: cell selector + * @mode: where to store the result + * + * Get the defined mode for domain's memory. It's safe to pass + * NULL to @mode if the return value is the only info needed. + * + * Returns: 0 on success (with @mode updated) + * -1 if no mode was defined in XML + */ +int virDomainNumatuneGetMode(virDomainNumaPtr numatune, + int cellid, + virDomainNumatuneMemMode *mode) { + int ret = -1; + virDomainNumatuneMemMode tmp_mode; + if (!numatune) - return 0; + return ret; if (virDomainNumatuneNodeSpecified(numatune, cellid)) - return numatune->mem_nodes[cellid].mode; + tmp_mode = numatune->mem_nodes[cellid].mode; + else if (numatune->memory.specified) + tmp_mode = numatune->memory.mode; + else + goto cleanup; - if (numatune->memory.specified) - return numatune->memory.mode; - - return 0; + if (mode) + *mode = tmp_mode; + ret = 0; + cleanup: + return ret; } virBitmapPtr diff --git a/src/conf/numa_conf.h b/src/conf/numa_conf.h index ded6e01..6739065 100644 --- a/src/conf/numa_conf.h +++ b/src/conf/numa_conf.h @@ -72,8 +72,9 @@ int virDomainNumatuneFormatXML(virBufferPtr buf, virDomainNumaPtr numatune) /* * Getters */ -virDomainNumatuneMemMode virDomainNumatuneGetMode(virDomainNumaPtr numatune, - int cellid); +int virDomainNumatuneGetMode(virDomainNumaPtr numatune, + int cellid, + virDomainNumatuneMemMode *mode); virBitmapPtr virDomainNumatuneGetNodeset(virDomainNumaPtr numatune, virBitmapPtr auto_nodeset, diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 5e959a2..507d567 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -69,6 +69,7 @@ static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, { int ret = -1; char *mask = NULL; + virDomainNumatuneMemMode mode; if (def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO && def->cpumask) { @@ -81,8 +82,8 @@ static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, VIR_FREE(mask); } - if (virDomainNumatuneGetMode(def->numa, -1) != - VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + if (virDomainNumatuneGetMode(def->numa, -1, &mode) < 0 || + mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { ret = 0; goto cleanup; } diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index ba44e09..efbe71f 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -741,25 +741,25 @@ static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl) virBitmapPtr nodeset = NULL; virDomainNumatuneMemMode mode; - mode = virDomainNumatuneGetMode(ctrl->def->numa, -1); + if (virDomainNumatuneGetMode(ctrl->def->numa, -1, &mode) == 0) { + if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) { + /* Use virNuma* API iff necessary. Once set and child is exec()-ed, + * there's no way for us to change it. Rely on cgroups (if available + * and enabled in the config) rather than virNuma*. */ + VIR_DEBUG("Relying on CGroups for memory binding"); + } else { - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && - virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) { - /* Use virNuma* API iff necessary. Once set and child is exec()-ed, - * there's no way for us to change it. Rely on cgroups (if available - * and enabled in the config) rather than virNuma*. */ - VIR_DEBUG("Relying on CGroups for memory binding"); - } else { + VIR_DEBUG("Setting up process resource limits"); - VIR_DEBUG("Setting up process resource limits"); + if (virLXCControllerGetNumadAdvice(ctrl, &auto_nodeset) < 0) + goto cleanup; - if (virLXCControllerGetNumadAdvice(ctrl, &auto_nodeset) < 0) - goto cleanup; + nodeset = virDomainNumatuneGetNodeset(ctrl->def->numa, auto_nodeset, -1); - nodeset = virDomainNumatuneGetNodeset(ctrl->def->numa, auto_nodeset, -1); - - if (virNumaSetupMemoryPolicy(mode, nodeset) < 0) - goto cleanup; + if (virNumaSetupMemoryPolicy(mode, nodeset) < 0) + goto cleanup; + } } if (virLXCControllerSetupCpuAffinity(ctrl) < 0) diff --git a/src/parallels/parallels_sdk.c b/src/parallels/parallels_sdk.c index 786e0ad..2a44504 100644 --- a/src/parallels/parallels_sdk.c +++ b/src/parallels/parallels_sdk.c @@ -1845,6 +1845,7 @@ prlsdkCheckUnsupportedParams(PRL_HANDLE sdkdom, virDomainDefPtr def) size_t i; PRL_VM_TYPE vmType; PRL_RESULT pret; + virDomainNumatuneMemMode memMode; if (def->title) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -1924,8 +1925,8 @@ prlsdkCheckUnsupportedParams(PRL_HANDLE sdkdom, virDomainDefPtr def) * virDomainDefPtr always contain non zero NUMA configuration * So, just make sure this configuration does't differ from auto generated. */ - if ((virDomainNumatuneGetMode(def->numa, -1) != - VIR_DOMAIN_NUMATUNE_MEM_STRICT) || + if ((virDomainNumatuneGetMode(def->numa, -1, &memMode) == 0 && + memMode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) || virDomainNumatuneHasPerNodeBinding(def->numa)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("numa parameters are not supported " diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index e24989b..96677dd 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -613,14 +613,15 @@ qemuSetupCpusetMems(virDomainObjPtr vm) { virCgroupPtr cgroup_temp = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; + virDomainNumatuneMemMode mode; char *mem_mask = NULL; int ret = -1; if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) return 0; - if (virDomainNumatuneGetMode(vm->def->numa, -1) != - VIR_DOMAIN_NUMATUNE_MEM_STRICT) + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mode) < 0 || + mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) return 0; if (virDomainNumatuneMaybeFormatNodeset(vm->def->numa, @@ -979,6 +980,7 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm) unsigned long long period = vm->def->cputune.period; long long quota = vm->def->cputune.quota; char *mem_mask = NULL; + virDomainNumatuneMemMode mem_mode; if ((period || quota) && !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { @@ -1009,8 +1011,8 @@ qemuSetupCgroupForVcpu(virDomainObjPtr vm) return 0; } - if (virDomainNumatuneGetMode(vm->def->numa, -1) == - VIR_DOMAIN_NUMATUNE_MEM_STRICT && + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && virDomainNumatuneMaybeFormatNodeset(vm->def->numa, priv->autoNodeset, &mem_mask, -1) < 0) @@ -1153,6 +1155,7 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm) unsigned long long period = vm->def->cputune.period; long long quota = vm->def->cputune.quota; char *mem_mask = NULL; + virDomainNumatuneMemMode mem_mode; if ((period || quota) && !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { @@ -1176,8 +1179,8 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm) if (priv->cgroup == NULL) return 0; - if (virDomainNumatuneGetMode(vm->def->numa, -1) == - VIR_DOMAIN_NUMATUNE_MEM_STRICT && + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && virDomainNumatuneMaybeFormatNodeset(vm->def->numa, priv->autoNodeset, &mem_mask, -1) < 0) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 3ccd35d..6ecc9de 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -4707,7 +4707,6 @@ qemuBuildMemoryBackendStr(unsigned long long size, return -1; memAccess = virDomainNumaGetNodeMemoryAccessMode(def->numa, guestNode); - mode = virDomainNumatuneGetMode(def->numa, guestNode); if (pagesize == 0 || pagesize != system_page_size) { /* Find the huge page size we want to use */ @@ -4823,7 +4822,8 @@ qemuBuildMemoryBackendStr(unsigned long long size, goto cleanup; } - if (nodemask) { + if (nodemask && + virDomainNumatuneGetMode(def->numa, guestNode, &mode) == 0) { if (!virNumaNodesetIsAvailable(nodemask)) goto cleanup; if (virJSONValueObjectAdd(props, diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 0cc0a29..e7f235b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -4737,6 +4737,7 @@ qemuDomainHotplugVcpus(virQEMUDriverPtr driver, int ncpupids; virCgroupPtr cgroup_vcpu = NULL; char *mem_mask = NULL; + virDomainNumatuneMemMode mem_mode; qemuDomainObjEnterMonitor(driver, vm); @@ -4804,8 +4805,8 @@ qemuDomainHotplugVcpus(virQEMUDriverPtr driver, goto cleanup; } - if (virDomainNumatuneGetMode(vm->def->numa, -1) == - VIR_DOMAIN_NUMATUNE_MEM_STRICT && + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && virDomainNumatuneMaybeFormatNodeset(vm->def->numa, priv->autoNodeset, &mem_mask, -1) < 0) @@ -6113,6 +6114,7 @@ qemuDomainHotplugAddIOThread(virQEMUDriverPtr driver, qemuMonitorIOThreadInfoPtr *new_iothreads = NULL; virCgroupPtr cgroup_iothread = NULL; char *mem_mask = NULL; + virDomainNumatuneMemMode mode; virDomainIOThreadIDDefPtr iothrid; virBitmapPtr cpumask; @@ -6154,8 +6156,8 @@ qemuDomainHotplugAddIOThread(virQEMUDriverPtr driver, } vm->def->iothreads = exp_niothreads; - if (virDomainNumatuneGetMode(vm->def->numa, -1) == - VIR_DOMAIN_NUMATUNE_MEM_STRICT && + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mode) == 0 && + mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && virDomainNumatuneMaybeFormatNodeset(vm->def->numa, priv->autoNodeset, &mem_mask, -1) < 0) @@ -10330,11 +10332,12 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm, virCgroupPtr cgroup_temp = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; char *nodeset_str = NULL; + virDomainNumatuneMemMode mode; size_t i = 0; int ret = -1; - if (virDomainNumatuneGetMode(vm->def->numa, -1) != - VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mode) < 0 || + mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("change of nodeset for running domain " "requires strict numa mode")); @@ -10391,6 +10394,7 @@ qemuDomainSetNumaParameters(virDomainPtr dom, virCapsPtr caps = NULL; qemuDomainObjPrivatePtr priv; virBitmapPtr nodeset = NULL; + virDomainNumatuneMemMode config_mode; int mode = -1; virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | @@ -10466,7 +10470,8 @@ qemuDomainSetNumaParameters(virDomainPtr dom, if (flags & VIR_DOMAIN_AFFECT_LIVE) { if (mode != -1 && - virDomainNumatuneGetMode(vm->def->numa, -1) != mode) { + virDomainNumatuneGetMode(vm->def->numa, -1, &config_mode) == 0 && + config_mode != mode) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("can't change numatune mode for running domain")); goto endjob; @@ -10567,7 +10572,8 @@ qemuDomainGetNumaParameters(virDomainPtr dom, VIR_TYPED_PARAM_INT, 0) < 0) goto cleanup; - param->value.i = virDomainNumatuneGetMode(def->numa, -1); + virDomainNumatuneGetMode(def->numa, -1, + (virDomainNumatuneMemMode *) ¶m->value.i); break; case 1: /* fill numa nodeset here */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 2b3d9b5..118fc52 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3135,21 +3135,21 @@ static int qemuProcessHook(void *data) if (virSecurityManagerClearSocketLabel(h->driver->securityManager, h->vm->def) < 0) goto cleanup; - mode = virDomainNumatuneGetMode(h->vm->def->numa, -1); + if (virDomainNumatuneGetMode(h->vm->def->numa, -1, &mode) == 0) { + if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + h->cfg->cgroupControllers & (1 << VIR_CGROUP_CONTROLLER_CPUSET) && + virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) { + /* Use virNuma* API iff necessary. Once set and child is exec()-ed, + * there's no way for us to change it. Rely on cgroups (if available + * and enabled in the config) rather than virNuma*. */ + VIR_DEBUG("Relying on CGroups for memory binding"); + } else { + nodeset = virDomainNumatuneGetNodeset(h->vm->def->numa, + priv->autoNodeset, -1); - if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && - h->cfg->cgroupControllers & (1 << VIR_CGROUP_CONTROLLER_CPUSET) && - virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) { - /* Use virNuma* API iff necessary. Once set and child is exec()-ed, - * there's no way for us to change it. Rely on cgroups (if available - * and enabled in the config) rather than virNuma*. */ - VIR_DEBUG("Relying on CGroups for memory binding"); - } else { - nodeset = virDomainNumatuneGetNodeset(h->vm->def->numa, - priv->autoNodeset, -1); - - if (virNumaSetupMemoryPolicy(mode, nodeset) < 0) - goto cleanup; + if (virNumaSetupMemoryPolicy(mode, nodeset) < 0) + goto cleanup; + } } ret = 0; -- 2.3.6

On Tue, May 19, 2015 at 01:33:10PM +0200, Michal Privoznik wrote:
So far, we are not reporting if numatune was even defined. The value of zero is blindly returned (which maps onto VIR_DOMAIN_NUMATUNE_MEM_STRICT). Unfortunately, we are making decisions based on this value. Instead, we should not inly return
s/inli/only/
the correct value, but report to the caller if the value is valid at all.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> ---
Notes: I know, qemuDomainGetNumaParameters() is not fixed yet, that's what the very next patch does. For better view on this patch use --ignore-space-change.
I like mentioning the usability of "-w" diff flag in the commit message, that way it's visible for people going through the history. Anyway, ACK.

This API does not work well on domains without <numatune/>. It blindly reports misleading info on a shutoff domain: # virsh numatune rhel7 numa_mode : strict numa_nodeset : This is obviously wrong as long as there's no numatune for rhel7 domain, which isn't. What we should do, is report only those NUMA parameters, that domain has defined. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e7f235b..9b3bc68 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -10522,13 +10522,14 @@ qemuDomainGetNumaParameters(virDomainPtr dom, unsigned int flags) { virQEMUDriverPtr driver = dom->conn->privateData; - size_t i; + size_t i, j; virDomainObjPtr vm = NULL; virDomainDefPtr persistentDef = NULL; char *nodeset = NULL; int ret = -1; virCapsPtr caps = NULL; virDomainDefPtr def = NULL; + bool hasNumatune; virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG | @@ -10552,31 +10553,40 @@ qemuDomainGetNumaParameters(virDomainPtr dom, &persistentDef) < 0) goto cleanup; - if ((*nparams) == 0) { - *nparams = QEMU_NB_NUMA_PARAM; - ret = 0; - goto cleanup; - } - if (flags & VIR_DOMAIN_AFFECT_CONFIG) def = persistentDef; else def = vm->def; - for (i = 0; i < QEMU_NB_NUMA_PARAM && i < *nparams; i++) { - virMemoryParameterPtr param = ¶ms[i]; + hasNumatune = virDomainNumatuneGetMode(def->numa, -1, NULL) == 0; + + if ((*nparams) == 0) { + *nparams = QEMU_NB_NUMA_PARAM - (hasNumatune ? 0 : 2); + ret = 0; + goto cleanup; + } + + for (i = 0, j = 0; i < QEMU_NB_NUMA_PARAM && j < *nparams; i++) { + virMemoryParameterPtr param = ¶ms[j]; switch (i) { case 0: /* fill numa mode here */ + if (!hasNumatune) + break; + if (virTypedParameterAssign(param, VIR_DOMAIN_NUMA_MODE, VIR_TYPED_PARAM_INT, 0) < 0) goto cleanup; virDomainNumatuneGetMode(def->numa, -1, (virDomainNumatuneMemMode *) ¶m->value.i); + j++; break; case 1: /* fill numa nodeset here */ + if (!hasNumatune) + break; + nodeset = virDomainNumatuneFormatNodeset(def->numa, NULL, -1); if (!nodeset || virTypedParameterAssign(param, VIR_DOMAIN_NUMA_NODESET, @@ -10584,6 +10594,7 @@ qemuDomainGetNumaParameters(virDomainPtr dom, goto cleanup; nodeset = NULL; + j++; break; /* coverity[dead_error_begin] */ @@ -10593,8 +10604,7 @@ qemuDomainGetNumaParameters(virDomainPtr dom, } } - if (*nparams > QEMU_NB_NUMA_PARAM) - *nparams = QEMU_NB_NUMA_PARAM; + *nparams = j; ret = 0; cleanup: -- 2.3.6

On Tue, May 19, 2015 at 01:33:11PM +0200, Michal Privoznik wrote:
This API does not work well on domains without <numatune/>. It blindly reports misleading info on a shutoff domain:
# virsh numatune rhel7 numa_mode : strict numa_nodeset :
This is obviously wrong as long as there's no numatune for rhel7 domain, which isn't. What we should do, is report only those NUMA parameters, that domain has defined.
I'm not sure, though, whether we can change the behaviour this way as clients may depend on the fact that not setting anything in the XML will cause this API to return 'strict' with nodeset "". Empty nodeset is something that was used for that purpose many times. I don't like the fact, but it is a fact. Having said that, I haven't found anything like that mentioned in the documentation, so I'm not totally against that. The thing is that the documentation says: "As a special case, calling with @params as NULL and @nparams as 0 on input will cause @nparams on output to contain the number of parameters supported by the hypervisor." But we don't check that params == NULL, only that *nparams == 0. That itself is not a problem, but let's continue... Calling virDomainGetNumaParameters() with *nparams == 0 returns 0 in nparams. Calling that API again will again return 0 etc. *Unless* someone changes the numatune parameters in the meantime, which will cause the second call to return QEMU_NB_NUMA_PARAM without filing up the values. And that's a problem. And I don't think we can return an error either.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e7f235b..9b3bc68 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -10522,13 +10522,14 @@ qemuDomainGetNumaParameters(virDomainPtr dom, unsigned int flags) { virQEMUDriverPtr driver = dom->conn->privateData; - size_t i; + size_t i, j; virDomainObjPtr vm = NULL; virDomainDefPtr persistentDef = NULL; char *nodeset = NULL; int ret = -1; virCapsPtr caps = NULL; virDomainDefPtr def = NULL; + bool hasNumatune;
virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG | @@ -10552,31 +10553,40 @@ qemuDomainGetNumaParameters(virDomainPtr dom, &persistentDef) < 0) goto cleanup;
- if ((*nparams) == 0) { - *nparams = QEMU_NB_NUMA_PARAM; - ret = 0; - goto cleanup; - } - if (flags & VIR_DOMAIN_AFFECT_CONFIG) def = persistentDef; else def = vm->def;
- for (i = 0; i < QEMU_NB_NUMA_PARAM && i < *nparams; i++) { - virMemoryParameterPtr param = ¶ms[i]; + hasNumatune = virDomainNumatuneGetMode(def->numa, -1, NULL) == 0; + + if ((*nparams) == 0) { + *nparams = QEMU_NB_NUMA_PARAM - (hasNumatune ? 0 : 2);
This is... Well, looking at it I'm wondering why didn't you do just something like the following, which I think is way more readable. diff --git i/src/qemu/qemu_driver.c w/src/qemu/qemu_driver.c index e7f235b..72e735a 100644 --- i/src/qemu/qemu_driver.c +++ w/src/qemu/qemu_driver.c @@ -10529,6 +10529,8 @@ qemuDomainGetNumaParameters(virDomainPtr dom, int ret = -1; virCapsPtr caps = NULL; virDomainDefPtr def = NULL; + virDomainNumatuneMemMode mode; + bool hasNumatune = virDomainNumatuneGetMode(def->numa, -1, &mode) == 0; virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG | @@ -10552,8 +10554,11 @@ qemuDomainGetNumaParameters(virDomainPtr dom, &persistentDef) < 0) goto cleanup; - if ((*nparams) == 0) { - *nparams = QEMU_NB_NUMA_PARAM; + if (!hasNumatune || (*nparams) == 0) { + if (!hasNumatune) + *nparams == 0; + else + *nparams = QEMU_NB_NUMA_PARAM; ret = 0; goto cleanup; } @@ -10572,8 +10577,7 @@ qemuDomainGetNumaParameters(virDomainPtr dom, VIR_TYPED_PARAM_INT, 0) < 0) goto cleanup; - virDomainNumatuneGetMode(def->numa, -1, - (virDomainNumatuneMemMode *) ¶m->value.i); + param->value.i = mode; break; case 1: /* fill numa nodeset here */ --

On 19.05.2015 19:15, Martin Kletzander wrote:
On Tue, May 19, 2015 at 01:33:11PM +0200, Michal Privoznik wrote:
This API does not work well on domains without <numatune/>. It blindly reports misleading info on a shutoff domain:
# virsh numatune rhel7 numa_mode : strict numa_nodeset :
This is obviously wrong as long as there's no numatune for rhel7 domain, which isn't. What we should do, is report only those NUMA parameters, that domain has defined.
I'm not sure, though, whether we can change the behaviour this way as clients may depend on the fact that not setting anything in the XML will cause this API to return 'strict' with nodeset "". Empty nodeset is something that was used for that purpose many times. I don't like the fact, but it is a fact.
Depending on a buggy behaviour is itself a bug.
Having said that, I haven't found anything like that mentioned in the documentation, so I'm not totally against that.
The thing is that the documentation says:
"As a special case, calling with @params as NULL and @nparams as 0 on input will cause @nparams on output to contain the number of parameters supported by the hypervisor."
But we don't check that params == NULL, only that *nparams == 0. That itself is not a problem, but let's continue... Calling virDomainGetNumaParameters() with *nparams == 0 returns 0 in nparams. Calling that API again will again return 0 etc. *Unless* someone changes the numatune parameters in the meantime, which will cause the second call to return QEMU_NB_NUMA_PARAM without filing up the values. And that's a problem. And I don't think we can return an error either.
I don't see why this is an error. To me it's the same as old object listing APIs we have. You call an API to get count of objects, so that you can allocate an array, then you call (in general) different API to fill up the array. If, however, there's no object to list, the count API returns zero so you can both continue to 2nd step without getting any error or just skip it as well. This is the same: the first call will tell you how much fields can we report. None. You can allocate an array to hold up zero items, and call the API again to fill up that array. Again, without any error. Correct, you'll get the value from a different code path, but that does not matter. As soon as someone adds another param to report, we're back in the track.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e7f235b..9b3bc68 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -10522,13 +10522,14 @@ qemuDomainGetNumaParameters(virDomainPtr dom, unsigned int flags) { virQEMUDriverPtr driver = dom->conn->privateData; - size_t i; + size_t i, j; virDomainObjPtr vm = NULL; virDomainDefPtr persistentDef = NULL; char *nodeset = NULL; int ret = -1; virCapsPtr caps = NULL; virDomainDefPtr def = NULL; + bool hasNumatune;
virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG | @@ -10552,31 +10553,40 @@ qemuDomainGetNumaParameters(virDomainPtr dom, &persistentDef) < 0) goto cleanup;
- if ((*nparams) == 0) { - *nparams = QEMU_NB_NUMA_PARAM; - ret = 0; - goto cleanup; - } - if (flags & VIR_DOMAIN_AFFECT_CONFIG) def = persistentDef; else def = vm->def;
- for (i = 0; i < QEMU_NB_NUMA_PARAM && i < *nparams; i++) { - virMemoryParameterPtr param = ¶ms[i]; + hasNumatune = virDomainNumatuneGetMode(def->numa, -1, NULL) == 0; + + if ((*nparams) == 0) { + *nparams = QEMU_NB_NUMA_PARAM - (hasNumatune ? 0 : 2);
This is... Well, looking at it I'm wondering why didn't you do just something like the following, which I think is way more readable.
diff --git i/src/qemu/qemu_driver.c w/src/qemu/qemu_driver.c index e7f235b..72e735a 100644 --- i/src/qemu/qemu_driver.c +++ w/src/qemu/qemu_driver.c @@ -10529,6 +10529,8 @@ qemuDomainGetNumaParameters(virDomainPtr dom, int ret = -1; virCapsPtr caps = NULL; virDomainDefPtr def = NULL; + virDomainNumatuneMemMode mode; + bool hasNumatune = virDomainNumatuneGetMode(def->numa, -1, &mode) == 0;
virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG | @@ -10552,8 +10554,11 @@ qemuDomainGetNumaParameters(virDomainPtr dom, &persistentDef) < 0) goto cleanup;
- if ((*nparams) == 0) { - *nparams = QEMU_NB_NUMA_PARAM; + if (!hasNumatune || (*nparams) == 0) { + if (!hasNumatune) + *nparams == 0; + else + *nparams = QEMU_NB_NUMA_PARAM; ret = 0; goto cleanup; } @@ -10572,8 +10577,7 @@ qemuDomainGetNumaParameters(virDomainPtr dom, VIR_TYPED_PARAM_INT, 0) < 0) goto cleanup;
- virDomainNumatuneGetMode(def->numa, -1, - (virDomainNumatuneMemMode *) ¶m->value.i); + param->value.i = mode; break;
case 1: /* fill numa nodeset here */ --
This may be more readable, but it's far less future proof. Michal

On Wed, May 20, 2015 at 01:53:14PM +0200, Michal Privoznik wrote:
On 19.05.2015 19:15, Martin Kletzander wrote:
On Tue, May 19, 2015 at 01:33:11PM +0200, Michal Privoznik wrote:
This API does not work well on domains without <numatune/>. It blindly reports misleading info on a shutoff domain:
# virsh numatune rhel7 numa_mode : strict numa_nodeset :
This is obviously wrong as long as there's no numatune for rhel7 domain, which isn't. What we should do, is report only those NUMA parameters, that domain has defined.
I'm not sure, though, whether we can change the behaviour this way as clients may depend on the fact that not setting anything in the XML will cause this API to return 'strict' with nodeset "". Empty nodeset is something that was used for that purpose many times. I don't like the fact, but it is a fact.
Depending on a buggy behaviour is itself a bug.
Yes, definitely, the thing is that I'm not sure whether to consider a bug as returning "" was taken as not having any numatune for a long time. But let's say this is OK.
Having said that, I haven't found anything like that mentioned in the documentation, so I'm not totally against that.
The thing is that the documentation says:
"As a special case, calling with @params as NULL and @nparams as 0 on input will cause @nparams on output to contain the number of parameters supported by the hypervisor."
But we don't check that params == NULL, only that *nparams == 0. That itself is not a problem, but let's continue... Calling virDomainGetNumaParameters() with *nparams == 0 returns 0 in nparams. Calling that API again will again return 0 etc. *Unless* someone changes the numatune parameters in the meantime, which will cause the second call to return QEMU_NB_NUMA_PARAM without filing up the values. And that's a problem. And I don't think we can return an error either.
I don't see why this is an error. To me it's the same as old object listing APIs we have. You call an API to get count of objects, so that you can allocate an array, then you call (in general) different API to fill up the array. If, however, there's no object to list, the count API returns zero so you can both continue to 2nd step without getting any error or just skip it as well. This is the same: the first call will tell you how much fields can we report. None. You can allocate an array to hold up zero items, and call the API again to fill up that array. Again, without any error. Correct, you'll get the value from a different code path, but that does not matter. As soon as someone adds another param to report, we're back in the track.
Yes, we should be reporting the tuning for each node. That might be reported as multiple TypedParameters with the same name, each one representing a guest node. That way, if there is a node without any tuning, you need to return it, but show that it has no tuning, let's say by saying it is bound to empty nodeset (just ""). Then we could document this behaviour, and skip such nodesets in virsh. You probably see where I'm heading now, right? The thing is that I'd rather consider this weird (but really old) behaviour valid and document it solving your problem by fixing it in virsh.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e7f235b..9b3bc68 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -10522,13 +10522,14 @@ qemuDomainGetNumaParameters(virDomainPtr dom, unsigned int flags) { virQEMUDriverPtr driver = dom->conn->privateData; - size_t i; + size_t i, j; virDomainObjPtr vm = NULL; virDomainDefPtr persistentDef = NULL; char *nodeset = NULL; int ret = -1; virCapsPtr caps = NULL; virDomainDefPtr def = NULL; + bool hasNumatune;
virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG | @@ -10552,31 +10553,40 @@ qemuDomainGetNumaParameters(virDomainPtr dom, &persistentDef) < 0) goto cleanup;
- if ((*nparams) == 0) { - *nparams = QEMU_NB_NUMA_PARAM; - ret = 0; - goto cleanup; - } - if (flags & VIR_DOMAIN_AFFECT_CONFIG) def = persistentDef; else def = vm->def;
- for (i = 0; i < QEMU_NB_NUMA_PARAM && i < *nparams; i++) { - virMemoryParameterPtr param = ¶ms[i]; + hasNumatune = virDomainNumatuneGetMode(def->numa, -1, NULL) == 0; + + if ((*nparams) == 0) { + *nparams = QEMU_NB_NUMA_PARAM - (hasNumatune ? 0 : 2);
This is... Well, looking at it I'm wondering why didn't you do just something like the following, which I think is way more readable.
diff --git i/src/qemu/qemu_driver.c w/src/qemu/qemu_driver.c index e7f235b..72e735a 100644 --- i/src/qemu/qemu_driver.c +++ w/src/qemu/qemu_driver.c @@ -10529,6 +10529,8 @@ qemuDomainGetNumaParameters(virDomainPtr dom, int ret = -1; virCapsPtr caps = NULL; virDomainDefPtr def = NULL; + virDomainNumatuneMemMode mode; + bool hasNumatune = virDomainNumatuneGetMode(def->numa, -1, &mode) == 0;
virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG | @@ -10552,8 +10554,11 @@ qemuDomainGetNumaParameters(virDomainPtr dom, &persistentDef) < 0) goto cleanup;
- if ((*nparams) == 0) { - *nparams = QEMU_NB_NUMA_PARAM; + if (!hasNumatune || (*nparams) == 0) { + if (!hasNumatune) + *nparams == 0; + else + *nparams = QEMU_NB_NUMA_PARAM; ret = 0; goto cleanup; } @@ -10572,8 +10577,7 @@ qemuDomainGetNumaParameters(virDomainPtr dom, VIR_TYPED_PARAM_INT, 0) < 0) goto cleanup;
- virDomainNumatuneGetMode(def->numa, -1, - (virDomainNumatuneMemMode *) ¶m->value.i); + param->value.i = mode; break;
case 1: /* fill numa nodeset here */ --
This may be more readable, but it's far less future proof.
Michal
participants (2)
-
Martin Kletzander
-
Michal Privoznik