[libvirt] [PATCH v2] Pin guest to memory node on NUMA system

Version 2: Fixed the string formatting errors in v1. Version 1: The patch contains the fix for defect 1009880 reported at redhat bugzilla. The root cause is, ever since the subcpusets(vcpu,emulator) were introduced, the paren cpuset cannot be modified to remove the nodes that are in use by the subcpusets. The fix is to break the memory node modification into three steps as to assign new nodes into the parent first. Change the nodes in the child nodes. Then remove the old nodes on the parent node. Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com> --- src/qemu/qemu_driver.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e8bc04d..2435b75 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -8171,7 +8171,11 @@ qemuDomainSetNumaParameters(virDomainPtr dom, } } else if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) { virBitmapPtr nodeset = NULL; + virBitmapPtr old_nodeset = NULL; + virBitmapPtr temp_nodeset = NULL; char *nodeset_str = NULL; + char *old_nodeset_str = NULL; + char *temp_nodeset_str = NULL; if (virBitmapParse(params[i].value.s, 0, &nodeset, @@ -8181,6 +8185,10 @@ qemuDomainSetNumaParameters(virDomainPtr dom, } if (flags & VIR_DOMAIN_AFFECT_LIVE) { + size_t j; + virCgroupPtr cgroup_vcpu = NULL; + virCgroupPtr cgroup_emulator = NULL; + if (vm->def->numatune.memory.mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", @@ -8200,7 +8208,128 @@ qemuDomainSetNumaParameters(virDomainPtr dom, continue; } + if (virCgroupGetCpusetMems(priv->cgroup, &old_nodeset_str) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to get current system nodeset values")); + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + ret = -1; + continue; + } + + if (virBitmapParse(old_nodeset_str, 0, &old_nodeset, + VIR_DOMAIN_CPUMASK_LEN) < 0) { + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + VIR_FREE(old_nodeset_str); + ret = -1; + continue; + } + + if ((temp_nodeset = virBitmapNewCopy(old_nodeset)) == NULL) { + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + virBitmapFree(old_nodeset); + VIR_FREE(old_nodeset_str); + ret = -1; + continue; + } + virBitmapFree(old_nodeset); + VIR_FREE(old_nodeset_str); + + for (j = 0; j < caps->host.nnumaCell; j++) { + bool result; + if (virBitmapGetBit(nodeset, j, &result) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to get cpuset bit values")); + ret = -1; + break; + } + if (result && (virBitmapSetBit(temp_nodeset, j) < 0)) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to set temporary cpuset bit values")); + ret = -1; + break; + } + } + + if (ret) { + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + virBitmapFree(temp_nodeset); + continue; + } + + if (!(temp_nodeset_str = virBitmapFormat(temp_nodeset))) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to format nodeset")); + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + virBitmapFree(temp_nodeset); + ret = -1; + continue; + } + + if (virCgroupSetCpusetMems(priv->cgroup, temp_nodeset_str) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to set cpuset values")); + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + virBitmapFree(temp_nodeset); + VIR_FREE(temp_nodeset_str); + ret = -1; + continue; + } + + virBitmapFree(temp_nodeset); + VIR_FREE(temp_nodeset_str); + + for (j = 0; j < priv->nvcpupids; j++) { + if (virCgroupNewVcpu(priv->cgroup, j, false, &cgroup_vcpu) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to get cpuset values for vcpu%zu"), j); + ret = -1; + break; + } + if (virCgroupSetCpusetMems(cgroup_vcpu, nodeset_str) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to set cpuset values for vcpu%zu"), j); + virCgroupFree(&cgroup_vcpu); + ret = -1; + break; + } + virCgroupFree(&cgroup_vcpu); + } + + if (ret) { + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + continue; + } + + if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_emulator) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to get cpuset values for emulator")); + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + ret = -1; + continue; + } + + if (virCgroupSetCpusetMems(cgroup_emulator, nodeset_str) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to set cpuset values for emulator")); + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + virCgroupFree(&cgroup_emulator); + ret = -1; + continue; + } + virCgroupFree(&cgroup_emulator); + if (virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to set cpuset")); virBitmapFree(nodeset); VIR_FREE(nodeset_str); ret = -1;

On Tue, Nov 19, 2013 at 09:36:49AM -0500, Shivaprasad G Bhat wrote:
Version 2: Fixed the string formatting errors in v1.
Version 1: The patch contains the fix for defect 1009880 reported at redhat bugzilla.
Version changes are great to have, but it's good to add them as a 'footnote' or as 'notes' (separated from the commit with three dashes on a line, i.e. '---\n'). You can also use git-notes(1) and then use git-format-patch(1) with the '--notes' option, which adds it there properly.
The root cause is, ever since the subcpusets(vcpu,emulator) were introduced, the paren cpuset cannot be modified to remove the nodes that are in use by the subcpusets. The fix is to break the memory node modification into three steps as to assign new nodes into the parent first. Change the nodes in the child nodes. Then remove the old nodes on the parent node.
Please make sure your EDITOR wraps lines, so it's better readable and it also looks better in git log.
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com> --- src/qemu/qemu_driver.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e8bc04d..2435b75 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -8171,7 +8171,11 @@ qemuDomainSetNumaParameters(virDomainPtr dom, } } else if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) { virBitmapPtr nodeset = NULL; + virBitmapPtr old_nodeset = NULL; + virBitmapPtr temp_nodeset = NULL; char *nodeset_str = NULL; + char *old_nodeset_str = NULL; + char *temp_nodeset_str = NULL;
if (virBitmapParse(params[i].value.s, 0, &nodeset, @@ -8181,6 +8185,10 @@ qemuDomainSetNumaParameters(virDomainPtr dom, }
if (flags & VIR_DOMAIN_AFFECT_LIVE) { + size_t j; + virCgroupPtr cgroup_vcpu = NULL; + virCgroupPtr cgroup_emulator = NULL; + if (vm->def->numatune.memory.mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", @@ -8200,7 +8208,128 @@ qemuDomainSetNumaParameters(virDomainPtr dom, continue; }
+ if (virCgroupGetCpusetMems(priv->cgroup, &old_nodeset_str) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to get current system nodeset values"));
No need to report second error since virCgroup...() already set the error.
+ virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + ret = -1; + continue; + } + + if (virBitmapParse(old_nodeset_str, 0, &old_nodeset, + VIR_DOMAIN_CPUMASK_LEN) < 0) { + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + VIR_FREE(old_nodeset_str); + ret = -1; + continue; + } + + if ((temp_nodeset = virBitmapNewCopy(old_nodeset)) == NULL) { + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + virBitmapFree(old_nodeset); + VIR_FREE(old_nodeset_str); + ret = -1; + continue;
Just skimming through, I see a lot of unnecessary code duplication here. Can you break it to different function? Or rework the code to free/clean the data in one place, since there is more than a few lines of code in this workpath? Martin

Thanks Martin. I use stgit. I need to figure out how to put the notes in the format you mentioned. Sending you the V3 addressing your comments. Regards, Shiva On Mon, Nov 25, 2013 at 3:01 PM, Martin Kletzander <mkletzan@redhat.com> wrote:
On Tue, Nov 19, 2013 at 09:36:49AM -0500, Shivaprasad G Bhat wrote:
Version 2: Fixed the string formatting errors in v1.
Version 1: The patch contains the fix for defect 1009880 reported at redhat bugzilla.
Version changes are great to have, but it's good to add them as a 'footnote' or as 'notes' (separated from the commit with three dashes on a line, i.e. '---\n'). You can also use git-notes(1) and then use git-format-patch(1) with the '--notes' option, which adds it there properly.
The root cause is, ever since the subcpusets(vcpu,emulator) were introduced, the paren cpuset cannot be modified to remove the nodes that are in use by the subcpusets. The fix is to break the memory node modification into three steps as to assign new nodes into the parent first. Change the nodes in the child nodes. Then remove the old nodes on the parent node.
Please make sure your EDITOR wraps lines, so it's better readable and it also looks better in git log.
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com> --- src/qemu/qemu_driver.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e8bc04d..2435b75 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -8171,7 +8171,11 @@ qemuDomainSetNumaParameters(virDomainPtr dom, } } else if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) { virBitmapPtr nodeset = NULL; + virBitmapPtr old_nodeset = NULL; + virBitmapPtr temp_nodeset = NULL; char *nodeset_str = NULL; + char *old_nodeset_str = NULL; + char *temp_nodeset_str = NULL;
if (virBitmapParse(params[i].value.s, 0, &nodeset, @@ -8181,6 +8185,10 @@ qemuDomainSetNumaParameters(virDomainPtr dom, }
if (flags & VIR_DOMAIN_AFFECT_LIVE) { + size_t j; + virCgroupPtr cgroup_vcpu = NULL; + virCgroupPtr cgroup_emulator = NULL; + if (vm->def->numatune.memory.mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", @@ -8200,7 +8208,128 @@ qemuDomainSetNumaParameters(virDomainPtr dom, continue; }
+ if (virCgroupGetCpusetMems(priv->cgroup, &old_nodeset_str) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to get current system nodeset values"));
No need to report second error since virCgroup...() already set the error.
+ virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + ret = -1; + continue; + } + + if (virBitmapParse(old_nodeset_str, 0, &old_nodeset, + VIR_DOMAIN_CPUMASK_LEN) < 0) { + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + VIR_FREE(old_nodeset_str); + ret = -1; + continue; + } + + if ((temp_nodeset = virBitmapNewCopy(old_nodeset)) == NULL) { + virBitmapFree(nodeset); + VIR_FREE(nodeset_str); + virBitmapFree(old_nodeset); + VIR_FREE(old_nodeset_str); + ret = -1; + continue;
Just skimming through, I see a lot of unnecessary code duplication here. Can you break it to different function? Or rework the code to free/clean the data in one place, since there is more than a few lines of code in this workpath?
Martin
participants (3)
-
Martin Kletzander
-
Shivaprasad bhat
-
Shivaprasad G Bhat