[libvirt] [PATCH] set cpu bandwidth for the vm

The cpu bandwidth is applied at the vcpu group level. We should apply it at the vm group level too, because the vm may do heavy I/O, and it will affect the other vm. We apply cpu bandwidth at the vcpu and the vm group level, so we must ensure that max(child_quota) <= parent_quota when we modify cpu bandwidth. --- src/qemu/qemu_cgroup.c | 38 ++++++++++------- src/qemu/qemu_driver.c | 103 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 101 insertions(+), 40 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index d6e4cbc..2a10bd2 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -435,6 +435,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) unsigned int i; unsigned long long period = vm->def->cputune.period; long long quota = vm->def->cputune.quota; + long long vm_quota = 0; if (driver->cgroup == NULL) return 0; /* Not supported, so claim success */ @@ -447,26 +448,31 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) goto cleanup; } + /* Set cpu bandwidth for the vm */ + if (period || quota) { + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + /* Ensure that we can multiply by vcpus without overflowing. */ + if (quota > LLONG_MAX / vm->def->vcpus) { + virReportSystemError(EINVAL, + _("%s"), + "Unable to set cpu bandwidth quota"); + goto cleanup; + } + + if (quota > 0) + vm_quota = quota * vm->def->vcpus; + else + vm_quota = quota; + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + } + } + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { /* If we does not know VCPU<->PID mapping or all vcpu runs in the same * thread, we cannot control each vcpu. */ - if (period || quota) { - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { - /* Ensure that we can multiply by vcpus without overflowing. 
*/ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } - - if (quota > 0) - quota *= vm->def->vcpus; - if (qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0) - goto cleanup; - } - } + virCgroupFree(&cgroup); return 0; } diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 5df58b1..52e5d69 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -6051,43 +6051,98 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, qemuDomainObjPrivatePtr priv = vm->privateData; virCgroupPtr cgroup_vcpu = NULL; int rc; + long long vm_quota = 0; + long long old_quota = 0; + unsigned long long old_period = 0; if (period == 0 && quota == 0) return 0; - if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same - * thread, we cannot control each vcpu. - */ - /* Ensure that we can multiply by vcpus without overflowing. */ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } + /* Ensure that we can multiply by vcpus without overflowing. 
*/ + if (quota > LLONG_MAX / vm->def->vcpus) { + virReportSystemError(EINVAL, + _("%s"), + "Unable to set cpu bandwidth quota"); + goto cleanup; + } + + if (quota > 0) + vm_quota = quota * vm->def->vcpus; + else + vm_quota = quota; - if (quota > 0) - quota *= vm->def->vcpus; - return qemuSetupCgroupVcpuBW(cgroup, period, quota); + rc = virCgroupGetCpuCfsQuota(cgroup, &old_quota); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu bandwidth tunable")); + goto cleanup; } - for (i = 0; i < priv->nvcpupids; i++) { - rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); - if (rc < 0) { - virReportSystemError(-rc, - _("Unable to find vcpu cgroup for %s(vcpu:" - " %d)"), - vm->def->name, i); - goto cleanup; + rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu bandwidth period tunable")); + goto cleanup; + } + + /* + * If quota will be changed to a small value, we should modify vcpu's quota + * first. Otherwise, we should modify vm's quota first. + * + * If period will be changed to a small value, we should modify vm's period + * first. Otherwise, we should modify vcpu's period first. + * + * If both quota and period will be changed to a big/small value, we cannot + * modify period and quota together. 
+ */ + if ((quota != 0) && (period != 0)) { + if (((quota > old_quota) && (period > old_period)) || + ((quota < old_quota) && (period < old_period))) { + /* modify period */ + if (qemuSetVcpusBWLive(vm, cgroup, period, 0) < 0) + goto cleanup; + + /* modify quota */ + if (qemuSetVcpusBWLive(vm, cgroup, 0, quota) < 0) + goto cleanup; + return 0; } + } - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (((vm_quota != 0) && (vm_quota > old_quota)) || + ((period != 0) && (period < old_period))) + /* Set cpu bandwidth for the vm */ + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) goto cleanup; - virCgroupFree(&cgroup_vcpu); + /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + * thread, we cannot control each vcpu. So we only modify cpu bandwidth + * when each vcpu has a separated thread. + */ + if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) { + for (i = 0; i < priv->nvcpupids; i++) { + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to find vcpu cgroup for %s(vcpu:" + " %d)"), + vm->def->name, i); + goto cleanup; + } + + if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + goto cleanup; + + virCgroupFree(&cgroup_vcpu); + } } + if (((vm_quota != 0) && (vm_quota <= old_quota)) || + ((period != 0) && (period >= old_period))) + /* Set cpu bandwidth for the vm */ + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + return 0; cleanup: -- 1.7.1

Daniel, would you get a chance to take a look at this patch? Thanks, Gui On 2011-7-25 13:37, Wen Congyang wrote:
The cpu bandwidth is applied at the vcpu group level. We should apply it at the vm group level too, because the vm may do heavy I/O, and it will affect the other vm.
We apply cpu bandwidth at the vcpu and the vm group level, so we must ensure that max(child_quota) <= parent_quota when we modify cpu bandwidth.
--- src/qemu/qemu_cgroup.c | 38 ++++++++++------- src/qemu/qemu_driver.c | 103 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 101 insertions(+), 40 deletions(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index d6e4cbc..2a10bd2 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -435,6 +435,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) unsigned int i; unsigned long long period = vm->def->cputune.period; long long quota = vm->def->cputune.quota; + long long vm_quota = 0;
if (driver->cgroup == NULL) return 0; /* Not supported, so claim success */ @@ -447,26 +448,31 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) goto cleanup; }
+ /* Set cpu bandwidth for the vm */ + if (period || quota) { + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + /* Ensure that we can multiply by vcpus without overflowing. */ + if (quota > LLONG_MAX / vm->def->vcpus) { + virReportSystemError(EINVAL, + _("%s"), + "Unable to set cpu bandwidth quota"); + goto cleanup; + } + + if (quota > 0) + vm_quota = quota * vm->def->vcpus; + else + vm_quota = quota; + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + } + } + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { /* If we does not know VCPU<->PID mapping or all vcpu runs in the same * thread, we cannot control each vcpu. */ - if (period || quota) { - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { - /* Ensure that we can multiply by vcpus without overflowing. */ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } - - if (quota > 0) - quota *= vm->def->vcpus; - if (qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0) - goto cleanup; - } - } + virCgroupFree(&cgroup); return 0; }
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 5df58b1..52e5d69 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -6051,43 +6051,98 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, qemuDomainObjPrivatePtr priv = vm->privateData; virCgroupPtr cgroup_vcpu = NULL; int rc; + long long vm_quota = 0; + long long old_quota = 0; + unsigned long long old_period = 0;
if (period == 0 && quota == 0) return 0;
- if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same - * thread, we cannot control each vcpu. - */ - /* Ensure that we can multiply by vcpus without overflowing. */ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } + /* Ensure that we can multiply by vcpus without overflowing. */ + if (quota > LLONG_MAX / vm->def->vcpus) { + virReportSystemError(EINVAL, + _("%s"), + "Unable to set cpu bandwidth quota"); + goto cleanup; + } + + if (quota > 0) + vm_quota = quota * vm->def->vcpus; + else + vm_quota = quota;
- if (quota > 0) - quota *= vm->def->vcpus; - return qemuSetupCgroupVcpuBW(cgroup, period, quota); + rc = virCgroupGetCpuCfsQuota(cgroup, &old_quota); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu bandwidth tunable")); + goto cleanup; }
- for (i = 0; i < priv->nvcpupids; i++) { - rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); - if (rc < 0) { - virReportSystemError(-rc, - _("Unable to find vcpu cgroup for %s(vcpu:" - " %d)"), - vm->def->name, i); - goto cleanup; + rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu bandwidth period tunable")); + goto cleanup; + } + + /* + * If quota will be changed to a small value, we should modify vcpu's quota + * first. Otherwise, we should modify vm's quota first. + * + * If period will be changed to a small value, we should modify vm's period + * first. Otherwise, we should modify vcpu's period first. + * + * If both quota and period will be changed to a big/small value, we cannot + * modify period and quota together. + */ + if ((quota != 0) && (period != 0)) { + if (((quota > old_quota) && (period > old_period)) || + ((quota < old_quota) && (period < old_period))) { + /* modify period */ + if (qemuSetVcpusBWLive(vm, cgroup, period, 0) < 0) + goto cleanup; + + /* modify quota */ + if (qemuSetVcpusBWLive(vm, cgroup, 0, quota) < 0) + goto cleanup; + return 0; } + }
- if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (((vm_quota != 0) && (vm_quota > old_quota)) || + ((period != 0) && (period < old_period))) + /* Set cpu bandwidth for the vm */ + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) goto cleanup;
- virCgroupFree(&cgroup_vcpu); + /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + * thread, we cannot control each vcpu. So we only modify cpu bandwidth + * when each vcpu has a separated thread. + */ + if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) { + for (i = 0; i < priv->nvcpupids; i++) { + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to find vcpu cgroup for %s(vcpu:" + " %d)"), + vm->def->name, i); + goto cleanup; + } + + if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + goto cleanup; + + virCgroupFree(&cgroup_vcpu); + } }
+ if (((vm_quota != 0) && (vm_quota <= old_quota)) || + ((period != 0) && (period >= old_period))) + /* Set cpu bandwidth for the vm */ + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + return 0;
cleanup:

On Mon, Jul 25, 2011 at 01:37:14PM +0800, Wen Congyang wrote:
The cpu bandwidth is applied at the vcpu group level. We should apply it at the vm group level too, because the vm may do heavy I/O, and it will affect the other vm.
We apply cpu bandwidth at the vcpu and the vm group level, so we must ensure that max(child_quota) <= parent_quota when we modify cpu bandwidth.
--- src/qemu/qemu_cgroup.c | 38 ++++++++++------- src/qemu/qemu_driver.c | 103 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 101 insertions(+), 40 deletions(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index d6e4cbc..2a10bd2 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -435,6 +435,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) unsigned int i; unsigned long long period = vm->def->cputune.period; long long quota = vm->def->cputune.quota; + long long vm_quota = 0;
if (driver->cgroup == NULL) return 0; /* Not supported, so claim success */ @@ -447,26 +448,31 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) goto cleanup; }
+ /* Set cpu bandwidth for the vm */ + if (period || quota) { + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + /* Ensure that we can multiply by vcpus without overflowing. */ + if (quota > LLONG_MAX / vm->def->vcpus) { + virReportSystemError(EINVAL, + _("%s"), + "Unable to set cpu bandwidth quota"); + goto cleanup; + } + + if (quota > 0) + vm_quota = quota * vm->def->vcpus; + else + vm_quota = quota; + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + } + } + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { /* If we does not know VCPU<->PID mapping or all vcpu runs in the same * thread, we cannot control each vcpu. */ - if (period || quota) { - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { - /* Ensure that we can multiply by vcpus without overflowing. */ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } - - if (quota > 0) - quota *= vm->def->vcpus; - if (qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0) - goto cleanup; - } - } + virCgroupFree(&cgroup); return 0; }
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 5df58b1..52e5d69 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -6051,43 +6051,98 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, qemuDomainObjPrivatePtr priv = vm->privateData; virCgroupPtr cgroup_vcpu = NULL; int rc; + long long vm_quota = 0; + long long old_quota = 0; + unsigned long long old_period = 0;
if (period == 0 && quota == 0) return 0;
- if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same - * thread, we cannot control each vcpu. - */ - /* Ensure that we can multiply by vcpus without overflowing. */ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } + /* Ensure that we can multiply by vcpus without overflowing. */ + if (quota > LLONG_MAX / vm->def->vcpus) { + virReportSystemError(EINVAL, + _("%s"), + "Unable to set cpu bandwidth quota"); + goto cleanup; + } + + if (quota > 0) + vm_quota = quota * vm->def->vcpus; + else + vm_quota = quota;
- if (quota > 0) - quota *= vm->def->vcpus; - return qemuSetupCgroupVcpuBW(cgroup, period, quota); + rc = virCgroupGetCpuCfsQuota(cgroup, &old_quota); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu bandwidth tunable")); + goto cleanup; }
- for (i = 0; i < priv->nvcpupids; i++) { - rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); - if (rc < 0) { - virReportSystemError(-rc, - _("Unable to find vcpu cgroup for %s(vcpu:" - " %d)"), - vm->def->name, i); - goto cleanup; + rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu bandwidth period tunable")); + goto cleanup; + } + + /* + * If quota will be changed to a small value, we should modify vcpu's quota + * first. Otherwise, we should modify vm's quota first. + * + * If period will be changed to a small value, we should modify vm's period + * first. Otherwise, we should modify vcpu's period first. + * + * If both quota and period will be changed to a big/small value, we cannot + * modify period and quota together. + */ + if ((quota != 0) && (period != 0)) { + if (((quota > old_quota) && (period > old_period)) || + ((quota < old_quota) && (period < old_period))) { + /* modify period */ + if (qemuSetVcpusBWLive(vm, cgroup, period, 0) < 0) + goto cleanup; + + /* modify quota */ + if (qemuSetVcpusBWLive(vm, cgroup, 0, quota) < 0) + goto cleanup; + return 0; } + }
- if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (((vm_quota != 0) && (vm_quota > old_quota)) || + ((period != 0) && (period < old_period))) + /* Set cpu bandwidth for the vm */ + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) goto cleanup;
- virCgroupFree(&cgroup_vcpu); + /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + * thread, we cannot control each vcpu. So we only modify cpu bandwidth + * when each vcpu has a separated thread. + */ + if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) { + for (i = 0; i < priv->nvcpupids; i++) { + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to find vcpu cgroup for %s(vcpu:" + " %d)"), + vm->def->name, i); + goto cleanup; + } + + if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + goto cleanup; + + virCgroupFree(&cgroup_vcpu); + } }
+ if (((vm_quota != 0) && (vm_quota <= old_quota)) || + ((period != 0) && (period >= old_period))) + /* Set cpu bandwidth for the vm */ + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + return 0;
cleanup:
I looked at it yesterday, and while it looks correct, I was hoping someone with more interest in that thread would comment on or ACK it. ACK. I just tagged libvirt-0.9.4-rc1, so this won't make it into that tag, but it will be in rc2 and the final release, which is the important point :-) Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On Mon, Jul 25, 2011 at 01:37:14PM +0800, Wen Congyang wrote:
The cpu bandwidth is applied at the vcpu group level. We should apply it at the vm group level too, because the vm may do heavy I/O, and it will affect the other vm.
We apply cpu bandwidth at the vcpu and the vm group level, so we must ensure that max(child_quota) <= parent_quota when we modify cpu bandwidth.
Ahh, interesting problem !
--- src/qemu/qemu_cgroup.c | 38 ++++++++++------- src/qemu/qemu_driver.c | 103 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 101 insertions(+), 40 deletions(-)
ACK Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

At 07/26/2011 06:24 PM, Daniel P. Berrange wrote:
On Mon, Jul 25, 2011 at 01:37:14PM +0800, Wen Congyang wrote:
The cpu bandwidth is applied at the vcpu group level. We should apply it at the vm group level too, because the vm may do heavy I/O, and it will affect the other vm.
We apply cpu bandwidth at the vcpu and the vm group level, so we must ensure that max(child_quota) <= parent_quota when we modify cpu bandwidth.
Ahh, interesting problem !
--- src/qemu/qemu_cgroup.c | 38 ++++++++++------- src/qemu/qemu_driver.c | 103 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 101 insertions(+), 40 deletions(-)
ACK
Thanks, pushed.
Daniel
participants (5)
-
Daniel P. Berrange
-
Daniel Veillard
-
Gui Jianfeng
-
Wen Congyang
-
Wen Congyang