[libvirt] [PATCHv2 0/9] support setting cpu bandwidth for hypervisor threads

Currently, cpu bandwidth can only be set for vcpus. If the hypervisor threads
consume too much cpu time, they may starve the vcpus. This patch set lets the
user control the cpu bandwidth of the hypervisor threads as well.

Changes:
v2:
- rename qemuSetupCgroupVcpuBW to qemuSetupCgroupCpuBandwidth
- limit cpu bandwidth only for vcpus
- update doc about hypervisor_period/hypervisor_quota
- add virCgroupMoveTask to libvirt_private.syms

Hu Tao (3):
  rename qemuSetupCgroupVcpuBW to qemuSetupCgroupCpuBandwidth
  limit cpu bandwidth only for vcpus
  update doc about hypervisor_period/hypervisor_quota

Wen Congyang (6):
  Introduce the function virCgroupForHypervisor
  introduce the function virCgroupMoveTask()
  create a new cgroup and move all hypervisor threads to the new cgroup
  Update XML Schema for new entries
  qemu: Implement hypervisor's period and quota tunable XML configuration
    and parsing
  qemu: Implement hypervisor_period and hypervisor_quota's modification

 docs/schemas/domaincommon.rng |   10 ++
 include/libvirt/libvirt.h.in  |   16 ++++
 src/conf/domain_conf.c        |   25 +++++-
 src/conf/domain_conf.h        |    2 +
 src/libvirt_private.syms      |    2 +
 src/qemu/qemu_cgroup.c        |  107 ++++++++++++++++------
 src/qemu/qemu_cgroup.h        |    8 +-
 src/qemu/qemu_driver.c        |  201 +++++++++++++++++++++++++++--------------
 src/qemu/qemu_process.c       |    4 +
 src/util/cgroup.c             |   96 ++++++++++++++++++++
 src/util/cgroup.h             |    6 ++
 tools/virsh.pod               |   11 ++-
 12 files changed, 381 insertions(+), 107 deletions(-)

--
1.7.4.4

This function sets cpu bandwidth, not only for vcpu. so rename it. --- src/qemu/qemu_cgroup.c | 9 +++++---- src/qemu/qemu_cgroup.h | 6 +++--- src/qemu/qemu_driver.c | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index f8f375f..91dcedd 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -431,8 +431,9 @@ cleanup: return -1; } -int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, unsigned long long period, - long long quota) +int qemuSetupCgroupCpuBandwidth(virCgroupPtr cgroup, + unsigned long long period, + long long quota) { int rc; unsigned long long old_period; @@ -517,7 +518,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) vm_quota = quota * vm->def->vcpus; else vm_quota = quota; - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + if (qemuSetupCgroupCpuBandwidth(cgroup, period, vm_quota) < 0) goto cleanup; } } @@ -551,7 +552,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) if (period || quota) { if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (qemuSetupCgroupCpuBandwidth(cgroup_vcpu, period, quota) < 0) goto cleanup; } } diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index c1023b3..780b4a0 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -50,9 +50,9 @@ int qemuSetupHostUsbDeviceCgroup(usbDevice *dev, int qemuSetupCgroup(struct qemud_driver *driver, virDomainObjPtr vm, char *nodemask); -int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, - unsigned long long period, - long long quota); +int qemuSetupCgroupCpuBandwidth(virCgroupPtr cgroup, + unsigned long long period, + long long quota); int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm); int qemuRemoveCgroup(struct qemud_driver *driver, virDomainObjPtr vm, diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 2177c30..b0b7816 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7184,7 +7184,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, if (((vm_quota != 0) && (vm_quota > old_quota)) || ((period != 0) && (period < old_period))) /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + if (qemuSetupCgroupCpuBandwidth(cgroup, period, vm_quota) < 0) goto cleanup; /* If we does not know VCPU<->PID mapping or all vcpu runs in the same @@ -7202,7 +7202,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, goto cleanup; } - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (qemuSetupCgroupCpuBandwidth(cgroup_vcpu, period, quota) < 0) goto cleanup; virCgroupFree(&cgroup_vcpu); @@ -7212,7 +7212,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, if (((vm_quota != 0) && (vm_quota <= old_quota)) || ((period != 0) && (period >= old_period))) /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + if (qemuSetupCgroupCpuBandwidth(cgroup, period, vm_quota) < 0) goto cleanup; return 0; -- 1.7.4.4
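For context (an illustration, not part of the patch): the helper simply programs the CFS bandwidth knobs of whatever cgroup it is handed, which is why the vcpu-specific name was misleading. A minimal sketch, assuming libvirt's existing virCgroupSetCpuCfsPeriod/virCgroupSetCpuCfsQuota wrappers and leaving out the rollback of the old period that the real function performs:

    int qemuSetupCgroupCpuBandwidth(virCgroupPtr cgroup,
                                    unsigned long long period,
                                    long long quota)
    {
        /* period maps to cpu.cfs_period_us, quota to cpu.cfs_quota_us */
        if (period == 0 && quota == 0)
            return 0;

        if (period &&
            virCgroupSetCpuCfsPeriod(cgroup, period) < 0)
            return -1;

        if (quota &&
            virCgroupSetCpuCfsQuota(cgroup, quota) < 0)
            return -1;

        return 0;
    }

The same function is later reused unchanged for the domain group, the per-vcpu groups and the new hypervisor group.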

This patch changes the behaviour of xml element cputune.period and cputune.quota to limit cpu bandwidth only for vcpus, and no longer limit cpu bandwidth for the whole guest. The reasons to do this are: - This matches docs of cputune.period and cputune.quota. - The other parts excepting vcpus are treated as "hypervisor", and there are seperate period/quota settings for hypervisor in the subsequent patches --- src/qemu/qemu_cgroup.c | 34 ++++++------------------- src/qemu/qemu_driver.c | 66 ------------------------------------------------ 2 files changed, 8 insertions(+), 92 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 91dcedd..13163b6 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -490,11 +490,16 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) unsigned int i; unsigned long long period = vm->def->cputune.period; long long quota = vm->def->cputune.quota; - long long vm_quota = 0; if (driver->cgroup == NULL) return 0; /* Not supported, so claim success */ + if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + qemuReportError(VIR_ERR_SYSTEM_ERROR, + _("cgroup cpu is not active")); + return -1; + } + rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0); if (rc != 0) { virReportSystemError(-rc, @@ -503,26 +508,6 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) goto cleanup; } - /* Set cpu bandwidth for the vm */ - if (period || quota) { - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { - /* Ensure that we can multiply by vcpus without overflowing. */ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } - - if (quota > 0) - vm_quota = quota * vm->def->vcpus; - else - vm_quota = quota; - if (qemuSetupCgroupCpuBandwidth(cgroup, period, vm_quota) < 0) - goto cleanup; - } - } - if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { /* If we does not know VCPU<->PID mapping or all vcpu runs in the same * thread, we cannot control each vcpu. @@ -551,16 +536,13 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) } if (period || quota) { - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { - if (qemuSetupCgroupCpuBandwidth(cgroup_vcpu, period, quota) < 0) - goto cleanup; - } + if (qemuSetupCgroupCpuBandwidth(cgroup_vcpu, period, quota) < 0) + goto cleanup; } virCgroupFree(&cgroup_vcpu); } - virCgroupFree(&cgroup_vcpu); virCgroupFree(&cgroup); return 0; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b0b7816..c880f05 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7123,70 +7123,10 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, qemuDomainObjPrivatePtr priv = vm->privateData; virCgroupPtr cgroup_vcpu = NULL; int rc; - long long vm_quota = 0; - long long old_quota = 0; - unsigned long long old_period = 0; if (period == 0 && quota == 0) return 0; - /* Ensure that we can multiply by vcpus without overflowing. 
*/ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } - - if (quota > 0) - vm_quota = quota * vm->def->vcpus; - else - vm_quota = quota; - - rc = virCgroupGetCpuCfsQuota(cgroup, &old_quota); - if (rc < 0) { - virReportSystemError(-rc, "%s", - _("unable to get cpu bandwidth tunable")); - goto cleanup; - } - - rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period); - if (rc < 0) { - virReportSystemError(-rc, "%s", - _("unable to get cpu bandwidth period tunable")); - goto cleanup; - } - - /* - * If quota will be changed to a small value, we should modify vcpu's quota - * first. Otherwise, we should modify vm's quota first. - * - * If period will be changed to a small value, we should modify vm's period - * first. Otherwise, we should modify vcpu's period first. - * - * If both quota and period will be changed to a big/small value, we cannot - * modify period and quota together. - */ - if ((quota != 0) && (period != 0)) { - if (((quota > old_quota) && (period > old_period)) || - ((quota < old_quota) && (period < old_period))) { - /* modify period */ - if (qemuSetVcpusBWLive(vm, cgroup, period, 0) < 0) - goto cleanup; - - /* modify quota */ - if (qemuSetVcpusBWLive(vm, cgroup, 0, quota) < 0) - goto cleanup; - return 0; - } - } - - if (((vm_quota != 0) && (vm_quota > old_quota)) || - ((period != 0) && (period < old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupCpuBandwidth(cgroup, period, vm_quota) < 0) - goto cleanup; - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same * thread, we cannot control each vcpu. So we only modify cpu bandwidth * when each vcpu has a separated thread. @@ -7209,12 +7149,6 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, } } - if (((vm_quota != 0) && (vm_quota <= old_quota)) || - ((period != 0) && (period >= old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupCpuBandwidth(cgroup, period, vm_quota) < 0) - goto cleanup; - return 0; cleanup: -- 1.7.4.4
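To make the behavioural change concrete (numbers are illustrative): with <period>100000</period> and <quota>50000</quota> on a 4-vcpu guest, the old code capped the whole domain cgroup at 4 x 50000 = 200000 us per 100000 us window in addition to capping each vcpuN group at 50000 us, so the emulator and vhost threads had to fit into whatever headroom the vcpus left under the domain-level limit. After this patch only the per-vcpu caps of 50000 us remain, the domain-level limit is no longer set (which is also why the LLONG_MAX / vcpus overflow check can go away), and the non-vcpu threads are instead covered by the separate hypervisor_period/hypervisor_quota settings introduced later in the series.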

From: Wen Congyang <wency@cn.fujitsu.com> Introduce the function virCgroupForHypervisor() to create sub directory for hypervisor thread(include I/O thread, vhost-net thread) --- src/libvirt_private.syms | 1 + src/util/cgroup.c | 42 ++++++++++++++++++++++++++++++++++++++++++ src/util/cgroup.h | 4 ++++ 3 files changed, 47 insertions(+), 0 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 2fe5068..7d570b5 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -68,6 +68,7 @@ virCgroupDenyAllDevices; virCgroupDenyDevicePath; virCgroupForDomain; virCgroupForDriver; +virCgroupForHypervisor; virCgroupForVcpu; virCgroupFree; virCgroupGetBlkioWeight; diff --git a/src/util/cgroup.c b/src/util/cgroup.c index 5b32881..66d98e3 100644 --- a/src/util/cgroup.c +++ b/src/util/cgroup.c @@ -946,6 +946,48 @@ int virCgroupForVcpu(virCgroupPtr driver ATTRIBUTE_UNUSED, #endif /** + * virCgroupForHypervisor: + * + * @driver: group for the domain + * @group: Pointer to returned virCgroupPtr + * + * Returns 0 on success + */ +#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R +int virCgroupForHypervisor(virCgroupPtr driver, + virCgroupPtr *group, + int create) +{ + int rc; + char *path; + + if (driver == NULL) + return -EINVAL; + + if (virAsprintf(&path, "%s/hypervisor", driver->path) < 0) + return -ENOMEM; + + rc = virCgroupNew(path, group); + VIR_FREE(path); + + if (rc == 0) { + rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_VCPU); + if (rc != 0) + virCgroupFree(group); + } + + return rc; +} +#else +int virCgroupForHypervisor(virCgroupPtr driver ATTRIBUTE_UNUSED, + virCgroupPtr *group ATTRIBUTE_UNUSED, + int create ATTRIBUTE_UNUSED) +{ + return -ENXIO; +} + +#endif +/** * virCgroupSetBlkioWeight: * * @group: The cgroup to change io weight for diff --git a/src/util/cgroup.h b/src/util/cgroup.h index 05325ae..315ebd6 100644 --- a/src/util/cgroup.h +++ b/src/util/cgroup.h @@ -47,6 +47,10 @@ int virCgroupForVcpu(virCgroupPtr driver, virCgroupPtr *group, int create); +int virCgroupForHypervisor(virCgroupPtr driver, + virCgroupPtr *group, + int create); + int virCgroupPathOfController(virCgroupPtr group, int controller, const char *key, -- 1.7.4.4

From: Wen Congyang <wency@cn.fujitsu.com> introduce a new API to move all tasks from a cgroup to another cgroup --- src/libvirt_private.syms | 1 + src/util/cgroup.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++ src/util/cgroup.h | 2 + 3 files changed, 57 insertions(+), 0 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 7d570b5..eda0808 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -88,6 +88,7 @@ virCgroupKill; virCgroupKillPainfully; virCgroupKillRecursive; virCgroupMounted; +virCgroupMoveTask; virCgroupPathOfController; virCgroupRemove; virCgroupSetBlkioDeviceWeight; diff --git a/src/util/cgroup.c b/src/util/cgroup.c index 66d98e3..84a21ce 100644 --- a/src/util/cgroup.c +++ b/src/util/cgroup.c @@ -791,6 +791,60 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid) return rc; } +static int virCgrouAddTaskStr(virCgroupPtr group, const char *pidstr) +{ + unsigned long long value; + + if (virStrToLong_ull(pidstr, NULL, 10, &value) < 0) + return -EINVAL; + + return virCgroupAddTask(group, value); +} + +/** + * virCgroupMoveTask: + * + * @src_group: The source cgroup where all tasks are removed from + * @dest_group: The destination where all tasks are added to + * + * Returns: 0 on success + */ +int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group) +{ + int rc = 0; + int i; + char *content, *value, *next; + + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + /* Skip over controllers not mounted */ + if (!src_group->controllers[i].mountPoint || + !dest_group->controllers[i].mountPoint) + continue; + + rc = virCgroupGetValueStr(src_group, i, "tasks", &content); + if (rc != 0) + goto cleanup; + + value = content; + while((next = strchr(value, '\n')) != NULL) { + *next = '\0'; + if ((rc = virCgrouAddTaskStr(dest_group, value) < 0)) + goto cleanup; + value = next + 1; + } + if (*value != '\0') { + if ((rc = virCgrouAddTaskStr(dest_group, value) < 0)) + goto cleanup; + } + + VIR_FREE(content); + } + + return 0; + +cleanup: + return rc; +} /** * virCgroupForDriver: diff --git a/src/util/cgroup.h b/src/util/cgroup.h index 315ebd6..308ea47 100644 --- a/src/util/cgroup.h +++ b/src/util/cgroup.h @@ -58,6 +58,8 @@ int virCgroupPathOfController(virCgroupPtr group, int virCgroupAddTask(virCgroupPtr group, pid_t pid); +int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group); + int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight); int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight); -- 1.7.4.4
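One note on why the helper only ever adds tasks: within a cgroup hierarchy a task can belong to exactly one group, so writing its pid to the destination group's tasks file implicitly removes it from the source group for that controller. Reading the source group's tasks file and re-writing each pid into the destination is therefore a complete move.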

From: Wen Congyang <wency@cn.fujitsu.com> create a new cgroup and move all hypervisor threads to the new cgroup. And then we can limit cpu bandwidth for hypervisor threads (include vhost-net threads). --- src/qemu/qemu_cgroup.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_cgroup.h | 2 + src/qemu/qemu_process.c | 4 +++ 3 files changed, 63 insertions(+), 0 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 13163b6..021f489 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -556,6 +556,63 @@ cleanup: return -1; } +int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, + virDomainObjPtr vm) +{ + virCgroupPtr cgroup = NULL; + virCgroupPtr cgroup_hypervisor = NULL; + qemuDomainObjPrivatePtr priv = vm->privateData; + int rc; + + if (driver->cgroup == NULL) + return 0; /* Not supported, so claim success */ + + rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to find cgroup for %s"), + vm->def->name); + goto cleanup; + } + + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { + /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + * thread, we cannot control each vcpu. + */ + virCgroupFree(&cgroup); + return 0; + } + + rc = virCgroupForHypervisor(cgroup, &cgroup_hypervisor, 1); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to create hypervisor cgroup for %s"), + vm->def->name); + goto cleanup; + } + + rc = virCgroupMoveTask(cgroup, cgroup_hypervisor); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to move taks from domain cgroup to " + "hypervisor cgroup for %s"), + vm->def->name); + goto cleanup; + } + + virCgroupFree(&cgroup_hypervisor); + virCgroupFree(&cgroup); + return 0; + +cleanup: + virCgroupFree(&cgroup_hypervisor); + if (cgroup) { + virCgroupRemove(cgroup); + virCgroupFree(&cgroup); + } + + return -1; +} int qemuRemoveCgroup(struct qemud_driver *driver, virDomainObjPtr vm, diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 780b4a0..63cb378 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -54,6 +54,8 @@ int qemuSetupCgroupCpuBandwidth(virCgroupPtr cgroup, unsigned long long period, long long quota); int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm); +int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, + virDomainObjPtr vm); int qemuRemoveCgroup(struct qemud_driver *driver, virDomainObjPtr vm, int quiet); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 1df3637..f84314d 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3723,6 +3723,10 @@ int qemuProcessStart(virConnectPtr conn, if (qemuSetupCgroupForVcpu(driver, vm) < 0) goto cleanup; + VIR_DEBUG("Setting cgroup for hypervisor(if required)"); + if (qemuSetupCgroupForHypervisor(driver, vm) < 0) + goto cleanup; + VIR_DEBUG("Setting VCPU affinities"); if (qemuProcessSetVcpuAffinites(conn, vm) < 0) goto cleanup; -- 1.7.4.4
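To illustrate the intended layout (not part of the patch; the exact prefix depends on where the cpu controller is mounted, and "libvirt/qemu" is the driver group created by virCgroupForDriver): after qemuSetupCgroupForVcpu and qemuSetupCgroupForHypervisor have both run, the domain's cpu cgroup is expected to look roughly like

    .../cpu/libvirt/qemu/<domain>/
        tasks              (left nearly empty: remaining qemu threads moved below)
        vcpu0/tasks        (one vcpu thread per vcpuN group, set up earlier)
        vcpu1/tasks
        hypervisor/tasks   (emulator, I/O and vhost-net threads moved here
                            by virCgroupMoveTask)

Because qemuSetupCgroupForVcpu has already attached each vcpu thread to its vcpuN group before this code runs, virCgroupMoveTask only picks up the non-vcpu threads still listed in the domain group's tasks file.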

From: Wen Congyang <wency@cn.fujitsu.com> --- docs/schemas/domaincommon.rng | 10 ++++++++++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 8630625..ee2a314 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -546,6 +546,16 @@ <ref name="cpuquota"/> </element> </optional> + <optional> + <element name="hypervisor_period"> + <ref name="cpuperiod"/> + </element> + </optional> + <optional> + <element name="hypervisor_quota"> + <ref name="cpuquota"/> + </element> + </optional> <zeroOrMore> <element name="vcpupin"> <attribute name="vcpu"> -- 1.7.4.4
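For illustration, a <cputune> fragment that the extended schema accepts (the values are arbitrary, chosen within the 1000-1000000 period and 1000-18446744073709551 quota ranges documented in the last patch):

    <cputune>
      <period>100000</period>
      <quota>50000</quota>
      <hypervisor_period>100000</hypervisor_period>
      <hypervisor_quota>20000</hypervisor_quota>
    </cputune>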

From: Wen Congyang <wency@cn.fujitsu.com> set hypervisor's period and quota to limit cpu bandwidth when the vm starts. --- src/conf/domain_conf.c | 25 +++++++++++++++++++++++-- src/conf/domain_conf.h | 2 ++ src/qemu/qemu_cgroup.c | 13 +++++++++++-- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 4651765..c3959a7 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -8152,6 +8152,14 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, &def->cputune.quota) < 0) def->cputune.quota = 0; + if (virXPathULongLong("string(./cputune/hypervisor_period[1])", ctxt, + &def->cputune.hypervisor_period) < 0) + def->cputune.hypervisor_period = 0; + + if (virXPathLongLong("string(./cputune/hypervisor_quota[1])", ctxt, + &def->cputune.hypervisor_quota) < 0) + def->cputune.hypervisor_quota = 0; + if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) { goto error; } @@ -12752,7 +12760,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, virBufferAsprintf(buf, ">%u</vcpu>\n", def->maxvcpus); if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " <cputune>\n"); if (def->cputune.shares) @@ -12764,6 +12773,17 @@ virDomainDefFormatInternal(virDomainDefPtr def, if (def->cputune.quota) virBufferAsprintf(buf, " <quota>%lld</quota>\n", def->cputune.quota); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_period>%llu" + "</hypervisor_period>\n", + def->cputune.hypervisor_period); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_quota>%lld" + "</hypervisor_quota>\n", + def->cputune.hypervisor_quota); + if (def->cputune.vcpupin) { for (i = 0; i < def->cputune.nvcpupin; i++) { virBufferAsprintf(buf, " <vcpupin vcpu='%u' ", @@ -12785,7 +12805,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, } if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " </cputune>\n"); if (def->numatune.memory.nodemask || diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 3712785..4b42d32 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1598,6 +1598,8 @@ struct _virDomainDef { unsigned long shares; unsigned long long period; long long quota; + unsigned long long hypervisor_period; + long long hypervisor_quota; int nvcpupin; virDomainVcpuPinDefPtr *vcpupin; } cputune; diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 021f489..b675902 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -509,7 +509,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) } if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + /* If we don't know VCPU<->PID mapping or all vcpu runs in the same * thread, we cannot control each vcpu. 
*/ virCgroupFree(&cgroup); @@ -562,6 +562,8 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, virCgroupPtr cgroup = NULL; virCgroupPtr cgroup_hypervisor = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; + unsigned long long period = vm->def->cputune.hypervisor_period; + long long quota = vm->def->cputune.hypervisor_quota; int rc; if (driver->cgroup == NULL) @@ -576,7 +578,7 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, } if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + /* If we don't know VCPU<->PID mapping or all vcpu runs in the same * thread, we cannot control each vcpu. */ virCgroupFree(&cgroup); @@ -600,6 +602,13 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, goto cleanup; } + if (period || quota) { + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + if (qemuSetupCgroupCpuBandwidth(cgroup_hypervisor, period, quota) < 0) + goto cleanup; + } + } + virCgroupFree(&cgroup_hypervisor); virCgroupFree(&cgroup); return 0; -- 1.7.4.4

From: Wen Congyang <wency@cn.fujitsu.com> allow the user change/get hypervisor's period and quota when the vm is running. --- include/libvirt/libvirt.h.in | 16 +++++ src/qemu/qemu_driver.c | 133 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 148 insertions(+), 1 deletions(-) diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 024c4ec..1a30426 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -662,6 +662,22 @@ typedef virTypedParameter *virTypedParameterPtr; #define VIR_DOMAIN_SCHEDULER_VCPU_QUOTA "vcpu_quota" /** + * VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD: + * + * Macro represents the enforcement period for a quota, in microseconds, + * when using the posix scheduler, as a ullong. + */ +#define VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD "hypervisor_period" + +/** + * VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA: + * + * Macro represents the maximum bandwidth to be used within a period, + * when using the posix scheduler, as an llong. + */ +#define VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA "hypervisor_quota" + +/** * VIR_DOMAIN_SCHEDULER_WEIGHT: * * Macro represents the relative weight, when using the credit diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c880f05..493f9c6 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -6087,7 +6087,7 @@ static char *qemuGetSchedulerType(virDomainPtr dom, else if (rc == 0) *nparams = 1; else - *nparams = 3; + *nparams = 5; } ret = strdup("posix"); @@ -7157,6 +7157,40 @@ cleanup: } static int +qemuSetHypervisorBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, + unsigned long long period, long long quota) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + virCgroupPtr cgroup_hypervisor = NULL; + int rc; + + if (period == 0 && quota == 0) + return 0; + + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { + return 0; + } + + rc = virCgroupForHypervisor(cgroup, &cgroup_hypervisor, 0); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to find hypervisor cgroup for %s"), + vm->def->name); + goto cleanup; + } + + if (qemuSetupCgroupCpuBandwidth(cgroup_hypervisor, period, quota) < 0) + goto cleanup; + + virCgroupFree(&cgroup_hypervisor); + return 0; + +cleanup: + virCgroupFree(&cgroup_hypervisor); + return -1; +} + +static int qemuSetSchedulerParametersFlags(virDomainPtr dom, virTypedParameterPtr params, int nparams, @@ -7179,6 +7213,10 @@ qemuSetSchedulerParametersFlags(virDomainPtr dom, VIR_TYPED_PARAM_ULLONG, VIR_DOMAIN_SCHEDULER_VCPU_QUOTA, VIR_TYPED_PARAM_LLONG, + VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD, + VIR_TYPED_PARAM_ULLONG, + VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA, + VIR_TYPED_PARAM_LLONG, NULL) < 0) return -1; @@ -7261,6 +7299,32 @@ qemuSetSchedulerParametersFlags(virDomainPtr dom, if (flags & VIR_DOMAIN_AFFECT_CONFIG) { vmdef->cputune.quota = params[i].value.l; } + } else if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD)) { + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = qemuSetHypervisorBWLive(vm, group, params[i].value.ul, 0); + if (rc != 0) + goto cleanup; + + if (params[i].value.ul) + vm->def->cputune.hypervisor_period = params[i].value.ul; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.hypervisor_period = params[i].value.ul; + } + } else if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA)) { + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = qemuSetHypervisorBWLive(vm, group, 0, params[i].value.l); + if (rc != 0) + goto cleanup; + + if (params[i].value.l) + vm->def->cputune.hypervisor_quota = 
params[i].value.l; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.hypervisor_quota = params[i].value.l; + } } } @@ -7365,6 +7429,43 @@ cleanup: } static int +qemuGetHypervisorBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, + unsigned long long *period, long long *quota) +{ + virCgroupPtr cgroup_hypervisor = NULL; + qemuDomainObjPrivatePtr priv = NULL; + int rc; + int ret = -1; + + priv = vm->privateData; + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { + /* We donot create sub dir for each vcpu */ + *period = 0; + *quota = 0; + return 0; + } + + /* get period and quota for hypervisor */ + rc = virCgroupForHypervisor(cgroup, &cgroup_hypervisor, 0); + if (!cgroup_hypervisor) { + virReportSystemError(-rc, + _("Unable to find hypervisor cgroup for %s"), + vm->def->name); + goto cleanup; + } + + rc = qemuGetVcpuBWLive(cgroup_hypervisor, period, quota); + if (rc < 0) + goto cleanup; + + ret = 0; + +cleanup: + virCgroupFree(&cgroup_hypervisor); + return ret; +} + +static int qemuGetSchedulerParametersFlags(virDomainPtr dom, virTypedParameterPtr params, int *nparams, @@ -7376,6 +7477,8 @@ qemuGetSchedulerParametersFlags(virDomainPtr dom, unsigned long long shares; unsigned long long period; long long quota; + unsigned long long hypervisor_period; + long long hypervisor_quota; int ret = -1; int rc; bool cpu_bw_status = false; @@ -7415,6 +7518,8 @@ qemuGetSchedulerParametersFlags(virDomainPtr dom, if (*nparams > 1 && cpu_bw_status) { period = persistentDef->cputune.period; quota = persistentDef->cputune.quota; + hypervisor_period = persistentDef->cputune.hypervisor_period; + hypervisor_quota = persistentDef->cputune.hypervisor_quota; } goto out; } @@ -7443,6 +7548,14 @@ qemuGetSchedulerParametersFlags(virDomainPtr dom, if (rc != 0) goto cleanup; } + + if (*nparams > 3 && cpu_bw_status) { + rc = qemuGetHypervisorBWLive(vm, group, &hypervisor_period, + &hypervisor_quota); + if (rc != 0) + goto cleanup; + } + out: if (virTypedParameterAssign(¶ms[0], VIR_DOMAIN_SCHEDULER_CPU_SHARES, VIR_TYPED_PARAM_ULLONG, shares) < 0) @@ -7465,6 +7578,24 @@ out: goto cleanup; saved_nparams++; } + + if (*nparams > saved_nparams) { + if (virTypedParameterAssign(¶ms[3], + VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD, + VIR_TYPED_PARAM_ULLONG, + hypervisor_period) < 0) + goto cleanup; + saved_nparams++; + } + + if (*nparams > saved_nparams) { + if (virTypedParameterAssign(¶ms[4], + VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA, + VIR_TYPED_PARAM_LLONG, + hypervisor_quota) < 0) + goto cleanup; + saved_nparams++; + } } *nparams = saved_nparams; -- 1.7.4.4
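As a usage illustration (the guest name and values are made up; only public entry points plus the two macros added above are used):

    #include <stdio.h>
    #include <string.h>
    #include <libvirt/libvirt.h>

    int main(void)
    {
        virConnectPtr conn = virConnectOpen("qemu:///system");
        virDomainPtr dom = conn ? virDomainLookupByName(conn, "guest1") : NULL;
        virTypedParameter params[2];
        int ret = -1;

        if (!dom)
            goto cleanup;

        memset(params, 0, sizeof(params));

        strncpy(params[0].field, VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD,
                VIR_TYPED_PARAM_FIELD_LENGTH);
        params[0].type = VIR_TYPED_PARAM_ULLONG;
        params[0].value.ul = 100000;        /* enforcement period, microseconds */

        strncpy(params[1].field, VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA,
                VIR_TYPED_PARAM_FIELD_LENGTH);
        params[1].type = VIR_TYPED_PARAM_LLONG;
        params[1].value.l = 20000;          /* allowed runtime per period */

        /* apply to the running guest only */
        ret = virDomainSetSchedulerParametersFlags(dom, params, 2,
                                                   VIR_DOMAIN_AFFECT_LIVE);
        if (ret < 0)
            fprintf(stderr, "failed to set hypervisor bandwidth\n");

    cleanup:
        if (dom)
            virDomainFree(dom);
        if (conn)
            virConnectClose(conn);
        return ret < 0 ? 1 : 0;
    }

The same field names work with virDomainGetSchedulerParametersFlags, which is why the scheduler type query above now reports five parameters instead of three.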

--- tools/virsh.pod | 11 ++++++----- 1 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/virsh.pod b/tools/virsh.pod index f83a29d..8ae3177 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -1189,7 +1189,8 @@ available for each hypervisor are: LXC (posix scheduler) : cpu_shares -QEMU/KVM (posix scheduler): cpu_shares, vcpu_period, vcpu_quota +QEMU/KVM (posix scheduler): cpu_shares, vcpu_period, vcpu_quota, +hypervisor_period, hypervisor_quota Xen (credit scheduler): weight, cap @@ -1207,10 +1208,10 @@ values 0 and 1 are automatically converted to a minimal value of 2. B<Note>: The weight and cap parameters are defined only for the XEN_CREDIT scheduler and are now I<DEPRECATED>. -B<Note>: The vcpu_period parameter has a valid value range of 1000-1000000 or -0, and the vcpu_quota parameter has a valid value range of -1000-18446744073709551 or less than 0. The value 0 for either parameter is -the same as not specifying that parameter. +B<Note>: The vcpu_period/hypervisor_period parameters have a valid value range +of 1000-1000000 or 0, and the vcpu_quota/hypervisor_quota parameters have a +valid value range of 1000-18446744073709551 or less than 0. The value 0 for +either parameter is the same as not specifying that parameter. =item B<screenshot> I<domain-id> [I<imagefilepath>] [I<--screen> B<screenID>] -- 1.7.4.4
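For example (guest name made up; --live/--config combine with the new parameters exactly as with the existing vcpu_* ones):

    virsh schedinfo --live guest1 --set hypervisor_period=100000
    virsh schedinfo --live guest1 --set hypervisor_quota=20000

Running plain "virsh schedinfo guest1" afterwards should then list hypervisor_period and hypervisor_quota alongside cpu_shares, vcpu_period and vcpu_quota.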