[libvirt] [PATCH 0/6] support setting cpu bandwidth for hypervisor threads

Currently, we can only set CPU bandwidth for vcpus. If the hypervisor threads consume too much CPU time, they may affect the vcpus. This patch set allows the user to control the CPU bandwidth of the hypervisor threads. The behavior does not change if the CPU bandwidth for the hypervisor is left unlimited.

Wen Congyang (6):
  Introduce the function virCgroupForHypervisor
  introduce the function virCgroupMoveTask()
  create a new cgroup and move all hypervisor threads to the new cgroup
  Update XML Schema for new entries
  qemu: Implement hypervisor's period and quota tunable XML configuration
    and parsing
  qemu: Implement hypervisor_period and hypervisor_quota's modification

 docs/schemas/domaincommon.rng |   10 ++
 include/libvirt/libvirt.h.in  |   16 +++
 src/conf/domain_conf.c        |   25 +++++-
 src/conf/domain_conf.h        |    2 +
 src/libvirt_private.syms      |    1 +
 src/qemu/qemu_cgroup.c        |   94 +++++++++++++++--
 src/qemu/qemu_cgroup.h        |    2 +
 src/qemu/qemu_driver.c        |  220 +++++++++++++++++++++++++++++++++++-----
 src/qemu/qemu_process.c       |    4 +
 src/util/cgroup.c             |   97 ++++++++++++++++++
 src/util/cgroup.h             |    6 +
 11 files changed, 436 insertions(+), 41 deletions(-)

Introduce the function virCgroupForHypervisor() to create a sub-directory for the hypervisor threads (including I/O threads and vhost-net threads).
---
 src/libvirt_private.syms |    1 +
 src/util/cgroup.c        |   42 ++++++++++++++++++++++++++++++++++++++++++
 src/util/cgroup.h        |    4 ++++
 3 files changed, 47 insertions(+), 0 deletions(-)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 8d29e75..a11b29c 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -66,6 +66,7 @@ virCgroupDenyAllDevices;
 virCgroupDenyDevicePath;
 virCgroupForDomain;
 virCgroupForDriver;
+virCgroupForHypervisor;
 virCgroupForVcpu;
 virCgroupFree;
 virCgroupGetBlkioWeight;
diff --git a/src/util/cgroup.c b/src/util/cgroup.c
index ad49bc2..901a841 100644
--- a/src/util/cgroup.c
+++ b/src/util/cgroup.c
@@ -944,6 +944,48 @@ int virCgroupForVcpu(virCgroupPtr driver ATTRIBUTE_UNUSED,
 #endif
 
 /**
+ * virCgroupForHypervisor:
+ *
+ * @driver: group for the domain
+ * @group: Pointer to returned virCgroupPtr
+ *
+ * Returns 0 on success
+ */
+#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
+int virCgroupForHypervisor(virCgroupPtr driver,
+                           virCgroupPtr *group,
+                           int create)
+{
+    int rc;
+    char *path;
+
+    if (driver == NULL)
+        return -EINVAL;
+
+    if (virAsprintf(&path, "%s/hypervisor", driver->path) < 0)
+        return -ENOMEM;
+
+    rc = virCgroupNew(path, group);
+    VIR_FREE(path);
+
+    if (rc == 0) {
+        rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_VCPU);
+        if (rc != 0)
+            virCgroupFree(group);
+    }
+
+    return rc;
+}
+#else
+int virCgroupForHypervisor(virCgroupPtr driver ATTRIBUTE_UNUSED,
+                           virCgroupPtr *group ATTRIBUTE_UNUSED,
+                           int create ATTRIBUTE_UNUSED)
+{
+    return -ENXIO;
+}
+
+#endif
+/**
  * virCgroupSetBlkioWeight:
  *
  * @group: The cgroup to change io weight for
diff --git a/src/util/cgroup.h b/src/util/cgroup.h
index 8486c42..34fcbf5 100644
--- a/src/util/cgroup.h
+++ b/src/util/cgroup.h
@@ -45,6 +45,10 @@ int virCgroupForVcpu(virCgroupPtr driver,
                      virCgroupPtr *group,
                      int create);
 
+int virCgroupForHypervisor(virCgroupPtr driver,
+                           virCgroupPtr *group,
+                           int create);
+
 int virCgroupPathOfController(virCgroupPtr group,
                               int controller,
                               const char *key,
-- 
1.7.1

introduce a new API to move all tasks from a cgroup to another cgroup
---
 src/util/cgroup.c |   55 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/util/cgroup.h |    2 +
 2 files changed, 57 insertions(+), 0 deletions(-)

diff --git a/src/util/cgroup.c b/src/util/cgroup.c
index 901a841..724cc6e 100644
--- a/src/util/cgroup.c
+++ b/src/util/cgroup.c
@@ -789,6 +789,61 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid)
     return rc;
 }
 
+static int virCgrouAddTaskStr(virCgroupPtr group, const char *pidstr)
+{
+    unsigned long long value;
+
+    if (virStrToLong_ull(pidstr, NULL, 10, &value) < 0)
+        return -EINVAL;
+
+    return virCgroupAddTask(group, value);
+}
+
+/**
+ * virCgroupMoveTask:
+ *
+ * @src_group: The source cgroup where all tasks are removed from
+ * @dest_group: The destination where all tasks are added to
+ *
+ * Returns: 0 on success
+ */
+int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group)
+{
+    int rc = 0;
+    int i;
+    char *content, *value, *next;
+
+    for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
+        /* Skip over controllers not mounted */
+        if (!src_group->controllers[i].mountPoint ||
+            !dest_group->controllers[i].mountPoint)
+            continue;
+
+        rc = virCgroupGetValueStr(src_group, i, "tasks", &content);
+        if (rc != 0)
+            break;
+
+        value = content;
+        while((next = strchr(value, '\n')) != NULL) {
+            *next = '\0';
+            if ((rc = virCgrouAddTaskStr(dest_group, value) < 0))
+                goto cleanup;
+            value = next + 1;
+        }
+        if (*value != '\0') {
+            if ((rc = virCgrouAddTaskStr(dest_group, value) < 0))
+                goto cleanup;
+        }
+
+        VIR_FREE(content);
+    }
+
+    return 0;
+
+cleanup:
+    virCgroupMoveTask(dest_group, src_group);
+    return rc;
+}
 
 /**
  * virCgroupForDriver:
diff --git a/src/util/cgroup.h b/src/util/cgroup.h
index 34fcbf5..2419ef6 100644
--- a/src/util/cgroup.h
+++ b/src/util/cgroup.h
@@ -56,6 +56,8 @@ int virCgroupPathOfController(virCgroupPtr group,
 
 int virCgroupAddTask(virCgroupPtr group, pid_t pid);
 
+int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group);
+
 int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight);
 int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight);
-- 
1.7.1

On Wed, Apr 25, 2012 at 05:44:52PM +0800, Wen Congyang wrote:
introduce a new API to move all tasks from a cgroup to another cgroup
--- src/util/cgroup.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/util/cgroup.h | 2 + 2 files changed, 57 insertions(+), 0 deletions(-)
diff --git a/src/util/cgroup.c b/src/util/cgroup.c index 901a841..724cc6e 100644 --- a/src/util/cgroup.c +++ b/src/util/cgroup.c @@ -789,6 +789,61 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid) return rc; }
+static int virCgrouAddTaskStr(virCgroupPtr group, const char *pidstr) +{ + unsigned long long value; + + if (virStrToLong_ull(pidstr, NULL, 10, &value) < 0) + return -EINVAL; + + return virCgroupAddTask(group, value); +} + +/** + * virCgroupMoveTask: + * + * @src_group: The source cgroup where all tasks are removed from + * @dest_group: The destination where all tasks are added to + * + * Returns: 0 on success + */ +int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group) +{ + int rc = 0; + int i; + char *content, *value, *next; + + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + /* Skip over controllers not mounted */ + if (!src_group->controllers[i].mountPoint || + !dest_group->controllers[i].mountPoint) + continue; + + rc = virCgroupGetValueStr(src_group, i, "tasks", &content); + if (rc != 0) + break; + + value = content; + while((next = strchr(value, '\n')) != NULL) { + *next = '\0'; + if ((rc = virCgrouAddTaskStr(dest_group, value) < 0)) + goto cleanup; + value = next + 1; + } + if (*value != '\0') { + if ((rc = virCgrouAddTaskStr(dest_group, value) < 0)) + goto cleanup; + } + + VIR_FREE(content); + } + + return 0; + +cleanup: + virCgroupMoveTask(dest_group, src_group);
If we get here, the tasks that were originally in dest_group can be mistakenly moved to src_group. Also note that this is a recursive call: if every attempt fails because of the same error, it can end up in an endless chain of calls to virCgroupMoveTask().

-- 
Thanks,
Hu Tao
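[A rough illustration of the reviewer's point, not part of the posted series: the move could record exactly which pids it migrated and, on failure, put only those back, without calling virCgroupMoveTask() recursively. The helper name and the fixed-size bound below are hypothetical; only APIs already shown in the patch are used, and a single controller is handled to keep the sketch short.]

    static int
    virCgroupMoveTasksOneController(virCgroupPtr src_group,
                                    virCgroupPtr dest_group,
                                    int controller)
    {
        char *content = NULL, *value, *next;
        unsigned long long moved[1024];   /* hypothetical fixed bound, sketch only */
        unsigned long long pid;
        size_t nmoved = 0, n;
        int rc;

        rc = virCgroupGetValueStr(src_group, controller, "tasks", &content);
        if (rc != 0)
            return rc;

        value = content;
        while (value && *value != '\0') {
            if ((next = strchr(value, '\n')) != NULL)
                *next = '\0';

            if (virStrToLong_ull(value, NULL, 10, &pid) < 0) {
                rc = -EINVAL;
                goto rollback;
            }
            if ((rc = virCgroupAddTask(dest_group, pid)) != 0)
                goto rollback;

            /* remember what we moved so the error path can undo it */
            if (nmoved < sizeof(moved) / sizeof(moved[0]))
                moved[nmoved++] = pid;

            value = next ? next + 1 : NULL;
        }

        VIR_FREE(content);
        return 0;

    rollback:
        /* Best effort: put back only the tasks this call moved, and do not
         * recurse, so a persistent error cannot loop forever.  Tasks that
         * were already in dest_group before the call are left alone. */
        for (n = 0; n < nmoved; n++)
            virCgroupAddTask(src_group, moved[n]);
        VIR_FREE(content);
        return rc;
    }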

create a new cgroup and move all hypervisor threads to the new cgroup. And then we can do the other things: 1. limit only vcpu usage rather than the whole qemu 2. limit for hypervisor threads(include vhost-net threads) --- src/qemu/qemu_cgroup.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_cgroup.h | 2 + src/qemu/qemu_process.c | 4 +++ 3 files changed, 63 insertions(+), 0 deletions(-) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index a07b6cd..727c0d4 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -564,6 +564,63 @@ cleanup: return -1; } +int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, + virDomainObjPtr vm) +{ + virCgroupPtr cgroup = NULL; + virCgroupPtr cgroup_hypervisor = NULL; + qemuDomainObjPrivatePtr priv = vm->privateData; + int rc; + + if (driver->cgroup == NULL) + return 0; /* Not supported, so claim success */ + + rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to find cgroup for %s"), + vm->def->name); + goto cleanup; + } + + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { + /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + * thread, we cannot control each vcpu. + */ + virCgroupFree(&cgroup); + return 0; + } + + rc = virCgroupForHypervisor(cgroup, &cgroup_hypervisor, 1); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to create hypervisor cgroup for %s"), + vm->def->name); + goto cleanup; + } + + rc = virCgroupMoveTask(cgroup, cgroup_hypervisor); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to move taks from domain cgroup to " + "hypervisor cgroup for %s"), + vm->def->name); + goto cleanup; + } + + virCgroupFree(&cgroup_hypervisor); + virCgroupFree(&cgroup); + return 0; + +cleanup: + virCgroupFree(&cgroup_hypervisor); + if (cgroup) { + virCgroupRemove(cgroup); + virCgroupFree(&cgroup); + } + + return -1; +} int qemuRemoveCgroup(struct qemud_driver *driver, virDomainObjPtr vm, diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 17164d9..92eff68 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -53,6 +53,8 @@ int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, unsigned long long period, long long quota); int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm); +int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, + virDomainObjPtr vm); int qemuRemoveCgroup(struct qemud_driver *driver, virDomainObjPtr vm, int quiet); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 566a17e..b605eee 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3668,6 +3668,10 @@ int qemuProcessStart(virConnectPtr conn, if (qemuSetupCgroupForVcpu(driver, vm) < 0) goto cleanup; + VIR_DEBUG("Setting cgroup for hypervisor(if required)"); + if (qemuSetupCgroupForHypervisor(driver, vm) < 0) + goto cleanup; + VIR_DEBUG("Setting VCPU affinities"); if (qemuProcessSetVcpuAffinites(conn, vm) < 0) goto cleanup; -- 1.7.1
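For orientation, the per-domain cgroup layout this patch aims at looks roughly like the following (the path prefix is only illustrative and depends on where the controllers are mounted; the vcpuN directories come from the existing virCgroupForVcpu()/qemuSetupCgroupForVcpu code):

    <cgroup mount>/cpu/libvirt/qemu/<domain>/
        hypervisor/          <- emulator, I/O and vhost-net threads (moved here by this patch)
        vcpu0/ ... vcpuN/    <- one group per vcpu thread (created earlier during startup)

Since a task lives in exactly one cgroup per hierarchy, the vcpu threads already placed in their vcpuN groups are not listed in the domain group's tasks file any more, so virCgroupMoveTask() only relocates the remaining hypervisor threads.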

---
 docs/schemas/domaincommon.rng |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index f116710..8165c5b 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -544,6 +544,16 @@
         <ref name="cpuquota"/>
       </element>
     </optional>
+    <optional>
+      <element name="hypervisor_period">
+        <ref name="cpuperiod"/>
+      </element>
+    </optional>
+    <optional>
+      <element name="hypervisor_quota">
+        <ref name="cpuquota"/>
+      </element>
+    </optional>
     <zeroOrMore>
       <element name="vcpupin">
         <attribute name="vcpu">
-- 
1.7.1
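With this schema change, a <cputune> block like the following (values purely illustrative) becomes valid domain XML alongside the existing period/quota elements:

  <cputune>
    <period>100000</period>
    <quota>50000</quota>
    <hypervisor_period>100000</hypervisor_period>
    <hypervisor_quota>20000</hypervisor_quota>
  </cputune>

The hypervisor_* elements reuse the cpuperiod/cpuquota type definitions, so they accept the same value ranges as the per-vcpu settings.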

set hypervisor's period and quota when the vm starts. It will affect to set vm's period and quota: donot set vm's period and quota if we limit hypevisor thread's bandwidth(hypervisor_quota > 0). --- src/conf/domain_conf.c | 25 ++++++++++++++- src/conf/domain_conf.h | 2 + src/qemu/qemu_cgroup.c | 37 ++++++++++++++++------- src/qemu/qemu_driver.c | 75 ++++++++++++++++++++++++++++++------------------ 4 files changed, 98 insertions(+), 41 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 5ab052a..28a6436 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -7931,6 +7931,14 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, &def->cputune.quota) < 0) def->cputune.quota = 0; + if (virXPathULongLong("string(./cputune/hypervisor_period[1])", ctxt, + &def->cputune.hypervisor_period) < 0) + def->cputune.hypervisor_period = 0; + + if (virXPathLongLong("string(./cputune/hypervisor_quota[1])", ctxt, + &def->cputune.hypervisor_quota) < 0) + def->cputune.hypervisor_quota = 0; + if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) { goto error; } @@ -12406,7 +12414,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, virBufferAsprintf(buf, ">%u</vcpu>\n", def->maxvcpus); if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " <cputune>\n"); if (def->cputune.shares) @@ -12418,6 +12427,17 @@ virDomainDefFormatInternal(virDomainDefPtr def, if (def->cputune.quota) virBufferAsprintf(buf, " <quota>%lld</quota>\n", def->cputune.quota); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_period>%llu" + "</hypervisor_period>\n", + def->cputune.hypervisor_period); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_quota>%lld" + "</hypervisor_quota>\n", + def->cputune.hypervisor_quota); + if (def->cputune.vcpupin) { for (i = 0; i < def->cputune.nvcpupin; i++) { virBufferAsprintf(buf, " <vcpupin vcpu='%u' ", @@ -12439,7 +12459,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, } if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " </cputune>\n"); if (def->numatune.memory.nodemask) { diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 0eed60e..2a37e26 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1558,6 +1558,8 @@ struct _virDomainDef { unsigned long shares; unsigned long long period; long long quota; + unsigned long long hypervisor_period; + long long hypervisor_quota; int nvcpupin; virDomainVcpuPinDefPtr *vcpupin; } cputune; diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 727c0d4..7c6ef33 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -493,17 +493,23 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) goto cleanup; } - /* Set cpu bandwidth for the vm */ - if (period || quota) { - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { - /* Ensure that we can multiply by vcpus without overflowing. */ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } + /* + * Set cpu bandwidth for the vm if any of the following is true: + * 1. 
we donot know VCPU<->PID mapping or all vcpus run in the same thread + * 2. the hypervisor threads are not limited(quota <= 0) + */ + if ((period || quota) && + qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + /* Ensure that we can multiply by vcpus without overflowing. */ + if (quota > LLONG_MAX / vm->def->vcpus) { + virReportSystemError(EINVAL, + _("%s"), + "Unable to set cpu bandwidth quota"); + goto cleanup; + } + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid || + vm->def->cputune.hypervisor_quota <= 0) { if (quota > 0) vm_quota = quota * vm->def->vcpus; else @@ -514,7 +520,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) } if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + /* If we donot know VCPU<->PID mapping or all vcpus run in the same * thread, we cannot control each vcpu. */ virCgroupFree(&cgroup); @@ -570,6 +576,8 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, virCgroupPtr cgroup = NULL; virCgroupPtr cgroup_hypervisor = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; + unsigned long long period = vm->def->cputune.hypervisor_period; + long long quota = vm->def->cputune.hypervisor_quota; int rc; if (driver->cgroup == NULL) @@ -608,6 +616,13 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, goto cleanup; } + if (period || quota) { + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + if (qemuSetupCgroupVcpuBW(cgroup_hypervisor, period, quota) < 0) + goto cleanup; + } + } + virCgroupFree(&cgroup_hypervisor); virCgroupFree(&cgroup); return 0; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c3555ca..2e40aee 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7007,6 +7007,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, int rc; long long vm_quota = 0; long long old_quota = 0; + long long hypervisor_quota = vm->def->cputune.hypervisor_quota; unsigned long long old_period = 0; if (period == 0 && quota == 0) @@ -7039,6 +7040,16 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, goto cleanup; } + /* If we donot know VCPU<->PID mapping or all vcpu runs in the same + * thread, we cannot control each vcpu. So we only modify cpu bandwidth + * for the vm. + */ + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + return 0; + } + /* * If quota will be changed to a small value, we should modify vcpu's quota * first. Otherwise, we should modify vm's quota first. @@ -7048,8 +7059,12 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, * * If both quota and period will be changed to a big/small value, we cannot * modify period and quota together. + * + * If the hypervisor threads are limited, we can update period for vm first, + * and then we can modify period and quota for the vcpu together even if + * they will be changed to a big/small value. 
*/ - if ((quota != 0) && (period != 0)) { + if (hypervisor_quota <= 0 && quota != 0 && period != 0) { if (((quota > old_quota) && (period > old_period)) || ((quota < old_quota) && (period < old_period))) { /* modify period */ @@ -7063,40 +7078,44 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, } } - if (((vm_quota != 0) && (vm_quota > old_quota)) || - ((period != 0) && (period < old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) - goto cleanup; - - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same - * thread, we cannot control each vcpu. So we only modify cpu bandwidth - * when each vcpu has a separated thread. + /* + * If the hypervisor threads are not limited, set cpu bandwidth for the + * vm. */ - if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) { - for (i = 0; i < priv->nvcpupids; i++) { - rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); - if (rc < 0) { - virReportSystemError(-rc, - _("Unable to find vcpu cgroup for %s(vcpu:" - " %d)"), - vm->def->name, i); - goto cleanup; - } - - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (vm->def->cputune.hypervisor_quota <= 0) { + if (((vm_quota != 0) && (vm_quota > old_quota)) || + ((period != 0) && (period < old_period))) + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) goto cleanup; + } - virCgroupFree(&cgroup_vcpu); + /* each vcpu has a separated thread, so we modify cpu bandwidth for vcpu. */ + for (i = 0; i < priv->nvcpupids; i++) { + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to find vcpu cgroup for %s(vcpu:" + " %d)"), + vm->def->name, i); + goto cleanup; } - } - if (((vm_quota != 0) && (vm_quota <= old_quota)) || - ((period != 0) && (period >= old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) goto cleanup; + virCgroupFree(&cgroup_vcpu); + } + + /* + * If the hypervisor threads are not limited, set cpu bandwidth for the vm. + */ + if (hypervisor_quota <= 0) { + if (((vm_quota != 0) && (vm_quota <= old_quota)) || + ((period != 0) && (period >= old_period))) + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + } + return 0; cleanup: -- 1.7.1

On Wed, Apr 25, 2012 at 05:46:06PM +0800, Wen Congyang wrote:
set hypervisor's period and quota when the vm starts. It will affect to set vm's period and quota: donot set vm's period and quota if we limit hypevisor thread's bandwidth(hypervisor_quota > 0). --- src/conf/domain_conf.c | 25 ++++++++++++++- src/conf/domain_conf.h | 2 + src/qemu/qemu_cgroup.c | 37 ++++++++++++++++------- src/qemu/qemu_driver.c | 75 ++++++++++++++++++++++++++++++------------------ 4 files changed, 98 insertions(+), 41 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 5ab052a..28a6436 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -7931,6 +7931,14 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, &def->cputune.quota) < 0) def->cputune.quota = 0;
+ if (virXPathULongLong("string(./cputune/hypervisor_period[1])", ctxt, + &def->cputune.hypervisor_period) < 0) + def->cputune.hypervisor_period = 0; + + if (virXPathLongLong("string(./cputune/hypervisor_quota[1])", ctxt, + &def->cputune.hypervisor_quota) < 0) + def->cputune.hypervisor_quota = 0; + if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) { goto error; } @@ -12406,7 +12414,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, virBufferAsprintf(buf, ">%u</vcpu>\n", def->maxvcpus);
if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " <cputune>\n");
if (def->cputune.shares) @@ -12418,6 +12427,17 @@ virDomainDefFormatInternal(virDomainDefPtr def, if (def->cputune.quota) virBufferAsprintf(buf, " <quota>%lld</quota>\n", def->cputune.quota); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_period>%llu" + "</hypervisor_period>\n", + def->cputune.hypervisor_period); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_quota>%lld" + "</hypervisor_quota>\n", + def->cputune.hypervisor_quota); + if (def->cputune.vcpupin) { for (i = 0; i < def->cputune.nvcpupin; i++) { virBufferAsprintf(buf, " <vcpupin vcpu='%u' ", @@ -12439,7 +12459,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, }
if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " </cputune>\n");
if (def->numatune.memory.nodemask) { diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 0eed60e..2a37e26 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1558,6 +1558,8 @@ struct _virDomainDef { unsigned long shares; unsigned long long period; long long quota; + unsigned long long hypervisor_period; + long long hypervisor_quota; int nvcpupin; virDomainVcpuPinDefPtr *vcpupin; } cputune; diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 727c0d4..7c6ef33 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -493,17 +493,23 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) goto cleanup; }
Check if cgroup CPU is active here:

    if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportSystemError(EINVAL, _("%s"),
                             "Cgroup CPU is not active.");
        goto cleanup;
    }

and remove all following checks in this function.
- /* Set cpu bandwidth for the vm */ - if (period || quota) { - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { - /* Ensure that we can multiply by vcpus without overflowing. */ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } + /* + * Set cpu bandwidth for the vm if any of the following is true: + * 1. we donot know VCPU<->PID mapping or all vcpus run in the same thread + * 2. the hypervisor threads are not limited(quota <= 0) + */ + if ((period || quota) && + qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + /* Ensure that we can multiply by vcpus without overflowing. */ + if (quota > LLONG_MAX / vm->def->vcpus) { + virReportSystemError(EINVAL, + _("%s"), + "Unable to set cpu bandwidth quota"); + goto cleanup; + }
+ if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid || + vm->def->cputune.hypervisor_quota <= 0) { if (quota > 0) vm_quota = quota * vm->def->vcpus; else @@ -514,7 +520,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) }
if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + /* If we donot know VCPU<->PID mapping or all vcpus run in the same * thread, we cannot control each vcpu. */ virCgroupFree(&cgroup); @@ -570,6 +576,8 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, virCgroupPtr cgroup = NULL; virCgroupPtr cgroup_hypervisor = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; + unsigned long long period = vm->def->cputune.hypervisor_period; + long long quota = vm->def->cputune.hypervisor_quota; int rc;
if (driver->cgroup == NULL) @@ -608,6 +616,13 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, goto cleanup; }
+ if (period || quota) { + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + if (qemuSetupCgroupVcpuBW(cgroup_hypervisor, period, quota) < 0)
Actually, qemuSetupCgroupVcpuBW just changes the cgroup's settings (period, quota); it doesn't care what the cgroup is associated with. So can we change the name to qemuSetupCgroupBW?
+ goto cleanup; + } + } + virCgroupFree(&cgroup_hypervisor); virCgroupFree(&cgroup); return 0; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c3555ca..2e40aee 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7007,6 +7007,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, int rc; long long vm_quota = 0; long long old_quota = 0; + long long hypervisor_quota = vm->def->cputune.hypervisor_quota; unsigned long long old_period = 0;
if (period == 0 && quota == 0) @@ -7039,6 +7040,16 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, goto cleanup; }
+ /* If we donot know VCPU<->PID mapping or all vcpu runs in the same + * thread, we cannot control each vcpu. So we only modify cpu bandwidth + * for the vm. + */ + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + return 0; + } + /* * If quota will be changed to a small value, we should modify vcpu's quota * first. Otherwise, we should modify vm's quota first. @@ -7048,8 +7059,12 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, * * If both quota and period will be changed to a big/small value, we cannot * modify period and quota together. + * + * If the hypervisor threads are limited, we can update period for vm first, + * and then we can modify period and quota for the vcpu together even if + * they will be changed to a big/small value. */ - if ((quota != 0) && (period != 0)) { + if (hypervisor_quota <= 0 && quota != 0 && period != 0) { if (((quota > old_quota) && (period > old_period)) || ((quota < old_quota) && (period < old_period))) { /* modify period */ @@ -7063,40 +7078,44 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, } }
- if (((vm_quota != 0) && (vm_quota > old_quota)) || - ((period != 0) && (period < old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) - goto cleanup; - - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same - * thread, we cannot control each vcpu. So we only modify cpu bandwidth - * when each vcpu has a separated thread. + /* + * If the hypervisor threads are not limited, set cpu bandwidth for the + * vm. */ - if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) { - for (i = 0; i < priv->nvcpupids; i++) { - rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); - if (rc < 0) { - virReportSystemError(-rc, - _("Unable to find vcpu cgroup for %s(vcpu:" - " %d)"), - vm->def->name, i); - goto cleanup; - } - - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (vm->def->cputune.hypervisor_quota <= 0) { + if (((vm_quota != 0) && (vm_quota > old_quota)) || + ((period != 0) && (period < old_period))) + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) goto cleanup; + }
- virCgroupFree(&cgroup_vcpu); + /* each vcpu has a separated thread, so we modify cpu bandwidth for vcpu. */ + for (i = 0; i < priv->nvcpupids; i++) { + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to find vcpu cgroup for %s(vcpu:" + " %d)"), + vm->def->name, i); + goto cleanup; } - }
- if (((vm_quota != 0) && (vm_quota <= old_quota)) || - ((period != 0) && (period >= old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) goto cleanup;
+ virCgroupFree(&cgroup_vcpu); + } + + /* + * If the hypervisor threads are not limited, set cpu bandwidth for the vm. + */
This makes me confused: why do we have to limit the whole vm when the hypervisor threads are not limited? Users may want to limit the vcpus only.

We have 3 situations now:

1. limit the vcpus only.
2. limit the hypervisor but not the vcpus.
3. limit the whole vm.

I don't think this patch is capable of handling all 3 of these situations.
+ if (hypervisor_quota <= 0) { + if (((vm_quota != 0) && (vm_quota <= old_quota)) || + ((period != 0) && (period >= old_period))) + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + } + return 0;
cleanup: -- 1.7.1
-- 
Thanks,
Hu Tao

At 05/17/2012 11:13 AM, Hu Tao Wrote:
On Wed, Apr 25, 2012 at 05:46:06PM +0800, Wen Congyang wrote:
set hypervisor's period and quota when the vm starts. It will affect to set vm's period and quota: donot set vm's period and quota if we limit hypevisor thread's bandwidth(hypervisor_quota > 0). --- src/conf/domain_conf.c | 25 ++++++++++++++- src/conf/domain_conf.h | 2 + src/qemu/qemu_cgroup.c | 37 ++++++++++++++++------- src/qemu/qemu_driver.c | 75 ++++++++++++++++++++++++++++++------------------ 4 files changed, 98 insertions(+), 41 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 5ab052a..28a6436 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -7931,6 +7931,14 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, &def->cputune.quota) < 0) def->cputune.quota = 0;
+ if (virXPathULongLong("string(./cputune/hypervisor_period[1])", ctxt, + &def->cputune.hypervisor_period) < 0) + def->cputune.hypervisor_period = 0; + + if (virXPathLongLong("string(./cputune/hypervisor_quota[1])", ctxt, + &def->cputune.hypervisor_quota) < 0) + def->cputune.hypervisor_quota = 0; + if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) { goto error; } @@ -12406,7 +12414,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, virBufferAsprintf(buf, ">%u</vcpu>\n", def->maxvcpus);
if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " <cputune>\n");
if (def->cputune.shares) @@ -12418,6 +12427,17 @@ virDomainDefFormatInternal(virDomainDefPtr def, if (def->cputune.quota) virBufferAsprintf(buf, " <quota>%lld</quota>\n", def->cputune.quota); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_period>%llu" + "</hypervisor_period>\n", + def->cputune.hypervisor_period); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_quota>%lld" + "</hypervisor_quota>\n", + def->cputune.hypervisor_quota); + if (def->cputune.vcpupin) { for (i = 0; i < def->cputune.nvcpupin; i++) { virBufferAsprintf(buf, " <vcpupin vcpu='%u' ", @@ -12439,7 +12459,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, }
if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " </cputune>\n");
if (def->numatune.memory.nodemask) { diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 0eed60e..2a37e26 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1558,6 +1558,8 @@ struct _virDomainDef { unsigned long shares; unsigned long long period; long long quota; + unsigned long long hypervisor_period; + long long hypervisor_quota; int nvcpupin; virDomainVcpuPinDefPtr *vcpupin; } cputune; diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 727c0d4..7c6ef33 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -493,17 +493,23 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) goto cleanup; }
Check if cgroup CPU is active here:
if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { virReportSystemError(EINVAL, _("%s"), "Cgroup CPU is not active."); goto cleanup; }
and remove all following checks in this function.
- /* Set cpu bandwidth for the vm */ - if (period || quota) { - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { - /* Ensure that we can multiply by vcpus without overflowing. */ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } + /* + * Set cpu bandwidth for the vm if any of the following is true: + * 1. we donot know VCPU<->PID mapping or all vcpus run in the same thread + * 2. the hypervisor threads are not limited(quota <= 0) + */ + if ((period || quota) && + qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + /* Ensure that we can multiply by vcpus without overflowing. */ + if (quota > LLONG_MAX / vm->def->vcpus) { + virReportSystemError(EINVAL, + _("%s"), + "Unable to set cpu bandwidth quota"); + goto cleanup; + }
+ if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid || + vm->def->cputune.hypervisor_quota <= 0) { if (quota > 0) vm_quota = quota * vm->def->vcpus; else @@ -514,7 +520,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) }
if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + /* If we donot know VCPU<->PID mapping or all vcpus run in the same * thread, we cannot control each vcpu. */ virCgroupFree(&cgroup); @@ -570,6 +576,8 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, virCgroupPtr cgroup = NULL; virCgroupPtr cgroup_hypervisor = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; + unsigned long long period = vm->def->cputune.hypervisor_period; + long long quota = vm->def->cputune.hypervisor_quota; int rc;
if (driver->cgroup == NULL) @@ -608,6 +616,13 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, goto cleanup; }
+ if (period || quota) { + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + if (qemuSetupCgroupVcpuBW(cgroup_hypervisor, period, quota) < 0)
Actually, qemuSetupCgroupVcpuBW just changes the cgroup's settings(period, quota), it doesn't care about what the cgroup is associated with. So can we change the name to qemuSetupCgroupBW?
+ goto cleanup; + } + } + virCgroupFree(&cgroup_hypervisor); virCgroupFree(&cgroup); return 0; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c3555ca..2e40aee 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7007,6 +7007,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, int rc; long long vm_quota = 0; long long old_quota = 0; + long long hypervisor_quota = vm->def->cputune.hypervisor_quota; unsigned long long old_period = 0;
if (period == 0 && quota == 0) @@ -7039,6 +7040,16 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, goto cleanup; }
+ /* If we donot know VCPU<->PID mapping or all vcpu runs in the same + * thread, we cannot control each vcpu. So we only modify cpu bandwidth + * for the vm. + */ + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + return 0; + } + /* * If quota will be changed to a small value, we should modify vcpu's quota * first. Otherwise, we should modify vm's quota first. @@ -7048,8 +7059,12 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, * * If both quota and period will be changed to a big/small value, we cannot * modify period and quota together. + * + * If the hypervisor threads are limited, we can update period for vm first, + * and then we can modify period and quota for the vcpu together even if + * they will be changed to a big/small value. */ - if ((quota != 0) && (period != 0)) { + if (hypervisor_quota <= 0 && quota != 0 && period != 0) { if (((quota > old_quota) && (period > old_period)) || ((quota < old_quota) && (period < old_period))) { /* modify period */ @@ -7063,40 +7078,44 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, } }
- if (((vm_quota != 0) && (vm_quota > old_quota)) || - ((period != 0) && (period < old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) - goto cleanup; - - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same - * thread, we cannot control each vcpu. So we only modify cpu bandwidth - * when each vcpu has a separated thread. + /* + * If the hypervisor threads are not limited, set cpu bandwidth for the + * vm. */ - if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) { - for (i = 0; i < priv->nvcpupids; i++) { - rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); - if (rc < 0) { - virReportSystemError(-rc, - _("Unable to find vcpu cgroup for %s(vcpu:" - " %d)"), - vm->def->name, i); - goto cleanup; - } - - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (vm->def->cputune.hypervisor_quota <= 0) { + if (((vm_quota != 0) && (vm_quota > old_quota)) || + ((period != 0) && (period < old_period))) + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) goto cleanup; + }
- virCgroupFree(&cgroup_vcpu); + /* each vcpu has a separated thread, so we modify cpu bandwidth for vcpu. */ + for (i = 0; i < priv->nvcpupids; i++) { + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to find vcpu cgroup for %s(vcpu:" + " %d)"), + vm->def->name, i); + goto cleanup; } - }
- if (((vm_quota != 0) && (vm_quota <= old_quota)) || - ((period != 0) && (period >= old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) goto cleanup;
+ virCgroupFree(&cgroup_vcpu); + } + + /* + * If the hypervisor threads are not limited, set cpu bandwidth for the vm. + */
This makes me confused. Why do we have to limit the whole vm when the hypervisor threads are not limited? Users may want to limit the vcpus only.
We have 3 situations now:
1. limit the vcpus only. 2. limit the hypervisor but not vcpus. 3. limit the whole vm.
Before this patch, we limited the whole vm when we limited the vcpus, because we do not want the hypervisor threads to eat too much cpu time. So if we do not limit the hypervisor, the behavior should not change: we should still limit the whole vm. If the hypervisor threads are limited, there is no need to limit the whole vm.

Thanks
Wen Congyang
I'm not thinking this patch is capable with all of these 3 situations.
+ if (hypervisor_quota <= 0) { + if (((vm_quota != 0) && (vm_quota <= old_quota)) || + ((period != 0) && (period >= old_period))) + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + } + return 0;
cleanup: -- 1.7.1

(2012/05/17 12:54), Wen Congyang wrote:
At 05/17/2012 11:13 AM, Hu Tao Wrote:
On Wed, Apr 25, 2012 at 05:46:06PM +0800, Wen Congyang wrote:
set hypervisor's period and quota when the vm starts. It will affect to set vm's period and quota: donot set vm's period and quota if we limit hypevisor thread's bandwidth(hypervisor_quota > 0). --- src/conf/domain_conf.c | 25 ++++++++++++++- src/conf/domain_conf.h | 2 + src/qemu/qemu_cgroup.c | 37 ++++++++++++++++------- src/qemu/qemu_driver.c | 75 ++++++++++++++++++++++++++++++------------------ 4 files changed, 98 insertions(+), 41 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 5ab052a..28a6436 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -7931,6 +7931,14 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, &def->cputune.quota) < 0) def->cputune.quota = 0;
+ if (virXPathULongLong("string(./cputune/hypervisor_period[1])", ctxt, + &def->cputune.hypervisor_period) < 0) + def->cputune.hypervisor_period = 0; + + if (virXPathLongLong("string(./cputune/hypervisor_quota[1])", ctxt, + &def->cputune.hypervisor_quota) < 0) + def->cputune.hypervisor_quota = 0; + if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) { goto error; } @@ -12406,7 +12414,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, virBufferAsprintf(buf, ">%u</vcpu>\n", def->maxvcpus);
if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " <cputune>\n");
if (def->cputune.shares) @@ -12418,6 +12427,17 @@ virDomainDefFormatInternal(virDomainDefPtr def, if (def->cputune.quota) virBufferAsprintf(buf, " <quota>%lld</quota>\n", def->cputune.quota); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_period>%llu" + "</hypervisor_period>\n", + def->cputune.hypervisor_period); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_quota>%lld" + "</hypervisor_quota>\n", + def->cputune.hypervisor_quota); + if (def->cputune.vcpupin) { for (i = 0; i < def->cputune.nvcpupin; i++) { virBufferAsprintf(buf, " <vcpupin vcpu='%u' ", @@ -12439,7 +12459,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, }
if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " </cputune>\n");
if (def->numatune.memory.nodemask) { diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 0eed60e..2a37e26 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1558,6 +1558,8 @@ struct _virDomainDef { unsigned long shares; unsigned long long period; long long quota; + unsigned long long hypervisor_period; + long long hypervisor_quota; int nvcpupin; virDomainVcpuPinDefPtr *vcpupin; } cputune; diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 727c0d4..7c6ef33 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -493,17 +493,23 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) goto cleanup; }
Check if cgroup CPU is active here:
if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { virReportSystemError(EINVAL, _("%s"), "Cgroup CPU is not active."); goto cleanup; }
and remove all following checks in this function.
- /* Set cpu bandwidth for the vm */ - if (period || quota) { - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { - /* Ensure that we can multiply by vcpus without overflowing. */ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } + /* + * Set cpu bandwidth for the vm if any of the following is true: + * 1. we donot know VCPU<->PID mapping or all vcpus run in the same thread + * 2. the hypervisor threads are not limited(quota <= 0) + */ + if ((period || quota) && + qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + /* Ensure that we can multiply by vcpus without overflowing. */ + if (quota > LLONG_MAX / vm->def->vcpus) { + virReportSystemError(EINVAL, + _("%s"), + "Unable to set cpu bandwidth quota"); + goto cleanup; + }
+ if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid || + vm->def->cputune.hypervisor_quota <= 0) { if (quota > 0) vm_quota = quota * vm->def->vcpus; else @@ -514,7 +520,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) }
if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + /* If we donot know VCPU<->PID mapping or all vcpus run in the same * thread, we cannot control each vcpu. */ virCgroupFree(&cgroup); @@ -570,6 +576,8 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, virCgroupPtr cgroup = NULL; virCgroupPtr cgroup_hypervisor = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; + unsigned long long period = vm->def->cputune.hypervisor_period; + long long quota = vm->def->cputune.hypervisor_quota; int rc;
if (driver->cgroup == NULL) @@ -608,6 +616,13 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, goto cleanup; }
+ if (period || quota) { + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + if (qemuSetupCgroupVcpuBW(cgroup_hypervisor, period, quota) < 0)
Actually, qemuSetupCgroupVcpuBW just changes the cgroup's settings(period, quota), it doesn't care about what the cgroup is associated with. So can we change the name to qemuSetupCgroupBW?
+ goto cleanup; + } + } + virCgroupFree(&cgroup_hypervisor); virCgroupFree(&cgroup); return 0; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c3555ca..2e40aee 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7007,6 +7007,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, int rc; long long vm_quota = 0; long long old_quota = 0; + long long hypervisor_quota = vm->def->cputune.hypervisor_quota; unsigned long long old_period = 0;
if (period == 0 && quota == 0) @@ -7039,6 +7040,16 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, goto cleanup; }
+ /* If we donot know VCPU<->PID mapping or all vcpu runs in the same + * thread, we cannot control each vcpu. So we only modify cpu bandwidth + * for the vm. + */ + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + return 0; + } + /* * If quota will be changed to a small value, we should modify vcpu's quota * first. Otherwise, we should modify vm's quota first. @@ -7048,8 +7059,12 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, * * If both quota and period will be changed to a big/small value, we cannot * modify period and quota together. + * + * If the hypervisor threads are limited, we can update period for vm first, + * and then we can modify period and quota for the vcpu together even if + * they will be changed to a big/small value. */ - if ((quota != 0) && (period != 0)) { + if (hypervisor_quota <= 0 && quota != 0 && period != 0) { if (((quota > old_quota) && (period > old_period)) || ((quota < old_quota) && (period < old_period))) { /* modify period */ @@ -7063,40 +7078,44 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, } }
- if (((vm_quota != 0) && (vm_quota > old_quota)) || - ((period != 0) && (period < old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) - goto cleanup; - - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same - * thread, we cannot control each vcpu. So we only modify cpu bandwidth - * when each vcpu has a separated thread. + /* + * If the hypervisor threads are not limited, set cpu bandwidth for the + * vm. */ - if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) { - for (i = 0; i < priv->nvcpupids; i++) { - rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); - if (rc < 0) { - virReportSystemError(-rc, - _("Unable to find vcpu cgroup for %s(vcpu:" - " %d)"), - vm->def->name, i); - goto cleanup; - } - - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (vm->def->cputune.hypervisor_quota <= 0) { + if (((vm_quota != 0) && (vm_quota > old_quota)) || + ((period != 0) && (period < old_period))) + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) goto cleanup; + }
- virCgroupFree(&cgroup_vcpu); + /* each vcpu has a separated thread, so we modify cpu bandwidth for vcpu. */ + for (i = 0; i < priv->nvcpupids; i++) { + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to find vcpu cgroup for %s(vcpu:" + " %d)"), + vm->def->name, i); + goto cleanup; } - }
- if (((vm_quota != 0) && (vm_quota <= old_quota)) || - ((period != 0) && (period >= old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) goto cleanup;
+ virCgroupFree(&cgroup_vcpu); + } + + /* + * If the hypervisor threads are not limited, set cpu bandwidth for the vm. + */
This makes me confused. Why do we have to limit the whole vm when the hypervisor threads are not limited? Users may want to limit the vcpus only.
We have 3 situations now:
1. limit the vcpus only. 2. limit the hypervisor but not vcpus. 3. limit the whole vm.
Before this patch, we limit the whole vm when we limit the vcpus because we donot want to the hypervisor threads eat too much cpu time.
So, if we donot limit the hypervisor, the behavior shoule not be changed. So we should limit the whole vm. If the hypervisor threads are limited, there is no need to limit the whole vm.
Needing to tune the hypervisor quota in order to limit only the vcpus' quota? That sounds strange to me.

Thanks,
-Kame

At 05/17/2012 12:46 PM, KAMEZAWA Hiroyuki Wrote:
(2012/05/17 12:54), Wen Congyang wrote:
At 05/17/2012 11:13 AM, Hu Tao Wrote:
On Wed, Apr 25, 2012 at 05:46:06PM +0800, Wen Congyang wrote:
set hypervisor's period and quota when the vm starts. It will affect to set vm's period and quota: donot set vm's period and quota if we limit hypevisor thread's bandwidth(hypervisor_quota > 0). --- src/conf/domain_conf.c | 25 ++++++++++++++- src/conf/domain_conf.h | 2 + src/qemu/qemu_cgroup.c | 37 ++++++++++++++++------- src/qemu/qemu_driver.c | 75 ++++++++++++++++++++++++++++++------------------ 4 files changed, 98 insertions(+), 41 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 5ab052a..28a6436 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -7931,6 +7931,14 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, &def->cputune.quota) < 0) def->cputune.quota = 0;
+ if (virXPathULongLong("string(./cputune/hypervisor_period[1])", ctxt, + &def->cputune.hypervisor_period) < 0) + def->cputune.hypervisor_period = 0; + + if (virXPathLongLong("string(./cputune/hypervisor_quota[1])", ctxt, + &def->cputune.hypervisor_quota) < 0) + def->cputune.hypervisor_quota = 0; + if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) { goto error; } @@ -12406,7 +12414,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, virBufferAsprintf(buf, ">%u</vcpu>\n", def->maxvcpus);
if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " <cputune>\n");
if (def->cputune.shares) @@ -12418,6 +12427,17 @@ virDomainDefFormatInternal(virDomainDefPtr def, if (def->cputune.quota) virBufferAsprintf(buf, " <quota>%lld</quota>\n", def->cputune.quota); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_period>%llu" + "</hypervisor_period>\n", + def->cputune.hypervisor_period); + + if (def->cputune.hypervisor_period) + virBufferAsprintf(buf, " <hypervisor_quota>%lld" + "</hypervisor_quota>\n", + def->cputune.hypervisor_quota); + if (def->cputune.vcpupin) { for (i = 0; i < def->cputune.nvcpupin; i++) { virBufferAsprintf(buf, " <vcpupin vcpu='%u' ", @@ -12439,7 +12459,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, }
if (def->cputune.shares || def->cputune.vcpupin || - def->cputune.period || def->cputune.quota) + def->cputune.period || def->cputune.quota || + def->cputune.hypervisor_period || def->cputune.hypervisor_quota) virBufferAddLit(buf, " </cputune>\n");
if (def->numatune.memory.nodemask) { diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 0eed60e..2a37e26 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1558,6 +1558,8 @@ struct _virDomainDef { unsigned long shares; unsigned long long period; long long quota; + unsigned long long hypervisor_period; + long long hypervisor_quota; int nvcpupin; virDomainVcpuPinDefPtr *vcpupin; } cputune; diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 727c0d4..7c6ef33 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -493,17 +493,23 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) goto cleanup; }
Check if cgroup CPU is active here:
if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { virReportSystemError(EINVAL, _("%s"), "Cgroup CPU is not active."); goto cleanup; }
and remove all following checks in this function.
- /* Set cpu bandwidth for the vm */ - if (period || quota) { - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { - /* Ensure that we can multiply by vcpus without overflowing. */ - if (quota > LLONG_MAX / vm->def->vcpus) { - virReportSystemError(EINVAL, - _("%s"), - "Unable to set cpu bandwidth quota"); - goto cleanup; - } + /* + * Set cpu bandwidth for the vm if any of the following is true: + * 1. we donot know VCPU<->PID mapping or all vcpus run in the same thread + * 2. the hypervisor threads are not limited(quota <= 0) + */ + if ((period || quota) && + qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + /* Ensure that we can multiply by vcpus without overflowing. */ + if (quota > LLONG_MAX / vm->def->vcpus) { + virReportSystemError(EINVAL, + _("%s"), + "Unable to set cpu bandwidth quota"); + goto cleanup; + }
+ if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid || + vm->def->cputune.hypervisor_quota <= 0) { if (quota > 0) vm_quota = quota * vm->def->vcpus; else @@ -514,7 +520,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm) }
if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same + /* If we donot know VCPU<->PID mapping or all vcpus run in the same * thread, we cannot control each vcpu. */ virCgroupFree(&cgroup); @@ -570,6 +576,8 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, virCgroupPtr cgroup = NULL; virCgroupPtr cgroup_hypervisor = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; + unsigned long long period = vm->def->cputune.hypervisor_period; + long long quota = vm->def->cputune.hypervisor_quota; int rc;
if (driver->cgroup == NULL) @@ -608,6 +616,13 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver, goto cleanup; }
+ if (period || quota) { + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + if (qemuSetupCgroupVcpuBW(cgroup_hypervisor, period, quota) < 0)
Actually, qemuSetupCgroupVcpuBW just changes the cgroup's settings(period, quota), it doesn't care about what the cgroup is associated with. So can we change the name to qemuSetupCgroupBW?
+ goto cleanup; + } + } + virCgroupFree(&cgroup_hypervisor); virCgroupFree(&cgroup); return 0; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c3555ca..2e40aee 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7007,6 +7007,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, int rc; long long vm_quota = 0; long long old_quota = 0; + long long hypervisor_quota = vm->def->cputune.hypervisor_quota; unsigned long long old_period = 0;
if (period == 0 && quota == 0) @@ -7039,6 +7040,16 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, goto cleanup; }
+ /* If we donot know VCPU<->PID mapping or all vcpu runs in the same + * thread, we cannot control each vcpu. So we only modify cpu bandwidth + * for the vm. + */ + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + goto cleanup; + return 0; + } + /* * If quota will be changed to a small value, we should modify vcpu's quota * first. Otherwise, we should modify vm's quota first. @@ -7048,8 +7059,12 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, * * If both quota and period will be changed to a big/small value, we cannot * modify period and quota together. + * + * If the hypervisor threads are limited, we can update period for vm first, + * and then we can modify period and quota for the vcpu together even if + * they will be changed to a big/small value. */ - if ((quota != 0) && (period != 0)) { + if (hypervisor_quota <= 0 && quota != 0 && period != 0) { if (((quota > old_quota) && (period > old_period)) || ((quota < old_quota) && (period < old_period))) { /* modify period */ @@ -7063,40 +7078,44 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, } }
- if (((vm_quota != 0) && (vm_quota > old_quota)) || - ((period != 0) && (period < old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) - goto cleanup; - - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same - * thread, we cannot control each vcpu. So we only modify cpu bandwidth - * when each vcpu has a separated thread. + /* + * If the hypervisor threads are not limited, set cpu bandwidth for the + * vm. */ - if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) { - for (i = 0; i < priv->nvcpupids; i++) { - rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); - if (rc < 0) { - virReportSystemError(-rc, - _("Unable to find vcpu cgroup for %s(vcpu:" - " %d)"), - vm->def->name, i); - goto cleanup; - } - - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (vm->def->cputune.hypervisor_quota <= 0) { + if (((vm_quota != 0) && (vm_quota > old_quota)) || + ((period != 0) && (period < old_period))) + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) goto cleanup; + }
- virCgroupFree(&cgroup_vcpu); + /* each vcpu has a separated thread, so we modify cpu bandwidth for vcpu. */ + for (i = 0; i < priv->nvcpupids; i++) { + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to find vcpu cgroup for %s(vcpu:" + " %d)"), + vm->def->name, i); + goto cleanup; } - }
- if (((vm_quota != 0) && (vm_quota <= old_quota)) || - ((period != 0) && (period >= old_period))) - /* Set cpu bandwidth for the vm */ - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0) + if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) goto cleanup;
+ virCgroupFree(&cgroup_vcpu); + } + + /* + * If the hypervisor threads are not limited, set cpu bandwidth for the vm. + */
This confuses me. Why do we have to limit the whole vm when the hypervisor threads are not limited? Users may want to limit only the vcpus.
We have 3 situations now:
1. limit the vcpus only.
2. limit the hypervisor but not vcpus.
3. limit the whole vm.
Before this patch, we limit the whole vm when we limit the vcpus, because we do not want the hypervisor threads to eat too much cpu time.
So, if we do not limit the hypervisor, the behavior should not change, and we should still limit the whole vm. If the hypervisor threads are limited, there is no need to limit the whole vm.
Needing to tune the hypervisor quota in order to get a vcpu-only limit? Sounds strange to me.
No, the current implementation is:
1. limit vcpu and hypervisor: vm is not limited
2. limit vcpu only: vm is limited
3. limit hypervisor: vm is not limited
4. neither vcpu nor hypervisor is limited: vm is not limited

Thanks
Wen Congyang
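In other words, only case 2 leaves a cap on the vm-level cgroup. A minimal sketch of that decision, assuming the hypervisor_quota field added by this series (the helper name is made up for illustration):

#include <stdbool.h>

/* quota <= 0 is treated as "not limited", matching the patches. */
static bool vmCgroupIsLimited(long long vcpu_quota, long long hypervisor_quota)
{
    /* Case 2: vcpus limited, hypervisor not limited -> keep the old
     * behaviour and cap the whole vm. */
    if (vcpu_quota > 0 && hypervisor_quota <= 0)
        return true;

    /* Cases 1, 3 and 4: the vm-level cgroup stays unlimited. */
    return false;
}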
Thanks, -Kame

(2012/05/17 14:07), Wen Congyang wrote:
No, current implemetion is: 1. limit vcpu and hypervisor: vm is not limited 2. limit vcpu only: vm is limited 3. limit hypervisor: vm is not limited 4. vcpu and hypervisor are not limited: vm is not limited.
In 2.:

vm ----- quota is limited
|- hypervisor ----- quota is unlimited
|- vcpu[0...x] ----- quota is limited

Hm... the vm is limited. It seems the user needs to tune the vm's quota... can the user make the vm's quota unlimited and limit *only* the vcpus, as Xen does? Thanks, -Kame
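For reference, in case 2 the cgroup layout would look roughly like this, assuming the cpu controller is mounted at /sys/fs/cgroup/cpu and libvirt's usual libvirt/qemu/<domain> placement (the paths are illustrative):

/sys/fs/cgroup/cpu/libvirt/qemu/<domain>/cpu.cfs_quota_us               limited
/sys/fs/cgroup/cpu/libvirt/qemu/<domain>/hypervisor/cpu.cfs_quota_us    -1 (unlimited)
/sys/fs/cgroup/cpu/libvirt/qemu/<domain>/vcpu0/cpu.cfs_quota_us         limited
/sys/fs/cgroup/cpu/libvirt/qemu/<domain>/vcpuN/cpu.cfs_quota_us         limited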

At 05/17/2012 01:09 PM, KAMEZAWA Hiroyuki Wrote:
In 2.:

vm ----- quota is limited
|- hypervisor ----- quota is unlimited
|- vcpu[0...x] ----- quota is limited

Hm... the vm is limited. It seems the user needs to tune the vm's quota... can the user make the vm's quota unlimited and limit *only* the vcpus, as Xen does?
Without this patchset, the vm will be limited if the vcpus are limited. We cannot tell whether the user wants the hypervisor unlimited or simply did not set it, and if the user did not set it, we should not change the existing behavior. So I do not see an easy way to limit only the vcpus. Thanks Wen Congyang
Thanks, -Kame

(2012/05/17 14:25), Wen Congyang wrote:
Without this patchset, the vm will be limited if the vcpu is limited. We cannot know whether the hypervisor is unlimited or the user does not set it. If the user does not set it, we should not change the behavior. So I donot find an easy way to limit only vcpus.
That just comes from your new XML rule. If I were you, I would add an optional attribute to quota/period:

  <quota target='all|vcpu'> ... </quota>  # 'all' is the default and limits the whole vm;
                                          # 'vcpu' limits just the vcpus and leaves the hypervisor unlimited.
  <period> .... </period>                 # the period is shared by all settings.

If the hypervisor should be limited, <quota target='hypervisor'> ... </quota> should work. Thanks, -Kame
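If the attribute route were taken, the parser side would presumably be small. A hypothetical sketch using libvirt's existing virXMLPropString, STREQ/STRNEQ and VIR_FREE helpers (the function name and the vcpu_only flag are illustrative, not part of any posted patch):

/* Decide whether a <quota target='...'> value applies to the vcpus only
 * or to the whole vm.  Returns -1 on an unknown target value. */
static int cputuneParseQuotaTarget(xmlNodePtr quota_node, bool *vcpu_only)
{
    char *target = virXMLPropString(quota_node, "target");

    *vcpu_only = false;
    if (!target)
        return 0;                /* no attribute: default target='all' */

    if (STREQ(target, "vcpu")) {
        *vcpu_only = true;
    } else if (STRNEQ(target, "all")) {
        VIR_FREE(target);
        return -1;
    }

    VIR_FREE(target);
    return 0;
}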

On Thu, May 17, 2012 at 01:25:48PM +0800, Wen Congyang wrote:
Without this patchset, the vm will be limited if the vcpu is limited. We cannot know whether the hypervisor is unlimited or the user does not set it. If the user does not set it, we should not change the behavior. So I donot find an easy way to limit only vcpus.
Can we change the implementation like this?

1.
  <cputune>
    <vcpu>
      <period>period</period>
      <quota>quota</quota>
    </vcpu>
  </cputune>

  Limit vcpus only; the vm and the hypervisor are not limited.

2.
  <cputune>
    <hypervisor>
      <period>period</period>
      <quota>quota</quota>
    </hypervisor>
  </cputune>

  Limit the hypervisor only; the vm and the vcpus are not limited.

3.
  <cputune>
    <period>period</period>
    <quota>quota</quota>
  </cputune>

  Limit the whole vm; in this case the vcpus and the hypervisor are also limited.

4.
  <cputune>
    <period>period</period>
    <quota>quota</quota>
    <vcpu>
      <period>period</period>
      <quota>quota</quota>
    </vcpu>
    <hypervisor>
      <period>period</period>
      <quota>quota</quota>
    </hypervisor>
  </cputune>

  Limit the vm, the vcpus and the hypervisor. I'm not sure how the cpu cgroup behaves in this case, but I suppose the vcpus' and the hypervisor's values will not exceed the vm's value.

--
Thanks,
Hu Tao

At 05/17/2012 01:42 PM, Hu Tao Wrote:
Can we change the implementation like these:
1. <cputune> <vcpu> <period>period</period> <quota>quota</quota> </vcpu> </cputune>
Limit vcpus only, does not limit vm or hypervisor.
2. <cputune> <hypervisor> <period>period</period> <quota>quota</quota> </hypervisor> </cputune>
Limit hypervisor only, does not limit vm or vcpus.
3. <cputune> <period>period</period> <quota>quota</quota> </cputune>
Limit the whole vm, in this case vcpus and hypervisor are also limited.
4. <cputune> <period>period</period> <quota>quota</quota> <vcpu> <period>period</period> <quota>quota</quota> </vcpu> <hypervisor> <period>period</period> <quota>quota</quota> </hypervisor> </cputune>
This is very complex, and the user would have to know how we implement it and what the period/quota limits are. If the user gives a wrong value, the vm cannot start.

We implement the limit like this:

                  vm
                  |
  -----------------------------------
  |          ...         |          |
  vcpu0               vcpun    hypervisor

If everything is limited, the vm's quota should be greater than max(vcpu's quota, hypervisor's quota) and the vm's period should be less than min(vcpu's period, hypervisor's period).

So I do not like this idea.

Thanks
Wen Congyang
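A self-contained sketch of the consistency rule stated above; the struct and function are illustrative only, not part of the series:

#include <stdbool.h>

struct bw { unsigned long long period; long long quota; };

/* True if the vm-level setting can accommodate both children, i.e.
 * vm.quota >= max(child quotas) and vm.period <= min(child periods). */
static bool cputuneIsConsistent(struct bw vm, struct bw vcpu, struct bw hypervisor)
{
    long long max_quota =
        vcpu.quota > hypervisor.quota ? vcpu.quota : hypervisor.quota;
    unsigned long long min_period =
        vcpu.period < hypervisor.period ? vcpu.period : hypervisor.period;

    return vm.quota >= max_quota && vm.period <= min_period;
}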

(2012/05/17 15:05), Wen Congyang wrote:
This is very complex, and the user should know how we implement it and the period/quota's limit. If the user gives a wrong value, the vm cannot start.
We implement the limit like this:
                  vm
                  |
  -----------------------------------
  |          ...         |          |
  vcpu0               vcpun    hypervisor
If all is limited, the vm's quota should be greater than max(vcpu's quota, hypervisor's quota) and vm's period should be less than min(vcpu's period, hypervisor's period).
So I donot like this idea.
Hmm, my proposal is not as complicated as you say, and it is simpler than your way:

  <quota target='all|vcpu'> ... </quota>  # 'all' is the default and limits the whole vm;
                                          # 'vcpu' limits just the vcpus rather than the whole vm.
  <period> .... </period>                 # the period is shared by all settings.

Isn't it? Thanks, -Kame

At 05/17/2012 02:06 PM, KAMEZAWA Hiroyuki Wrote:
Hmm. my proposal is not so complicated as you say. And simpler than your way.
<quota target='all|vcpu'> </quota> # all is default and limit the whole vm. # vcpu is just limit vcpus rather than whole vm. <period> .... </period> # period is shared in all settings.
Isn't it ?
Do you mean this?

  <cputune>
    <vcpu>
      <period>period</period>
      <quota target='all|vcpu'>quota</quota>
    </vcpu>
    <hypervisor>
      <period>period</period>
      <quota>quota</quota>
    </hypervisor>
  </cputune>

Thanks
Wen Congyang
Thanks, -Kame

(2012/05/17 15:34), Wen Congyang wrote:
Do you mean this: <cputune> <vcpu> <period>period</period> <quota target='all|vcpu'>quota</quota> </vcpu> <hypervisor> <period>period</period> <quota>quota</quota> </hypervisor> </cputune>
No. Limiting the hypervisor is not necessary at all in my proposal. Thanks, -Kame

allow the user change/get hypervisor's period and quota when the vm is running. If the hypervisor's bandwidth is changed to unlimited, we should limit the vm's bandwidth again. If we limit the hypervisor's bandwidth, there is no need to limit the vm's bandwidth. --- include/libvirt/libvirt.h.in | 16 +++++ src/qemu/qemu_driver.c | 147 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 162 insertions(+), 1 deletions(-) diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 97ad99d..e015499 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -662,6 +662,22 @@ typedef virTypedParameter *virTypedParameterPtr; #define VIR_DOMAIN_SCHEDULER_VCPU_QUOTA "vcpu_quota" /** + * VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD: + * + * Macro represents the enforcement period for a quota, in microseconds, + * when using the posix scheduler, as a ullong. + */ +#define VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD "hypervisor_period" + +/** + * VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA: + * + * Macro represents the maximum bandwidth to be used within a period, + * when using the posix scheduler, as an llong. + */ +#define VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA "hypervisor_quota" + +/** * VIR_DOMAIN_SCHEDULER_WEIGHT: * * Macro represents the relative weight, when using the credit diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 2e40aee..165d1c0 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5973,7 +5973,7 @@ static char *qemuGetSchedulerType(virDomainPtr dom, else if (rc == 0) *nparams = 1; else - *nparams = 3; + *nparams = 5; } ret = strdup("posix"); @@ -7124,6 +7124,54 @@ cleanup: } static int +qemuSetHypervisorBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, + unsigned long long period, long long quota) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + virCgroupPtr cgroup_hypervisor = NULL; + int rc; + + if (period == 0 && quota == 0) + return 0; + + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { + return 0; + } + + rc = virCgroupForHypervisor(cgroup, &cgroup_hypervisor, 0); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to find hypervisor cgroup for %s"), + vm->def->name); + goto cleanup; + } + + /* we have limited hypervisor thread, so unlimit vm cgoup */ + if (quota > 0 && qemuSetupCgroupVcpuBW(cgroup, 0, -1) < 0) + goto cleanup; + + if (qemuSetupCgroupVcpuBW(cgroup_hypervisor, period, quota) < 0) + goto cleanup; + + /* + * we have unlimited hypervisor thread, so limit vm cgoup again. + * + * We have ensured that we can multiply by vcpus without overflowing when + * setting vcpu's quota. So we donot check it here. 
+ */ + if (quota < 0 && + qemuSetupCgroupVcpuBW(cgroup, vm->def->cputune.period, + vm->def->cputune.quota * vm->def->vcpus) < 0) + goto cleanup; + + return 0; + +cleanup: + virCgroupFree(&cgroup_hypervisor); + return -1; +} + +static int qemuSetSchedulerParametersFlags(virDomainPtr dom, virTypedParameterPtr params, int nparams, @@ -7146,6 +7194,10 @@ qemuSetSchedulerParametersFlags(virDomainPtr dom, VIR_TYPED_PARAM_ULLONG, VIR_DOMAIN_SCHEDULER_VCPU_QUOTA, VIR_TYPED_PARAM_LLONG, + VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD, + VIR_TYPED_PARAM_ULLONG, + VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA, + VIR_TYPED_PARAM_LLONG, NULL) < 0) return -1; @@ -7228,6 +7280,32 @@ qemuSetSchedulerParametersFlags(virDomainPtr dom, if (flags & VIR_DOMAIN_AFFECT_CONFIG) { vmdef->cputune.quota = params[i].value.l; } + } else if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD)) { + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = qemuSetHypervisorBWLive(vm, group, params[i].value.ul, 0); + if (rc != 0) + goto cleanup; + + if (params[i].value.ul) + vm->def->cputune.hypervisor_period = params[i].value.ul; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.hypervisor_period = params[i].value.ul; + } + } else if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA)) { + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = qemuSetHypervisorBWLive(vm, group, 0, params[i].value.l); + if (rc != 0) + goto cleanup; + + if (params[i].value.l) + vm->def->cputune.hypervisor_quota = params[i].value.l; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.hypervisor_quota = params[i].value.l; + } } } @@ -7332,6 +7410,43 @@ cleanup: } static int +qemuGetHypervisorBWLive(virDomainObjPtr vm, virCgroupPtr cgroup, + unsigned long long *period, long long *quota) +{ + virCgroupPtr cgroup_hypervisor = NULL; + qemuDomainObjPrivatePtr priv = NULL; + int rc; + int ret = -1; + + priv = vm->privateData; + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) { + /* We donot create sub dir for each vcpu */ + *period = 0; + *quota = 0; + return 0; + } + + /* get period and quota for hypervisor */ + rc = virCgroupForHypervisor(cgroup, &cgroup_hypervisor, 0); + if (!cgroup_hypervisor) { + virReportSystemError(-rc, + _("Unable to find hypervisor cgroup for %s"), + vm->def->name); + goto cleanup; + } + + rc = qemuGetVcpuBWLive(cgroup_hypervisor, period, quota); + if (rc < 0) + goto cleanup; + + ret = 0; + +cleanup: + virCgroupFree(&cgroup_hypervisor); + return ret; +} + +static int qemuGetSchedulerParametersFlags(virDomainPtr dom, virTypedParameterPtr params, int *nparams, @@ -7343,6 +7458,8 @@ qemuGetSchedulerParametersFlags(virDomainPtr dom, unsigned long long shares; unsigned long long period; long long quota; + unsigned long long hypervisor_period; + long long hypervisor_quota; int ret = -1; int rc; bool cpu_bw_status = false; @@ -7382,6 +7499,8 @@ qemuGetSchedulerParametersFlags(virDomainPtr dom, if (*nparams > 1 && cpu_bw_status) { period = persistentDef->cputune.period; quota = persistentDef->cputune.quota; + hypervisor_period = persistentDef->cputune.hypervisor_period; + hypervisor_quota = persistentDef->cputune.hypervisor_quota; } goto out; } @@ -7410,6 +7529,14 @@ qemuGetSchedulerParametersFlags(virDomainPtr dom, if (rc != 0) goto cleanup; } + + if (*nparams > 3 && cpu_bw_status) { + rc = qemuGetHypervisorBWLive(vm, group, &hypervisor_period, + &hypervisor_quota); + if (rc != 0) + goto cleanup; + } + out: if (virTypedParameterAssign(¶ms[0], VIR_DOMAIN_SCHEDULER_CPU_SHARES, VIR_TYPED_PARAM_ULLONG, shares) < 0) 
@@ -7432,6 +7559,24 @@ out: goto cleanup; saved_nparams++; } + + if (*nparams > saved_nparams) { + if (virTypedParameterAssign(¶ms[3], + VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD, + VIR_TYPED_PARAM_ULLONG, + hypervisor_period) < 0) + goto cleanup; + saved_nparams++; + } + + if (*nparams > saved_nparams) { + if (virTypedParameterAssign(¶ms[4], + VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA, + VIR_TYPED_PARAM_LLONG, + hypervisor_quota) < 0) + goto cleanup; + saved_nparams++; + } } *nparams = saved_nparams; -- 1.7.1
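For completeness, with patch 6 applied a management application could drive the new tunables through the existing scheduler-parameter API roughly as follows. This is only a sketch: the two VIR_DOMAIN_SCHEDULER_HYPERVISOR_* names are the ones introduced above, the values are arbitrary, and error handling is trimmed:

#include <string.h>
#include <stdio.h>
#include <libvirt/libvirt.h>

int set_hypervisor_bw(virDomainPtr dom)
{
    virTypedParameter params[2];

    memset(params, 0, sizeof(params));

    /* enforcement period for the hypervisor threads, in microseconds */
    strncpy(params[0].field, VIR_DOMAIN_SCHEDULER_HYPERVISOR_PERIOD,
            VIR_TYPED_PARAM_FIELD_LENGTH);
    params[0].type = VIR_TYPED_PARAM_ULLONG;
    params[0].value.ul = 100000;

    /* bandwidth allowed within one period; a negative value means unlimited */
    strncpy(params[1].field, VIR_DOMAIN_SCHEDULER_HYPERVISOR_QUOTA,
            VIR_TYPED_PARAM_FIELD_LENGTH);
    params[1].type = VIR_TYPED_PARAM_LLONG;
    params[1].value.l = 20000;

    if (virDomainSetSchedulerParametersFlags(dom, params, 2,
                                             VIR_DOMAIN_AFFECT_LIVE) < 0) {
        fprintf(stderr, "failed to set hypervisor bandwidth\n");
        return -1;
    }
    return 0;
}

From virsh the same thing would presumably be reachable via schedinfo (e.g. virsh schedinfo <domain> --live hypervisor_period=100000 hypervisor_quota=20000) once virsh knows about the new fields.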