(2012/05/17 12:54), Wen Congyang wrote:
At 05/17/2012 11:13 AM, Hu Tao Wrote:
> On Wed, Apr 25, 2012 at 05:46:06PM +0800, Wen Congyang wrote:
>> Set the hypervisor's period and quota when the vm starts. This affects how
>> we set the vm's period and quota: do not set the vm's period and quota if
>> we limit the hypervisor thread's bandwidth (hypervisor_quota > 0).
>> ---
>> src/conf/domain_conf.c | 25 ++++++++++++++-
>> src/conf/domain_conf.h | 2 +
>> src/qemu/qemu_cgroup.c | 37 ++++++++++++++++-------
>> src/qemu/qemu_driver.c | 75 ++++++++++++++++++++++++++++++------------------
>> 4 files changed, 98 insertions(+), 41 deletions(-)
>>
>> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
>> index 5ab052a..28a6436 100644
>> --- a/src/conf/domain_conf.c
>> +++ b/src/conf/domain_conf.c
>> @@ -7931,6 +7931,14 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
>> &def->cputune.quota) < 0)
>> def->cputune.quota = 0;
>>
>> + if (virXPathULongLong("string(./cputune/hypervisor_period[1])", ctxt,
>> + &def->cputune.hypervisor_period) < 0)
>> + def->cputune.hypervisor_period = 0;
>> +
>> + if (virXPathLongLong("string(./cputune/hypervisor_quota[1])", ctxt,
>> + &def->cputune.hypervisor_quota) < 0)
>> + def->cputune.hypervisor_quota = 0;
>> +
>> if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) {
>> goto error;
>> }
>> @@ -12406,7 +12414,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>> virBufferAsprintf(buf, ">%u</vcpu>\n",
def->maxvcpus);
>>
>> if (def->cputune.shares || def->cputune.vcpupin ||
>> - def->cputune.period || def->cputune.quota)
>> + def->cputune.period || def->cputune.quota ||
>> + def->cputune.hypervisor_period || def->cputune.hypervisor_quota)
>> virBufferAddLit(buf, " <cputune>\n");
>>
>> if (def->cputune.shares)
>> @@ -12418,6 +12427,17 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>> if (def->cputune.quota)
>> virBufferAsprintf(buf, "
<quota>%lld</quota>\n",
>> def->cputune.quota);
>> +
>> + if (def->cputune.hypervisor_period)
>> + virBufferAsprintf(buf, " <hypervisor_period>%llu"
>> + "</hypervisor_period>\n",
>> + def->cputune.hypervisor_period);
>> +
>> + if (def->cputune.hypervisor_quota)
>> + virBufferAsprintf(buf, " <hypervisor_quota>%lld"
>> + "</hypervisor_quota>\n",
>> + def->cputune.hypervisor_quota);
>> +
>> if (def->cputune.vcpupin) {
>> for (i = 0; i < def->cputune.nvcpupin; i++) {
>> virBufferAsprintf(buf, " <vcpupin vcpu='%u'
",
>> @@ -12439,7 +12459,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>> }
>>
>> if (def->cputune.shares || def->cputune.vcpupin ||
>> - def->cputune.period || def->cputune.quota)
>> + def->cputune.period || def->cputune.quota ||
>> + def->cputune.hypervisor_period || def->cputune.hypervisor_quota)
>> virBufferAddLit(buf, " </cputune>\n");
>>
>> if (def->numatune.memory.nodemask) {
>> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
>> index 0eed60e..2a37e26 100644
>> --- a/src/conf/domain_conf.h
>> +++ b/src/conf/domain_conf.h
>> @@ -1558,6 +1558,8 @@ struct _virDomainDef {
>> unsigned long shares;
>> unsigned long long period;
>> long long quota;
>> + unsigned long long hypervisor_period;
>> + long long hypervisor_quota;
>> int nvcpupin;
>> virDomainVcpuPinDefPtr *vcpupin;
>> } cputune;
>> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
>> index 727c0d4..7c6ef33 100644
>> --- a/src/qemu/qemu_cgroup.c
>> +++ b/src/qemu/qemu_cgroup.c
>> @@ -493,17 +493,23 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
>> goto cleanup;
>> }
>
> Check if cgroup CPU is active here:
>
> if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>     virReportSystemError(EINVAL, "%s",
>                          _("Cgroup CPU is not active"));
>     goto cleanup;
> }
>
> and remove all following checks in this function.
>
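> With the check hoisted, the bandwidth blocks below only need to test the
> values themselves, e.g. (sketch):
>
>     /* the controller is known to be active at this point */
>     if (period || quota) {
>         /* Ensure that we can multiply by vcpus without overflowing. */
>         if (quota > LLONG_MAX / vm->def->vcpus) {
>             ...
>         }
>         ...
>     }
>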
>>
>> - /* Set cpu bandwidth for the vm */
>> - if (period || quota) {
>> - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>> - /* Ensure that we can multiply by vcpus without overflowing. */
>> - if (quota > LLONG_MAX / vm->def->vcpus) {
>> - virReportSystemError(EINVAL,
>> - _("%s"),
>> - "Unable to set cpu bandwidth
quota");
>> - goto cleanup;
>> - }
>> + /*
>> + * Set cpu bandwidth for the vm if any of the following is true:
>> + * 1. we do not know the VCPU<->PID mapping or all vcpus run in the same thread
>> + * 2. the hypervisor threads are not limited (hypervisor_quota <= 0)
>> + */
>> + if ((period || quota) &&
>> + qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>> + /* Ensure that we can multiply by vcpus without overflowing. */
>> + if (quota > LLONG_MAX / vm->def->vcpus) {
>> + virReportSystemError(EINVAL,
>> + _("%s"),
>> + "Unable to set cpu bandwidth
quota");
>> + goto cleanup;
>> + }
>>
>> + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid ||
>> + vm->def->cputune.hypervisor_quota <= 0) {
>> if (quota > 0)
>> vm_quota = quota * vm->def->vcpus;
>> else
>> @@ -514,7 +520,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
>> }
>>
>> if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
>> - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
>> + /* If we do not know the VCPU<->PID mapping or all vcpus run in the same
>> * thread, we cannot control each vcpu.
>> */
>> virCgroupFree(&cgroup);
>> @@ -570,6 +576,8 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver,
>> virCgroupPtr cgroup = NULL;
>> virCgroupPtr cgroup_hypervisor = NULL;
>> qemuDomainObjPrivatePtr priv = vm->privateData;
>> + unsigned long long period = vm->def->cputune.hypervisor_period;
>> + long long quota = vm->def->cputune.hypervisor_quota;
>> int rc;
>>
>> if (driver->cgroup == NULL)
>> @@ -608,6 +616,13 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver,
>> goto cleanup;
>> }
>>
>> + if (period || quota) {
>> + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>> + if (qemuSetupCgroupVcpuBW(cgroup_hypervisor, period, quota) < 0)
>
> Actually, qemuSetupCgroupVcpuBW just changes the cgroup's settings
> (period and quota); it doesn't care what the cgroup is associated
> with. So can we rename it to qemuSetupCgroupBW?
>
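> Something like the following (sketch; only the name changes, the body
> stays the same):
>
>     int qemuSetupCgroupBW(virCgroupPtr cgroup,
>                           unsigned long long period,
>                           long long quota);
>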
>> + goto cleanup;
>> + }
>> + }
>> +
>> virCgroupFree(&cgroup_hypervisor);
>> virCgroupFree(&cgroup);
>> return 0;
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index c3555ca..2e40aee 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -7007,6 +7007,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>> int rc;
>> long long vm_quota = 0;
>> long long old_quota = 0;
>> + long long hypervisor_quota = vm->def->cputune.hypervisor_quota;
>> unsigned long long old_period = 0;
>>
>> if (period == 0 && quota == 0)
>> @@ -7039,6 +7040,16 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>> goto cleanup;
>> }
>>
>> + /* If we do not know the VCPU<->PID mapping or all vcpus run in the same
>> + * thread, we cannot control each vcpu. So we only modify cpu bandwidth
>> + * for the vm.
>> + */
>> + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
>> + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>> + goto cleanup;
>> + return 0;
>> + }
>> +
>> /*
>> * If quota will be changed to a small value, we should modify vcpu's quota
>> * first. Otherwise, we should modify vm's quota first.
>> @@ -7048,8 +7059,12 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>> *
>> * If both quota and period will be changed to a big/small value, we cannot
>> * modify period and quota together.
>> + *
>> + * If the hypervisor threads are limited, we can update the vm's period first,
>> + * and then modify the vcpu's period and quota together, even if both
>> + * will be changed to bigger/smaller values.
>> */
>> - if ((quota != 0) && (period != 0)) {
>> + if (hypervisor_quota <= 0 && quota != 0 && period != 0) {
>> if (((quota > old_quota) && (period > old_period)) ||
>> ((quota < old_quota) && (period < old_period))) {
>> /* modify period */
>> @@ -7063,40 +7078,44 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>> }
>> }
>>
>> - if (((vm_quota != 0) && (vm_quota > old_quota)) ||
>> - ((period != 0) && (period < old_period)))
>> - /* Set cpu bandwidth for the vm */
>> - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>> - goto cleanup;
>> -
>> - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
>> - * thread, we cannot control each vcpu. So we only modify cpu bandwidth
>> - * when each vcpu has a separated thread.
>> + /*
>> + * If the hypervisor threads are not limited, set cpu bandwidth for the
>> + * vm.
>> */
>> - if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid)
{
>> - for (i = 0; i < priv->nvcpupids; i++) {
>> - rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0);
>> - if (rc < 0) {
>> - virReportSystemError(-rc,
>> - _("Unable to find vcpu cgroup for
%s(vcpu:"
>> - " %d)"),
>> - vm->def->name, i);
>> - goto cleanup;
>> - }
>> -
>> - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
>> + if (vm->def->cputune.hypervisor_quota <= 0) {
>> + if (((vm_quota != 0) && (vm_quota > old_quota)) ||
>> + ((period != 0) && (period < old_period)))
>> + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>> goto cleanup;
>> + }
>>
>> - virCgroupFree(&cgroup_vcpu);
>> + /* Each vcpu has a separate thread, so we modify cpu bandwidth per vcpu. */
>> + for (i = 0; i < priv->nvcpupids; i++) {
>> + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0);
>> + if (rc < 0) {
>> + virReportSystemError(-rc,
>> + _("Unable to find vcpu cgroup for
%s(vcpu:"
>> + " %d)"),
>> + vm->def->name, i);
>> + goto cleanup;
>> }
>> - }
>>
>> - if (((vm_quota != 0) && (vm_quota <= old_quota)) ||
>> - ((period != 0) && (period >= old_period)))
>> - /* Set cpu bandwidth for the vm */
>> - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>> + if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
>> goto cleanup;
>>
>> + virCgroupFree(&cgroup_vcpu);
>> + }
>> +
>> + /*
>> + * If the hypervisor threads are not limited, set cpu bandwidth for the vm.
>> + */
>
> This confuses me. Why do we have to limit the whole vm when the
> hypervisor threads are not limited? Users may want to limit the vcpus
> only.
>
> We have 3 situations now:
>
> 1. limit the vcpus only.
> 2. limit the hypervisor but not vcpus.
> 3. limit the whole vm.
Before this patch, we limited the whole vm when we limited the vcpus,
because we do not want the hypervisor threads to eat too much cpu
time.
So if we do not limit the hypervisor, the behavior should not change:
we should still limit the whole vm. If the hypervisor threads are
limited, there is no need to limit the whole vm.
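For illustration (the element names are the ones this patch parses; the
values are made up), limiting the hypervisor threads as well could look
like:

  <cputune>
    <period>100000</period>
    <quota>50000</quota>
    <hypervisor_period>100000</hypervisor_period>
    <hypervisor_quota>10000</hypervisor_quota>
  </cputune>

Here each vcpu gets 50ms of every 100ms and the hypervisor thread gets
10ms of every 100ms, so no extra whole-vm limit is set.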
Do we need to tune the hypervisor quota just to get a vcpu-only limit?
Sounds strange to me.
Thanks,
-Kame