At 05/17/2012 12:46 PM, KAMEZAWA Hiroyuki Wrote:
(2012/05/17 12:54), Wen Congyang wrote:
> At 05/17/2012 11:13 AM, Hu Tao Wrote:
>> On Wed, Apr 25, 2012 at 05:46:06PM +0800, Wen Congyang wrote:
>>> Set the hypervisor's period and quota when the vm starts. This affects
>>> how the vm's period and quota are set: do not set the vm's period and
>>> quota if we limit the hypervisor threads' bandwidth (hypervisor_quota > 0).
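>>>
>>> With this patch, the cputune XML grows two new elements alongside the
>>> existing period/quota pair. An illustrative example (the values here
>>> are made up):
>>>
>>>   <cputune>
>>>     <period>100000</period>
>>>     <quota>50000</quota>
>>>     <hypervisor_period>100000</hypervisor_period>
>>>     <hypervisor_quota>10000</hypervisor_quota>
>>>   </cputune>
>>>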
>>> ---
>>> src/conf/domain_conf.c | 25 ++++++++++++++-
>>> src/conf/domain_conf.h | 2 +
>>> src/qemu/qemu_cgroup.c | 37 ++++++++++++++++-------
>>> src/qemu/qemu_driver.c | 75 ++++++++++++++++++++++++++++++------------------
>>> 4 files changed, 98 insertions(+), 41 deletions(-)
>>>
>>> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
>>> index 5ab052a..28a6436 100644
>>> --- a/src/conf/domain_conf.c
>>> +++ b/src/conf/domain_conf.c
>>> @@ -7931,6 +7931,14 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
>>> &def->cputune.quota) < 0)
>>> def->cputune.quota = 0;
>>>
>>> + if (virXPathULongLong("string(./cputune/hypervisor_period[1])", ctxt,
>>> + &def->cputune.hypervisor_period) < 0)
>>> + def->cputune.hypervisor_period = 0;
>>> +
>>> + if (virXPathLongLong("string(./cputune/hypervisor_quota[1])", ctxt,
>>> + &def->cputune.hypervisor_quota) < 0)
>>> + def->cputune.hypervisor_quota = 0;
>>> +
>>> if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) {
>>> goto error;
>>> }
>>> @@ -12406,7 +12414,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
%u">
>>> virBufferAsprintf(buf, ">%u</vcpu>\n", def->maxvcpus);
>>>
>>> if (def->cputune.shares || def->cputune.vcpupin ||
>>> - def->cputune.period || def->cputune.quota)
>>> + def->cputune.period || def->cputune.quota ||
>>> + def->cputune.hypervisor_period || def->cputune.hypervisor_quota)
>>> virBufferAddLit(buf, " <cputune>\n");
>>>
>>> if (def->cputune.shares)
>>> @@ -12418,6 +12427,17 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>>> if (def->cputune.quota)
>>> virBufferAsprintf(buf, "
<quota>%lld</quota>\n",
>>> def->cputune.quota);
>>> +
>>> + if (def->cputune.hypervisor_period)
>>> + virBufferAsprintf(buf, "    <hypervisor_period>%llu"
>>> + "</hypervisor_period>\n",
>>> + def->cputune.hypervisor_period);
>>> +
>>> + if (def->cputune.hypervisor_quota)
>>> + virBufferAsprintf(buf, "    <hypervisor_quota>%lld"
>>> + "</hypervisor_quota>\n",
>>> + def->cputune.hypervisor_quota);
>>> +
>>> if (def->cputune.vcpupin) {
>>> for (i = 0; i < def->cputune.nvcpupin; i++) {
>>> virBufferAsprintf(buf, " <vcpupin vcpu='%u'
",
>>> @@ -12439,7 +12459,8 @@ virDomainDefFormatInternal(virDomainDefPtr def,
>>> }
>>>
>>> if (def->cputune.shares || def->cputune.vcpupin ||
>>> - def->cputune.period || def->cputune.quota)
>>> + def->cputune.period || def->cputune.quota ||
>>> + def->cputune.hypervisor_period || def->cputune.hypervisor_quota)
>>> virBufferAddLit(buf, " </cputune>\n");
>>>
>>> if (def->numatune.memory.nodemask) {
>>> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
>>> index 0eed60e..2a37e26 100644
>>> --- a/src/conf/domain_conf.h
>>> +++ b/src/conf/domain_conf.h
>>> @@ -1558,6 +1558,8 @@ struct _virDomainDef {
>>> unsigned long shares;
>>> unsigned long long period;
>>> long long quota;
>>> + unsigned long long hypervisor_period;
>>> + long long hypervisor_quota;
>>> int nvcpupin;
>>> virDomainVcpuPinDefPtr *vcpupin;
>>> } cputune;
>>> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
>>> index 727c0d4..7c6ef33 100644
>>> --- a/src/qemu/qemu_cgroup.c
>>> +++ b/src/qemu/qemu_cgroup.c
>>> @@ -493,17 +493,23 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
>>> goto cleanup;
>>> }
>>
>> Check whether the cgroup CPU controller is active once, here:
>>
>> if (!qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>     virReportSystemError(EINVAL,
>>                          _("%s"),
>>                          "Cgroup CPU is not active.");
>>     goto cleanup;
>> }
>>
>> and remove all following checks in this function.
>>
>>>
>>> - /* Set cpu bandwidth for the vm */
>>> - if (period || quota) {
>>> - if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>> - /* Ensure that we can multiply by vcpus without overflowing. */
>>> - if (quota > LLONG_MAX / vm->def->vcpus) {
>>> - virReportSystemError(EINVAL,
>>> - _("%s"),
>>> - "Unable to set cpu bandwidth
quota");
>>> - goto cleanup;
>>> - }
>>> + /*
>>> + * Set cpu bandwidth for the vm if any of the following is true:
>>> + * 1. we do not know VCPU<->PID mapping or all vcpus run in the same thread
>>> + * 2. the hypervisor threads are not limited (hypervisor_quota <= 0)
>>> + */
>>> + if ((period || quota) &&
>>> + qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>> + /* Ensure that we can multiply by vcpus without overflowing. */
>>> + if (quota > LLONG_MAX / vm->def->vcpus) {
>>> + virReportSystemError(EINVAL,
>>> + _("%s"),
>>> + "Unable to set cpu bandwidth
quota");
>>> + goto cleanup;
>>> + }
>>>
>>> + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid ||
>>> + vm->def->cputune.hypervisor_quota <= 0) {
>>> if (quota > 0)
>>> vm_quota = quota * vm->def->vcpus;
>>> else
>>> @@ -514,7 +520,7 @@ int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
>>> }
>>>
>>> if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
>>> - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
>>> + /* If we do not know VCPU<->PID mapping or all vcpus run in the same
>>> * thread, we cannot control each vcpu.
>>> */
>>> virCgroupFree(&cgroup);
>>> @@ -570,6 +576,8 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver,
>>> virCgroupPtr cgroup = NULL;
>>> virCgroupPtr cgroup_hypervisor = NULL;
>>> qemuDomainObjPrivatePtr priv = vm->privateData;
>>> + unsigned long long period = vm->def->cputune.hypervisor_period;
>>> + long long quota = vm->def->cputune.hypervisor_quota;
>>> int rc;
>>>
>>> if (driver->cgroup == NULL)
>>> @@ -608,6 +616,13 @@ int qemuSetupCgroupForHypervisor(struct qemud_driver *driver,
>>> goto cleanup;
>>> }
>>>
>>> + if (period || quota) {
>>> + if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
>>> + if (qemuSetupCgroupVcpuBW(cgroup_hypervisor, period, quota) < 0)
>>
>> Actually, qemuSetupCgroupVcpuBW just changes the cgroup's
>> settings (period, quota); it doesn't care what the cgroup is
>> associated with. So can we change the name to qemuSetupCgroupBW?
>>
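>> Something like this, perhaps (a rough, untested sketch of the renamed
>> helper; if I remember right, the real qemuSetupCgroupVcpuBW also rolls
>> the period back when setting the quota fails, which is omitted here):
>>
>> static int
>> qemuSetupCgroupBW(virCgroupPtr cgroup,
>>                   unsigned long long period,
>>                   long long quota)
>> {
>>     int rc;
>>
>>     if (period) {
>>         rc = virCgroupSetCpuCfsPeriod(cgroup, period);
>>         if (rc < 0) {
>>             virReportSystemError(-rc, _("%s"),
>>                                  "Unable to set cpu bandwidth period");
>>             return -1;
>>         }
>>     }
>>
>>     if (quota) {
>>         rc = virCgroupSetCpuCfsQuota(cgroup, quota);
>>         if (rc < 0) {
>>             virReportSystemError(-rc, _("%s"),
>>                                  "Unable to set cpu bandwidth quota");
>>             return -1;
>>         }
>>     }
>>
>>     return 0;
>> }
>>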
>>> + goto cleanup;
>>> + }
>>> + }
>>> +
>>> virCgroupFree(&cgroup_hypervisor);
>>> virCgroupFree(&cgroup);
>>> return 0;
>>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>>> index c3555ca..2e40aee 100644
>>> --- a/src/qemu/qemu_driver.c
>>> +++ b/src/qemu/qemu_driver.c
>>> @@ -7007,6 +7007,7 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>> int rc;
>>> long long vm_quota = 0;
>>> long long old_quota = 0;
>>> + long long hypervisor_quota = vm->def->cputune.hypervisor_quota;
>>> unsigned long long old_period = 0;
>>>
>>> if (period == 0 && quota == 0)
>>> @@ -7039,6 +7040,16 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>> goto cleanup;
>>> }
>>>
>>> + /* If we do not know VCPU<->PID mapping or all vcpus run in the same
>>> + * thread, we cannot control each vcpu. So we only modify cpu bandwidth
>>> + * for the vm.
>>> + */
>>> + if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
>>> + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>> + goto cleanup;
>>> + return 0;
>>> + }
>>> +
>>> /*
>>> * If quota will be changed to a small value, we should modify vcpu's quota
>>> * first. Otherwise, we should modify vm's quota first.
>>> @@ -7048,8 +7059,12 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>> *
>>> * If both quota and period will be changed to a big/small value, we cannot
>>> * modify period and quota together.
>>> + *
>>> + * If the hypervisor threads are limited, we can update period for vm first,
>>> + * and then we can modify period and quota for the vcpu together even if
>>> + * they will be changed to a big/small value.
>>> */
>>> - if ((quota != 0) && (period != 0)) {
>>> + if (hypervisor_quota <= 0 && quota != 0 && period != 0) {
>>> if (((quota > old_quota) && (period > old_period)) ||
>>> ((quota < old_quota) && (period < old_period))) {
>>> /* modify period */
>>> @@ -7063,40 +7078,44 @@ qemuSetVcpusBWLive(virDomainObjPtr vm, virCgroupPtr cgroup,
>>> }
>>> }
>>>
>>> - if (((vm_quota != 0) && (vm_quota > old_quota)) ||
>>> - ((period != 0) && (period < old_period)))
>>> - /* Set cpu bandwidth for the vm */
>>> - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>> - goto cleanup;
>>> -
>>> - /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
>>> - * thread, we cannot control each vcpu. So we only modify cpu bandwidth
>>> - * when each vcpu has a separated thread.
>>> + /*
>>> + * If the hypervisor threads are not limited, set cpu bandwidth for the
>>> + * vm.
>>> */
>>> - if (priv->nvcpupids != 0 && priv->vcpupids[0] != vm->pid) {
>>> - for (i = 0; i < priv->nvcpupids; i++) {
>>> - rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0);
>>> - if (rc < 0) {
>>> - virReportSystemError(-rc,
>>> - _("Unable to find vcpu cgroup for
%s(vcpu:"
>>> - " %d)"),
>>> - vm->def->name, i);
>>> - goto cleanup;
>>> - }
>>> -
>>> - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
>>> + if (vm->def->cputune.hypervisor_quota <= 0) {
>>> + if (((vm_quota != 0) && (vm_quota > old_quota)) ||
>>> + ((period != 0) && (period < old_period)))
>>> + if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>> goto cleanup;
>>> + }
>>>
>>> - virCgroupFree(&cgroup_vcpu);
>>> + /* Each vcpu has a separate thread, so we modify cpu bandwidth per vcpu. */
>>> + for (i = 0; i < priv->nvcpupids; i++) {
>>> + rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 0);
>>> + if (rc < 0) {
>>> + virReportSystemError(-rc,
>>> + _("Unable to find vcpu cgroup for
%s(vcpu:"
>>> + " %d)"),
>>> + vm->def->name, i);
>>> + goto cleanup;
>>> }
>>> - }
>>>
>>> - if (((vm_quota != 0) && (vm_quota <= old_quota)) ||
>>> - ((period != 0) && (period >= old_period)))
>>> - /* Set cpu bandwidth for the vm */
>>> - if (qemuSetupCgroupVcpuBW(cgroup, period, vm_quota) < 0)
>>> + if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
>>> goto cleanup;
>>>
>>> + virCgroupFree(&cgroup_vcpu);
>>> + }
>>> +
>>> + /*
>>> + * If the hypervisor threads are not limited, set cpu bandwidth for the vm.
>>> + */
>>
>> This confuses me. Why do we have to limit the whole vm when the
>> hypervisor threads are not limited? Users may want to limit the
>> vcpus only.
>>
>> We have 3 situations now:
>>
>> 1. limit the vcpus only.
>> 2. limit the hypervisor but not vcpus.
>> 3. limit the whole vm.
>
> Before this patch, we limited the whole vm when we limited the vcpus,
> because we do not want the hypervisor threads to eat too much cpu
> time.
>
> So if we do not limit the hypervisor, the behavior should not change,
> and we should still limit the whole vm. If the hypervisor threads
> are limited, there is no need to limit the whole vm.
>
So we need to tune the hypervisor quota just to get a vcpu-only limit?
Sounds strange to me.

No, the current implementation is:
1. limit vcpus and hypervisor: vm is not limited
2. limit vcpus only: vm is limited
3. limit hypervisor only: vm is not limited
4. neither vcpus nor hypervisor limited: vm is not limited
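
In other words, the whole-vm limit only applies in case 2. A condensed
sketch of that rule (an illustrative helper, not code from the patch;
it assumes each vcpu runs in its own thread):

    #include <stdbool.h>

    /* Illustrative only: the whole-vm cgroup is limited exactly when
     * the vcpus are limited but the hypervisor threads are not
     * (case 2 above); otherwise the per-vcpu and/or hypervisor
     * sub-cgroups carry the limits themselves. */
    static bool
    limitWholeVm(unsigned long long period, long long quota,
                 long long hypervisor_quota)
    {
        return (period || quota) && hypervisor_quota <= 0;
    }
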
Thanks
Wen Congyang
Thanks,
-Kame