---
src/conf/domain_conf.c | 272 ++++++++++++++++++++++-
src/conf/domain_conf.h | 25 ++
src/libvirt_private.syms | 4 +
src/qemu/qemu_cgroup.c | 131 +++++++++++
src/qemu/qemu_cgroup.h | 2 +
src/qemu/qemu_process.c | 4 +
tests/qemuxml2argvdata/qemuxml2argv-cputune.xml | 2 +
7 files changed, 438 insertions(+), 2 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 60e0318..0a1f973 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -997,6 +997,21 @@ virDomainVcpuPinDefFree(virDomainVcpuPinDefPtr *def,
VIR_FREE(def);
}
+/* Release a list of per-vcpu bandwidth definitions.
+ *
+ * @def:     array of definition pointers, may be NULL
+ * @nvcpubw: number of populated entries in @def
+ *
+ * Frees the first @nvcpubw entries and then the array itself.  The array
+ * is released even when @nvcpubw is 0, so a list that was allocated but
+ * never populated is not leaked.  Only the local copy of the pointer is
+ * cleared; the caller's pointer is not reset. */
+static void
+virDomainVcpuBWDefFree(virDomainVcpuBWDefPtr *def,
+                       int nvcpubw)
+{
+    int i;
+
+    if (!def)
+        return;
+
+    for (i = 0; i < nvcpubw; i++)
+        VIR_FREE(def[i]);
+
+    VIR_FREE(def);
+}
+
void virDomainDefFree(virDomainDefPtr def)
{
unsigned int i;
@@ -1089,6 +1104,9 @@ void virDomainDefFree(virDomainDefPtr def)
virCPUDefFree(def->cpu);
+ virDomainVcpuBWDefFree(def->cputune.vcpubw,
+ def->cputune.nvcpubw);
+
virDomainVcpuPinDefFree(def->cputune.vcpupin, def->cputune.nvcpupin);
VIR_FREE(def->numatune.memory.nodemask);
@@ -5715,6 +5733,62 @@ error:
goto cleanup;
}
+/* Parse one <bandwidth> element found under <cputune>.
+ *
+ * @node:     the element to parse
+ * @ctxt:     XPath context; its current node is pointed at @node for the
+ *            duration of the call and restored before returning
+ * @maxvcpus: exclusive upper bound for the 'vcpu' attribute
+ *
+ * Failure to read the 'vcpu' attribute is an error.  'period' and 'quota'
+ * fall back to 0 when they cannot be read.
+ *
+ * Returns a newly allocated definition that the caller must free, or NULL
+ * after an error has been reported. */
+static virDomainVcpuBWDefPtr
+virDomainVcpuBWDefParseXML(const xmlNodePtr node,
+                           xmlXPathContextPtr ctxt,
+                           int maxvcpus)
+{
+    virDomainVcpuBWDefPtr def;
+    xmlNodePtr oldnode = ctxt->node;
+    unsigned int vcpuid;
+    unsigned long long period;
+    long long quota;
+    int ret;
+
+    if (VIR_ALLOC(def) < 0) {
+        virReportOOMError();
+        return NULL;
+    }
+
+    ctxt->node = node;
+
+    ret = virXPathUInt("string(./@vcpu)", ctxt, &vcpuid);
+    if (ret == -2) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("vcpu id must be an unsigned integer"));
+        goto error;
+    } else if (ret == -1) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("can't parse bandwidth node"));
+        goto error;
+    }
+
+    if (vcpuid >= maxvcpus) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("vcpu id must be less than maxvcpus"));
+        goto error;
+    }
+
+    /* Both tunables are optional; an unreadable value is treated as unset */
+    if (virXPathULongLong("string(./@period)", ctxt, &period) < 0)
+        period = 0;
+
+    if (virXPathLongLong("string(./@quota)", ctxt, &quota) < 0)
+        quota = 0;
+
+    def->vcpuid = vcpuid;
+    def->period = period;
+    def->quota = quota;
+
+cleanup:
+    /* Always hand the XPath context back in the state we received it */
+    ctxt->node = oldnode;
+    return def;
+
+error:
+    /* VIR_FREE() resets def to NULL, which is what cleanup then returns */
+    VIR_FREE(def);
+    goto cleanup;
+}
+
static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
xmlDocPtr xml,
@@ -5881,6 +5955,49 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps,
&def->cputune.shares) < 0)
def->cputune.shares = 0;
+    /* Collect the per-vcpu <bandwidth> elements under <cputune> */
+    if ((n = virXPathNodeSet("./cputune/bandwidth", ctxt, &nodes)) < 0)
+        goto error;
+
+    if (n > def->maxvcpus) {
+        virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                             "%s", _("bandwidth nodes must not exceed"
+                                     " maxvcpus"));
+        goto error;
+    }
+
+    /* Size the list for the worst case; it is trimmed after the loop */
+    if (n && VIR_ALLOC_N(def->cputune.vcpubw, n) < 0)
+        goto no_memory;
+
+    for (i = 0; i < n; i++) {
+        virDomainVcpuBWDefPtr vcpubw = NULL;
+        vcpubw = virDomainVcpuBWDefParseXML(nodes[i], ctxt, def->maxvcpus);
+
+        if (!vcpubw)
+            goto error;
+
+        if (virDomainVcpuBWIsDuplicate(def->cputune.vcpubw,
+                                       def->cputune.nvcpubw,
+                                       vcpubw->vcpuid)) {
+            virDomainReportError(VIR_ERR_INTERNAL_ERROR,
+                                 "%s", _("duplicate vcpubandwidth for same"
+                                         " vcpu"));
+            VIR_FREE(vcpubw);
+            goto error;
+        }
+
+        /* Entries with neither period nor quota carry no setting: drop them */
+        if (vcpubw->period || vcpubw->quota)
+            def->cputune.vcpubw[def->cputune.nvcpubw++] = vcpubw;
+        else
+            VIR_FREE(vcpubw);
+    }
+    /* Shrink to the number of entries kept, or release an empty list */
+    if (def->cputune.nvcpubw)
+        ignore_value(VIR_REALLOC_N(def->cputune.vcpubw,
+                                   def->cputune.nvcpubw));
+    else
+        VIR_FREE(def->cputune.vcpubw);
+
+    VIR_FREE(nodes);
+
if ((n = virXPathNodeSet("./cputune/vcpupin", ctxt, &nodes)) < 0) {
goto error;
}
@@ -8274,6 +8391,144 @@ virDomainVcpuPinDel(virDomainDefPtr def, int vcpu)
return 0;
}
+/* Report whether the first @nvcpubw entries of @def already contain a
+ * bandwidth definition for @vcpu.
+ * Returns 1 if one exists, 0 if not (also for a NULL or empty list). */
+int
+virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def,
+                           int nvcpubw,
+                           int vcpu)
+{
+    int idx = 0;
+
+    if (def == NULL || nvcpubw == 0)
+        return 0;
+
+    while (idx < nvcpubw) {
+        if (vcpu == def[idx]->vcpuid)
+            return 1;
+        idx++;
+    }
+
+    return 0;
+}
+
+/* Look up the bandwidth definition for @vcpu among the first @nvcpubw
+ * entries of @def.
+ * Returns the matching entry (still owned by the list), or NULL when the
+ * list is NULL, empty, or holds no entry for @vcpu. */
+virDomainVcpuBWDefPtr
+virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def,
+                          int nvcpubw,
+                          int vcpu)
+{
+    virDomainVcpuBWDefPtr match = NULL;
+    int idx;
+
+    if (def != NULL && nvcpubw != 0) {
+        /* stop at the first entry whose vcpu id matches */
+        for (idx = 0; idx < nvcpubw && match == NULL; idx++) {
+            if (vcpu == def[idx]->vcpuid)
+                match = def[idx];
+        }
+    }
+
+    return match;
+}
+
+/* Set, update or clear the bandwidth definition of one vcpu.
+ *
+ * @def:    domain definition whose cputune.vcpubw list is modified
+ * @period: new period value
+ * @quota:  new quota value
+ * @vcpu:   vcpu id the values apply to
+ *
+ * If both @period and @quota are 0 an existing entry for @vcpu is removed
+ * and nothing is added.  Otherwise an existing entry is updated in place,
+ * or a new entry is appended to the list.
+ *
+ * Returns 0 on success, -1 if memory could not be allocated (the list is
+ * left unchanged in that case). */
+int
+virDomainVcpuBWAdd(virDomainDefPtr def,
+                   unsigned long long period,
+                   long long quota,
+                   int vcpu)
+{
+    virDomainVcpuBWDefPtr vcpubw = NULL;
+    bool unset = (period == 0 && quota == 0);
+
+    /* A single lookup tells us both whether an entry exists and where */
+    vcpubw = virDomainVcpuBWFindByVcpu(def->cputune.vcpubw,
+                                       def->cputune.nvcpubw,
+                                       vcpu);
+    if (vcpubw) {
+        if (unset)
+            return virDomainVcpuBWDel(def, vcpu);
+
+        vcpubw->period = period;
+        vcpubw->quota = quota;
+        return 0;
+    }
+
+    /* Nothing stored for this vcpu and nothing to store */
+    if (unset)
+        return 0;
+
+    if (VIR_ALLOC(vcpubw) < 0)
+        goto no_memory;
+
+    /* Grow the list through the pointer stored in @def itself: resizing a
+     * local copy would leave def->cputune.vcpubw pointing at the old block
+     * if the array moves.  This also covers the initially empty list. */
+    if (VIR_REALLOC_N(def->cputune.vcpubw, def->cputune.nvcpubw + 1) < 0)
+        goto no_memory;
+
+    vcpubw->vcpuid = vcpu;
+    vcpubw->period = period;
+    vcpubw->quota = quota;
+    def->cputune.vcpubw[def->cputune.nvcpubw++] = vcpubw;
+
+    return 0;
+
+no_memory:
+    virReportOOMError();
+    VIR_FREE(vcpubw);
+    return -1;
+}
+
+/* Remove the bandwidth definition of @vcpu from @def, if there is one.
+ * The remaining entries are moved down to close the gap and the list is
+ * shrunk, or freed entirely when it becomes empty.
+ * Always returns 0. */
+int
+virDomainVcpuBWDel(virDomainDefPtr def, int vcpu)
+{
+    virDomainVcpuBWDefPtr *list = def->cputune.vcpubw;
+    int total = def->cputune.nvcpubw;
+    int pos;
+
+    /* Empty list: nothing to delete */
+    if (total == 0)
+        return 0;
+
+    /* Locate the entry for this vcpu */
+    for (pos = 0; pos < total; pos++) {
+        if (list[pos]->vcpuid == vcpu)
+            break;
+    }
+
+    /* No entry for this vcpu */
+    if (pos >= total)
+        return 0;
+
+    VIR_FREE(list[pos]);
+    memmove(&list[pos], &list[pos + 1],
+            (total - pos - 1) * sizeof(virDomainVcpuBWDefPtr));
+
+    def->cputune.nvcpubw = total - 1;
+    if (def->cputune.nvcpubw == 0) {
+        VIR_FREE(def->cputune.vcpubw);
+    } else if (VIR_REALLOC_N(def->cputune.vcpubw,
+                             def->cputune.nvcpubw) < 0) {
+        /* ignore, harmless */
+    }
+
+    return 0;
+}
+
static int
virDomainLifecycleDefFormat(virBufferPtr buf,
int type,
@@ -9553,12 +9808,24 @@ char *virDomainDefFormat(virDomainDefPtr def,
virBufferAsprintf(&buf, " current='%u'", def->vcpus);
virBufferAsprintf(&buf, ">%u</vcpu>\n", def->maxvcpus);
- if (def->cputune.shares || def->cputune.vcpupin)
+ if (def->cputune.shares || def->cputune.vcpupin ||
+ def->cputune.vcpubw)
virBufferAddLit(&buf, " <cputune>\n");
if (def->cputune.shares)
virBufferAsprintf(&buf, " <shares>%lu</shares>\n",
def->cputune.shares);
+    if (def->cputune.vcpubw) {
+        int i;
+        /* Emit one <bandwidth> element per stored per-vcpu definition */
+        for (i = 0; i < def->cputune.nvcpubw; i++) {
+            virBufferAsprintf(&buf, " <bandwidth vcpu='%u' ",
+                              def->cputune.vcpubw[i]->vcpuid);
+            virBufferAsprintf(&buf, "period='%llu' ",
+                              def->cputune.vcpubw[i]->period);
+            virBufferAsprintf(&buf, "quota='%lld'/>\n",
+                              def->cputune.vcpubw[i]->quota);
+        }
+    }
if (def->cputune.vcpupin) {
int i;
for (i = 0; i < def->cputune.nvcpupin; i++) {
@@ -9580,7 +9847,8 @@ char *virDomainDefFormat(virDomainDefPtr def,
}
}
- if (def->cputune.shares || def->cputune.vcpupin)
+ if (def->cputune.shares || def->cputune.vcpupin ||
+ def->cputune.vcpubw)
virBufferAddLit(&buf, " </cputune>\n");
if (def->numatune.memory.nodemask)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index e81977c..a2929b5 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1108,6 +1108,14 @@ struct _virDomainVcpuPinDef {
char *cpumask;
};
+/* Bandwidth setting for a single vcpu, filled from the vcpu, period and
+ * quota attributes of a <bandwidth> element under <cputune>. */
+typedef struct _virDomainVcpuBWDef virDomainVcpuBWDef;
+typedef virDomainVcpuBWDef *virDomainVcpuBWDefPtr;
+struct _virDomainVcpuBWDef {
+ /* id of the vcpu this entry applies to */
+ int vcpuid;
+ /* 0 means unset.  NOTE(review): units are not visible here; the value
+  * is handed unchanged to virCgroupSetCpuCfsPeriod() - confirm */
+ unsigned long long period;
+ /* 0 means unset.  NOTE(review): units and the meaning of negative
+  * values are not visible here; the value is handed unchanged to
+  * virCgroupSetCpuCfsQuota() - confirm */
+ long long quota;
+};
+
int virDomainVcpuPinIsDuplicate(virDomainVcpuPinDefPtr *def,
int nvcpupin,
int vcpu);
@@ -1116,6 +1124,14 @@ virDomainVcpuPinDefPtr
virDomainVcpuPinFindByVcpu(virDomainVcpuPinDefPtr *def,
int nvcpupin,
int vcpu);
+/* Returns 1 if the first nvcpubw entries of def contain a bandwidth
+ * definition for vcpu, 0 otherwise (also for a NULL or empty list). */
+int virDomainVcpuBWIsDuplicate(virDomainVcpuBWDefPtr *def,
+ int nvcpubw,
+ int vcpu);
+
+/* Returns the bandwidth definition for vcpu among the first nvcpubw
+ * entries of def, or NULL if the list is NULL, empty or has no match. */
+virDomainVcpuBWDefPtr virDomainVcpuBWFindByVcpu(virDomainVcpuBWDefPtr *def,
+ int nvcpubw,
+ int vcpu);
+
enum virDomainNumatuneMemMode {
VIR_DOMAIN_NUMATUNE_MEM_STRICT,
VIR_DOMAIN_NUMATUNE_MEM_PREFERRED,
@@ -1170,6 +1186,8 @@ struct _virDomainDef {
struct {
unsigned long shares;
+ int nvcpubw;
+ virDomainVcpuBWDefPtr *vcpubw;
int nvcpupin;
virDomainVcpuPinDefPtr *vcpupin;
} cputune;
@@ -1413,6 +1431,13 @@ int virDomainVcpuPinAdd(virDomainDefPtr def,
int virDomainVcpuPinDel(virDomainDefPtr def, int vcpu);
+/* Adds a bandwidth definition for vcpu to def, or updates the existing
+ * one.  When period and quota are both 0 an existing definition is
+ * removed and nothing is added.  Returns 0 on success, -1 when memory
+ * allocation fails. */
+int virDomainVcpuBWAdd(virDomainDefPtr def,
+ unsigned long long period,
+ long long quota,
+ int vcpu);
+
+/* Removes the bandwidth definition for vcpu from def if one exists.
+ * Always returns 0. */
+int virDomainVcpuBWDel(virDomainDefPtr def, int vcpu);
+
int virDomainDiskIndexByName(virDomainDefPtr def, const char *name);
int virDomainDiskInsert(virDomainDefPtr def,
virDomainDiskDefPtr disk);
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 9b9b6ce..aad0c3a 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -371,6 +371,10 @@ virDomainTimerTickpolicyTypeFromString;
virDomainTimerTickpolicyTypeToString;
virDomainTimerTrackTypeFromString;
virDomainTimerTrackTypeToString;
+virDomainVcpuBWAdd;
+virDomainVcpuBWDel;
+virDomainVcpuBWFindByVcpu;
+virDomainVcpuBWIsDuplicate;
virDomainVcpuPinAdd;
virDomainVcpuPinDel;
virDomainVcpuPinFindByVcpu;
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 1298924..201c0b8 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -24,6 +24,7 @@
#include <config.h>
#include "qemu_cgroup.h"
+#include "qemu_domain.h"
#include "cgroup.h"
#include "logging.h"
#include "memory.h"
@@ -376,6 +377,136 @@ cleanup:
return -1;
}
+/* Apply the period and quota of one bandwidth definition to a cgroup.
+ *
+ * @cgroup: cgroup to configure
+ * @vcpubw: bandwidth definition, may be NULL
+ *
+ * A NULL @vcpubw, or one with both period and quota 0, is a no-op.  A
+ * value of 0 for either field leaves that tunable untouched.  If setting
+ * the quota fails after the period was changed, the previous period is
+ * restored.
+ *
+ * Returns 0 on success, -1 on failure with an error reported. */
+int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw)
+{
+    int rc;
+    unsigned long long old_period = 0;
+
+    if (!vcpubw)
+        return 0;
+
+    if (vcpubw->period == 0 && vcpubw->quota == 0)
+        return 0;
+
+    if (vcpubw->period) {
+        /* get old period, and we can rollback if set quota failed */
+        rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period);
+        if (rc < 0) {
+            virReportSystemError(-rc, "%s",
+                                 _("Unable to get cpu bandwidth period"));
+            return -1;
+        }
+
+        rc = virCgroupSetCpuCfsPeriod(cgroup, vcpubw->period);
+        if (rc < 0) {
+            virReportSystemError(-rc, "%s",
+                                 _("Unable to set cpu bandwidth period"));
+            return -1;
+        }
+    }
+
+    if (vcpubw->quota) {
+        rc = virCgroupSetCpuCfsQuota(cgroup, vcpubw->quota);
+        if (rc < 0) {
+            virReportSystemError(-rc, "%s",
+                                 _("Unable to set cpu bandwidth quota"));
+            goto cleanup;
+        }
+    }
+
+    return 0;
+
+cleanup:
+    /* Only undo the period if this call was the one that changed it */
+    if (vcpubw->period) {
+        rc = virCgroupSetCpuCfsPeriod(cgroup, old_period);
+        if (rc < 0)
+            virReportSystemError(-rc, "%s",
+                                 _("Unable to rollback cpu bandwidth period"));
+    }
+
+    return -1;
+}
+
+/* Create one sub-cgroup per vcpu thread and apply the per-vcpu bandwidth
+ * settings of the domain definition.
+ *
+ * @driver: qemu driver state; nothing is done if it has no cgroup
+ * @vm:     running domain whose vcpu threads are to be placed
+ *
+ * When the vcpu<->thread mapping is unknown, or vcpu 0 runs in the main
+ * qemu thread, vcpus cannot be controlled individually; the last entry of
+ * the bandwidth list is then applied to the domain cgroup as a whole.
+ * Otherwise each vcpu thread is moved into its own sub-cgroup and the
+ * matching bandwidth definition, if any, is applied to that sub-cgroup.
+ *
+ * Returns 0 on success, -1 on failure with an error reported.  On failure
+ * the domain cgroup is removed. */
+int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm)
+{
+    virCgroupPtr cgroup = NULL;
+    virCgroupPtr cgroup_vcpu = NULL;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    int rc;
+    unsigned int i;
+    virDomainVcpuBWDefPtr *vcpubw_list = vm->def->cputune.vcpubw;
+    virDomainVcpuBWDefPtr vcpubw = NULL;
+    int nvcpubw = vm->def->cputune.nvcpubw;
+
+    if (driver->cgroup == NULL)
+        return 0; /* Not supported, so claim success */
+
+    rc = virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0);
+    if (rc != 0) {
+        virReportSystemError(-rc,
+                             _("Unable to find cgroup for %s"),
+                             vm->def->name);
+        goto cleanup;
+    }
+
+    if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
+        /* If we does not know VCPU<->PID mapping or all vcpu runs in the same
+         * thread, we can not control each vcpu. So just use the last config.
+         */
+        if (vcpubw_list && nvcpubw > 0) {
+            if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
+                if (qemuSetupCgroupVcpuBW(cgroup, vcpubw_list[nvcpubw - 1]) < 0)
+                    goto cleanup;
+            }
+        }
+        /* Release our handle on the domain cgroup before leaving */
+        virCgroupFree(&cgroup);
+        return 0;
+    }
+
+    for (i = 0; i < priv->nvcpupids; i++) {
+        rc = virCgroupForVcpu(cgroup, i, &cgroup_vcpu, 1);
+        if (rc < 0) {
+            virReportSystemError(-rc,
+                                 _("Unable to create vcpu cgroup for %s(vcpu:"
+                                   " %d)"),
+                                 vm->def->name, i);
+            goto cleanup;
+        }
+
+        /* move the thread for vcpu to sub dir */
+        rc = virCgroupAddTask(cgroup_vcpu, priv->vcpupids[i]);
+        if (rc < 0) {
+            virReportSystemError(-rc,
+                                 _("unable to add vcpu %d task %d to cgroup"),
+                                 i, priv->vcpupids[i]);
+            goto cleanup;
+        }
+
+        if (vcpubw_list) {
+            if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) {
+                vcpubw = virDomainVcpuBWFindByVcpu(vcpubw_list, nvcpubw, i);
+                /* The limit belongs on this vcpu's own sub-cgroup, not on
+                 * the domain-wide cgroup shared by all vcpus */
+                if (qemuSetupCgroupVcpuBW(cgroup_vcpu, vcpubw) < 0)
+                    goto cleanup;
+            }
+        }
+
+        virCgroupFree(&cgroup_vcpu);
+    }
+
+    virCgroupFree(&cgroup);
+    return 0;
+
+cleanup:
+    virCgroupFree(&cgroup_vcpu);
+    if (cgroup) {
+        virCgroupRemove(cgroup);
+        virCgroupFree(&cgroup);
+    }
+
+    return -1;
+}
+
int qemuRemoveCgroup(struct qemud_driver *driver,
virDomainObjPtr vm,
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index e8abfb4..f0a5cee 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -49,6 +49,8 @@ int qemuSetupHostUsbDeviceCgroup(usbDevice *dev,
void *opaque);
int qemuSetupCgroup(struct qemud_driver *driver,
virDomainObjPtr vm);
+/* Applies the period/quota of vcpubw to cgroup; a NULL vcpubw or one with
+ * both values 0 is a no-op.  Returns 0 on success, -1 on failure. */
+int qemuSetupCgroupVcpuBW(virCgroupPtr cgroup, virDomainVcpuBWDefPtr vcpubw);
+/* Places each vcpu thread of vm in its own sub-cgroup and applies the
+ * configured per-vcpu bandwidth.  Returns 0 on success (also when the
+ * driver has no cgroup), -1 on failure. */
+int qemuSetupCgroupForVcpu(struct qemud_driver *driver, virDomainObjPtr vm);
int qemuRemoveCgroup(struct qemud_driver *driver,
virDomainObjPtr vm,
int quiet);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 88a31a3..ce3a4bb 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2677,6 +2677,10 @@ int qemuProcessStart(virConnectPtr conn,
if (qemuProcessDetectVcpuPIDs(driver, vm) < 0)
goto cleanup;
+ VIR_DEBUG("Setting cgroup for each VCPU(if required)");
+ if (qemuSetupCgroupForVcpu(driver, vm) < 0)
+ goto cleanup;
+
VIR_DEBUG("Setting VCPU affinities");
if (qemuProcessSetVcpuAffinites(conn, vm) < 0)
goto cleanup;
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
index 0afbadb..0a67e40 100644
--- a/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
+++ b/tests/qemuxml2argvdata/qemuxml2argv-cputune.xml
@@ -6,6 +6,8 @@
<vcpu>2</vcpu>
<cputune>
<shares>2048</shares>
+ <bandwidth vcpu='0' period='1000000' quota='-1'/>
+ <bandwidth vcpu='1' period='1000' quota='1000'/>
<vcpupin vcpu='0' cpuset='0'/>
<vcpupin vcpu='1' cpuset='1'/>
</cputune>
--
1.7.1