From: Hu Tao <hutao(a)cn.fujitsu.com>
Implement setting/getting per-device blkio weights in qemu,
using the cgroups blkio.weight_device tunable.
---
v5: did not include this patch
v6: split v4 2/2 into two parts; this part covers just the qemu.
Rebase to accomodate 'device_weight' naming and VIR_TYPED_PARAM_STRING
handling.
This patch is broken, with several flaws:
1. 'virsh blkiotune dom --device-weight /dev/sda,500 --live' does not
alter the 'virsh dumpxml dom' live XML. The cgroups change is
active, but unless we also reflect the change in the eventual xml dump,
then we aren't accurately telling the user the current state of the
domain. This means that qemuDomainSetBlkioParameters needs a fix.
2. 'virsh blkiotune dom --live' now shows output that looks like:
weight : 500
device_weight : 8,0 500
whereas 'virsh blkiotune dom --config' shows:
weight : 500
device_weight : /dev/sda,500
The --config version is correct - it accurately reflects the XML.
The --live version is wrong - we MUST NOT expose major,minor
back to the user, since we intentionally decided that the XML
should track only the device name, not the major/minor implementation
detail of cgroups. I think the correct fix would be to just delete
virCgroupGetBlkioDeviceWeight and treat cgroups blkio.weight_device
as write-only, and instead have qemuDomainGetBlkioParameters always
produce output from the domain definition (--live vs. --config merely
choosing between vm->def vs. persistentDef), which stems back to my
solution of point 1 in having in vm->def accurately reflect whatever
is written into cgroups.
src/qemu/qemu_cgroup.c | 22 ++++++
src/qemu/qemu_driver.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++-
src/util/cgroup.c | 33 ++++++++
src/util/cgroup.h | 4 +
4 files changed, 245 insertions(+), 5 deletions(-)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 2a10bd2..d397578 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -312,6 +312,28 @@ int qemuSetupCgroup(struct qemud_driver *driver,
}
}
+ if (qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_BLKIO)) {
+ char *tmp;
+ if (virBlkioDeviceWeightToStr(vm->def->blkio.devices,
+ vm->def->blkio.ndevices,
+ &tmp) < 0) {
+ qemuReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Unable to set io device weight for domain %s"),
+ vm->def->name);
+ goto cleanup;
+ }
+ if (tmp) {
+ rc = virCgroupSetBlkioDeviceWeight(cgroup, tmp);
+ VIR_FREE(tmp);
+ if (rc != 0) {
+ virReportSystemError(-rc,
+ _("Unable to set io device weight for domain
%s"),
+ vm->def->name);
+ goto cleanup;
+ }
+ }
+ }
+
if (vm->def->mem.hard_limit != 0 ||
vm->def->mem.soft_limit != 0 ||
vm->def->mem.swap_hard_limit != 0) {
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index c8a858e..6b0acf0 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -112,7 +112,7 @@
# define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
#endif
-#define QEMU_NB_BLKIO_PARAM 1
+#define QEMU_NB_BLKIO_PARAM 2
static void processWatchdogEvent(void *data, void *opaque);
@@ -5888,6 +5888,82 @@ cleanup:
return ret;
}
+/* deviceWeightStr in the form of /device/path,weight;/device/path,weight
+ * for example, /dev/disk/by-path/pci-0000:00:1f.2-scsi-0:0:0:0,800
+ */
+static int
+parseBlkioWeightDeviceStr(char *deviceWeightStr,
+ virBlkioDeviceWeightPtr *dw, int *size)
+{
+ char *temp;
+ int ndevices = 0;
+ int i;
+ virBlkioDeviceWeightPtr result = NULL;
+
+ temp = deviceWeightStr;
+ while (temp) {
+ temp = strchr(temp, ';');
+ if (temp) {
+ temp++;
+ if (*temp == '\0')
+ break;
+ ndevices++;
+ }
+ }
+ ndevices++;
+
+ if (VIR_ALLOC_N(result, ndevices) < 0) {
+ virReportOOMError();
+ return -1;
+ }
+
+ i = 0;
+ temp = deviceWeightStr;
+ while (temp && i < ndevices) {
+ char *p = temp;
+
+ /* device path */
+
+ p = strchr(p, ',');
+ if (!p)
+ goto error;
+
+ result[i].path = strndup(temp, p - temp);
+ if (!result[i].path) {
+ virReportOOMError();
+ goto cleanup;
+ }
+
+ /* weight */
+ temp = p + 1;
+
+ if (virStrToLong_i(temp, &p, 10, &result[i].weight) < 0)
+ goto error;
+
+ i++;
+ if (*p == '\0')
+ break;
+ else if (*p != ';')
+ goto error;
+ temp = p + 1;
+ }
+ if (i != ndevices)
+ goto error;
+
+ *dw = result;
+ *size = i;
+
+ return 0;
+
+error:
+ qemuReportError(VIR_ERR_INVALID_ARG,
+ _("unable to parse %s"), deviceWeightStr);
+cleanup:
+ virBlkioDeviceWeightArrayClear(result, ndevices);
+ VIR_FREE(result);
+ return -1;
+}
+
static int qemuDomainSetBlkioParameters(virDomainPtr dom,
virTypedParameterPtr params,
int nparams,
@@ -5954,10 +6030,10 @@ static int qemuDomainSetBlkioParameters(virDomainPtr dom,
ret = 0;
if (flags & VIR_DOMAIN_AFFECT_LIVE) {
for (i = 0; i < nparams; i++) {
+ int rc;
virTypedParameterPtr param = ¶ms[i];
if (STREQ(param->field, VIR_DOMAIN_BLKIO_WEIGHT)) {
- int rc;
if (param->type != VIR_TYPED_PARAM_UINT) {
qemuReportError(VIR_ERR_INVALID_ARG, "%s",
_("invalid type for blkio weight tunable,
expected a 'unsigned int'"));
@@ -5978,6 +6054,45 @@ static int qemuDomainSetBlkioParameters(virDomainPtr dom,
_("unable to set blkio weight
tunable"));
ret = -1;
}
+ } else if (STREQ(param->field, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT)) {
+ int ndevices;
+ virBlkioDeviceWeightPtr devices = NULL;
+ char *tmp;
+ if (param->type != VIR_TYPED_PARAM_STRING) {
+ qemuReportError(VIR_ERR_INVALID_ARG, "%s",
+ _("invalid type for device_weight tunable,
expected a 'char *'"));
+ ret = -1;
+ continue;
+ }
+
+ if (parseBlkioWeightDeviceStr(params[i].value.s,
+ &devices,
+ &ndevices) < 0) {
+ ret = -1;
+ continue;
+ }
+ if (virBlkioDeviceWeightToStr(devices,
+ ndevices,
+ &tmp) < 0) {
+ qemuReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Unable to set blkio weight_device
tunable"));
+ virBlkioDeviceWeightArrayClear(devices, ndevices);
+ VIR_FREE(devices);
+ ret = -1;
+ continue;
+ }
+ virBlkioDeviceWeightArrayClear(devices, ndevices);
+ VIR_FREE(devices);
+ if (tmp) {
+ rc = virCgroupSetBlkioDeviceWeight(group, tmp);
+ VIR_FREE(tmp);
+ if (rc != 0) {
+ virReportSystemError(-rc, "%s",
+ _("unable to set blkio
weight_device tunable"));
+ ret = -1;
+ continue;
+ }
+ }
} else {
qemuReportError(VIR_ERR_INVALID_ARG,
_("Parameter `%s' not supported"),
param->field);
@@ -6007,9 +6122,24 @@ static int qemuDomainSetBlkioParameters(virDomainPtr dom,
}
persistentDef->blkio.weight = params[i].value.ui;
+ } else if (STREQ(param->field, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT)) {
+ virBlkioDeviceWeightPtr devices = NULL;
+ int ndevices;
+ if (parseBlkioWeightDeviceStr(params[i].value.s,
+ &devices,
+ &ndevices) < 0) {
+ ret = -1;
+ continue;
+ }
+ virBlkioDeviceWeightArrayClear(persistentDef->blkio.devices,
+ persistentDef->blkio.ndevices);
+ VIR_FREE(persistentDef->blkio.devices);
+ persistentDef->blkio.devices = devices;
+ persistentDef->blkio.ndevices = ndevices;
} else {
qemuReportError(VIR_ERR_INVALID_ARG,
- _("Parameter `%s' not supported"),
param->field);
+ _("Parameter `%s' not supported"),
+ param->field);
ret = -1;
}
}
@@ -6032,7 +6162,7 @@ static int qemuDomainGetBlkioParameters(virDomainPtr dom,
unsigned int flags)
{
struct qemud_driver *driver = dom->conn->privateData;
- int i;
+ int i, j;
virCgroupPtr group = NULL;
virDomainObjPtr vm = NULL;
virDomainDefPtr persistentDef = NULL;
@@ -6046,7 +6176,8 @@ static int qemuDomainGetBlkioParameters(virDomainPtr dom,
VIR_TYPED_PARAM_STRING_OKAY, -1);
qemuDriverLock(driver);
- /* We don't return strings, and thus trivially support this flag. */
+ /* We blindly return a string, and let libvirt.c do the filtering
+ * on behalf of older clients that can't parse it. */
flags &= ~VIR_TYPED_PARAM_STRING_OKAY;
vm = virDomainFindByUUID(&driver->domains, dom->uuid);
@@ -6104,6 +6235,7 @@ static int qemuDomainGetBlkioParameters(virDomainPtr dom,
if (flags & VIR_DOMAIN_AFFECT_LIVE) {
for (i = 0; i < *nparams && i < QEMU_NB_BLKIO_PARAM; i++) {
+ char *device_weights;
virTypedParameterPtr param = ¶ms[i];
val = 0;
param->value.ui = 0;
@@ -6125,6 +6257,23 @@ static int qemuDomainGetBlkioParameters(virDomainPtr dom,
}
param->value.ui = val;
break;
+ case 1: /* blkiotune.device_weight */
+ param->type = VIR_TYPED_PARAM_STRING;
+ rc = virCgroupGetBlkioDeviceWeight(group, &device_weights);
+ if (rc != 0) {
+ virReportSystemError(-rc, "%s",
+ _("unable to get blkio device
weights"));
+ goto cleanup;
+ }
+ if (virStrcpyStatic(param->field,
+ VIR_DOMAIN_BLKIO_DEVICE_WEIGHT) == NULL) {
+ qemuReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Field name '%s' too long"),
+ VIR_DOMAIN_BLKIO_DEVICE_WEIGHT);
+ goto cleanup;
+ }
+ param->value.s = device_weights;
+ break;
default:
break;
@@ -6149,6 +6298,38 @@ static int qemuDomainGetBlkioParameters(virDomainPtr dom,
param->value.ui = persistentDef->blkio.weight;
break;
+ case 1: /* blkiotune.device_weight */
+ if (persistentDef->blkio.ndevices > 0) {
+ virBuffer buf = VIR_BUFFER_INITIALIZER;
+ for (j = 0; j < persistentDef->blkio.ndevices; j++) {
+ if (j)
+ virBufferAddChar(&buf, ';');
+ virBufferAsprintf(&buf, "%s,%d",
+ persistentDef->blkio.devices[j].path,
+ persistentDef->blkio.devices[j].weight);
+ }
+ if (virBufferError(&buf)) {
+ virReportOOMError();
+ goto cleanup;
+ }
+ param->value.s = virBufferContentAndReset(&buf);
+ } else {
+ param->value.s = strdup("");
+ if (!param->value.s) {
+ virReportOOMError();
+ goto cleanup;
+ }
+ }
+ param->type = VIR_TYPED_PARAM_STRING;
+ if (virStrcpyStatic(param->field,
+ VIR_DOMAIN_BLKIO_DEVICE_WEIGHT) == NULL) {
+ qemuReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Field name '%s' too long"),
+ VIR_DOMAIN_BLKIO_DEVICE_WEIGHT);
+ goto cleanup;
+ }
+ break;
+
default:
break;
/* should not hit here */
diff --git a/src/util/cgroup.c b/src/util/cgroup.c
index c8d1f33..458bf91 100644
--- a/src/util/cgroup.c
+++ b/src/util/cgroup.c
@@ -982,6 +982,39 @@ int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int
*weight)
}
/**
+ * virCgroupSetBlkioDeviceWeight:
+ *
+ * @group: The cgroup to change io device weight device for
+ * @device_weight: The device weight for this cgroup
+ *
+ * Returns: 0 on success
+ */
+int virCgroupSetBlkioDeviceWeight(virCgroupPtr group,
+ const char *device_weight)
+{
+ return virCgroupSetValueStr(group,
+ VIR_CGROUP_CONTROLLER_BLKIO,
+ "blkio.weight_device",
+ device_weight);
+}
+
+/**
+ * virCgroupGetBlkioDeviceWeight:
+ *
+ * @group: The cgroup to get weight_device for
+ * @device_weight: returned device_weight string
+ *
+ * Returns: 0 on success
+ */
+int virCgroupGetBlkioDeviceWeight(virCgroupPtr group,
+ char **device_weight)
+{
+ return virCgroupGetValueStr(group,
+ VIR_CGROUP_CONTROLLER_BLKIO,
+ "blkio.weight_device", device_weight);
+}
+
+/**
* virCgroupSetMemory:
*
* @group: The cgroup to change memory for
diff --git a/src/util/cgroup.h b/src/util/cgroup.h
index d190bb3..2eb7307 100644
--- a/src/util/cgroup.h
+++ b/src/util/cgroup.h
@@ -55,6 +55,10 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid);
int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight);
int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight);
+int virCgroupSetBlkioDeviceWeight(virCgroupPtr group,
+ const char *device_weight);
+int virCgroupGetBlkioDeviceWeight(virCgroupPtr group, char **device_weight);
+
int virCgroupSetMemory(virCgroupPtr group, unsigned long long kb);
int virCgroupGetMemoryUsage(virCgroupPtr group, unsigned long *kb);
--
1.7.4.4