[libvirt] [PATCH 0/5] Support Blkio tune, CPU tune, NUMA, CPU affinity in LXC

The following patch series brings the LXC driver up to parity with the QEMU driver in terms of support for the various performance tuning controls: blkio weight, CPU shares, CFS quota/period, NUMA memory placement, and CPU affinity.
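All of these controls are driven from the same domain XML schema the QEMU driver already consumes. For reference, an LXC guest exercising the new tunables might look like the following (the element names are the existing libvirt schema; the domain name and values are purely illustrative):

  <domain type='lxc'>
    <name>mycontainer</name>
    <memory>524288</memory>
    <vcpu cpuset='0-3'>1</vcpu>
    <cputune>
      <shares>1024</shares>
      <period>100000</period>
      <quota>50000</quota>
    </cputune>
    <blkiotune>
      <weight>500</weight>
    </blkiotune>
    <numatune>
      <memory mode='strict' nodeset='0-1'/>
    </numatune>
    ...
  </domain>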

From: "Daniel P. Berrange" <berrange@redhat.com> To make lxcSetContainerResources smaller, pull the mem tune I/O tune, CPU tune, and device ACL setup code out into separate methods * src/lxc/lxc_controller.c: Split up lxcSetContainerResources --- src/lxc/lxc_controller.c | 154 +++++++++++++++++++++++++++++++--------------- 1 files changed, 104 insertions(+), 50 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 137ef52..d3c3b61 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -223,70 +223,49 @@ cleanup: return ret; } -/** - * lxcSetContainerResources - * @def: pointer to virtual machine structure - * - * Creates a cgroup for the container, moves the task inside, - * and sets resource limits - * - * Returns 0 on success or -1 in case of error - */ -static int lxcSetContainerResources(virDomainDefPtr def) +static int lxcSetContainerCpuTune(virCgroupPtr cgroup, virDomainDefPtr def) { - virCgroupPtr driver; - virCgroupPtr cgroup; - int rc = -1; - int i; - struct cgroup_device_policy devices[] = { - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM}, - {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY}, - {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, - {0, 0, 0}}; - - rc = virCgroupForDriver("lxc", &driver, 1, 0); - if (rc != 0) { - /* Skip all if no driver cgroup is configured */ - if (rc == -ENXIO || rc == -ENOENT) - return 0; - - virReportSystemError(-rc, "%s", - _("Unable to get cgroup for driver")); - return rc; - } - - rc = virCgroupForDomain(driver, def->name, &cgroup, 1); - if (rc != 0) { - virReportSystemError(-rc, - _("Unable to create cgroup for domain %s"), - def->name); - goto cleanup; - } - - if (def->blkio.weight) { - rc = virCgroupSetBlkioWeight(cgroup, def->blkio.weight); + int ret = -1; + if (def->cputune.shares != 0) { + int rc = virCgroupSetCpuShares(cgroup, def->cputune.shares); if (rc != 0) { virReportSystemError(-rc, - _("Unable to set Blkio weight for domain %s"), + _("Unable to set io cpu shares for domain %s"), def->name); goto cleanup; } } + ret = 0; +cleanup: + return ret; +} + - if (def->cputune.shares) { - rc = virCgroupSetCpuShares(cgroup, def->cputune.shares); +static int lxcSetContainerBlkioTune(virCgroupPtr cgroup, virDomainDefPtr def) +{ + int ret = -1; + + if (def->blkio.weight) { + int rc = virCgroupSetBlkioWeight(cgroup, def->blkio.weight); if (rc != 0) { virReportSystemError(-rc, - _("Unable to set cpu shares for domain %s"), + _("Unable to set Blkio weight for domain %s"), def->name); goto cleanup; } } + ret = 0; +cleanup: + return ret; +} + + +static int lxcSetContainerMemTune(virCgroupPtr cgroup, virDomainDefPtr def) +{ + int ret = -1; + int rc; + rc = virCgroupSetMemory(cgroup, def->mem.max_balloon); if (rc != 0) { virReportSystemError(-rc, @@ -325,6 +304,27 @@ static int lxcSetContainerResources(virDomainDefPtr def) } } + ret = 0; +cleanup: + return ret; +} + + +static int lxcSetContainerDeviceACL(virCgroupPtr cgroup, virDomainDefPtr def) +{ + int ret = -1; + int rc; + size_t i; + static const struct cgroup_device_policy devices[] = { + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM}, + {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY}, + {'c', 
LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, + {0, 0, 0}}; + rc = virCgroupDenyAllDevices(cgroup); if (rc != 0) { virReportSystemError(-rc, @@ -334,7 +334,7 @@ static int lxcSetContainerResources(virDomainDefPtr def) } for (i = 0; devices[i].type != 0; i++) { - struct cgroup_device_policy *dev = &devices[i]; + const struct cgroup_device_policy *dev = &devices[i]; rc = virCgroupAllowDevice(cgroup, dev->type, dev->major, @@ -374,6 +374,60 @@ static int lxcSetContainerResources(virDomainDefPtr def) goto cleanup; } + ret = 0; +cleanup: + return ret; +} + + +/** + * lxcSetContainerResources + * @def: pointer to virtual machine structure + * + * Creates a cgroup for the container, moves the task inside, + * and sets resource limits + * + * Returns 0 on success or -1 in case of error + */ +static int lxcSetContainerResources(virDomainDefPtr def) +{ + virCgroupPtr driver; + virCgroupPtr cgroup; + int rc = -1; + + rc = virCgroupForDriver("lxc", &driver, 1, 0); + if (rc != 0) { + /* Skip all if no driver cgroup is configured */ + if (rc == -ENXIO || rc == -ENOENT) + return 0; + + virReportSystemError(-rc, "%s", + _("Unable to get cgroup for driver")); + return rc; + } + + rc = virCgroupForDomain(driver, def->name, &cgroup, 1); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for domain %s"), + def->name); + goto cleanup; + } + + rc = -1; + + if (lxcSetContainerCpuTune(cgroup, def) < 0) + goto cleanup; + + if (lxcSetContainerBlkioTune(cgroup, def) < 0) + goto cleanup; + + if (lxcSetContainerMemTune(cgroup, def) < 0) + goto cleanup; + + if (lxcSetContainerDeviceACL(cgroup, def) < 0) + goto cleanup; + rc = virCgroupAddTask(cgroup, getpid()); if (rc != 0) { virReportSystemError(-rc, -- 1.7.6.4
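For reviewers unfamiliar with the virCgroup wrappers used above: each setter boils down to writing a value into the matching cgroupfs control file for the domain's group. As a rough sketch of what virCgroupSetBlkioWeight amounts to (the mount point and group layout below are assumptions for illustration only; the real helper resolves them from the host's cgroup mounts and the group placement):

  #include <stdio.h>

  /* Hypothetical path: assumes the blkio controller is mounted at
   * /sys/fs/cgroup/blkio and the domain's group lives under libvirt/lxc/ */
  static int set_blkio_weight(const char *domname, unsigned int weight)
  {
      char path[256];
      FILE *fp;

      snprintf(path, sizeof(path),
               "/sys/fs/cgroup/blkio/libvirt/lxc/%s/blkio.weight", domname);
      if (!(fp = fopen(path, "w")))
          return -1;
      fprintf(fp, "%u\n", weight);
      return fclose(fp) == 0 ? 0 : -1;
  }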

From: "Daniel P. Berrange" <berrange@redhat.com> Use numactl to set NUMA memory placement for LXC containers * src/lxc/lxc_controller.c: Support NUMA memory placement --- src/lxc/lxc_controller.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 111 insertions(+), 0 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index d3c3b61..4f2326b 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -48,6 +48,11 @@ # include <cap-ng.h> #endif +#if HAVE_NUMACTL +# define NUMA_VERSION1_COMPATIBILITY 1 +# include <numa.h> +#endif + #include "virterror_internal.h" #include "logging.h" #include "util.h" @@ -223,6 +228,101 @@ cleanup: return ret; } + +#if HAVE_NUMACTL +static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) +{ + nodemask_t mask; + int mode = -1; + int node = -1; + int ret = -1; + int i = 0; + int maxnode = 0; + bool warned = false; + + if (!def->numatune.memory.nodemask) + return 0; + + VIR_DEBUG("Setting NUMA memory policy"); + + if (numa_available() < 0) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("Host kernel is not aware of NUMA.")); + return -1; + } + + maxnode = numa_max_node() + 1; + + /* Convert nodemask to NUMA bitmask. */ + nodemask_zero(&mask); + for (i = 0; i < VIR_DOMAIN_CPUMASK_LEN; i++) { + if (def->numatune.memory.nodemask[i]) { + if (i > NUMA_NUM_NODES) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Host cannot support NUMA node %d"), i); + return -1; + } + if (i > maxnode && !warned) { + VIR_WARN("nodeset is out of range, there is only %d NUMA " + "nodes on host", maxnode); + warned = true; + } + nodemask_set(&mask, i); + } + } + + mode = def->numatune.memory.mode; + + if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + numa_set_bind_policy(1); + numa_set_membind(&mask); + numa_set_bind_policy(0); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { + int nnodes = 0; + for (i = 0; i < NUMA_NUM_NODES; i++) { + if (nodemask_isset(&mask, i)) { + node = i; + nnodes++; + } + } + + if (nnodes != 1) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("NUMA memory tuning in 'preferred' mode " + "only supports single node")); + goto cleanup; + } + + numa_set_bind_policy(0); + numa_set_preferred(node); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { + numa_set_interleave_mask(&mask); + } else { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Unable to set NUMA policy %s"), + virDomainNumatuneMemModeTypeToString(mode)); + goto cleanup; + } + + ret = 0; + +cleanup: + return ret; +} +#else +static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) +{ + if (def->numatune.memory.nodemask) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("NUMA policy is not available on this platform")); + return -1; + } + + return 0; +} +#endif + + static int lxcSetContainerCpuTune(virCgroupPtr cgroup, virDomainDefPtr def) { int ret = -1; @@ -435,6 +535,17 @@ static int lxcSetContainerResources(virDomainDefPtr def) getpid(), def->name); } + rc = -1; + + /* These must come *after* placing the task in + * the cgroup, otherwise the 'cpuset' controller + * will reset the values we've just defined! + */ + if (lxcSetContainerNUMAPolicy(def) < 0) + goto cleanup; + + rc = 0; + cleanup: virCgroupFree(&driver); virCgroupFree(&cgroup); -- 1.7.6.4

From: "Daniel P. Berrange" <berrange@redhat.com> While LXC does not have the concept of VCPUS, so we cann't do per-VCPU pCPU placement, we can support the VM level CPU placement. Todo this simply set the CPU affinity of the LXC controller at startup. All child processes will inherit this affinity. * src/lxc/lxc_controller.c: Set process affinity --- src/lxc/lxc_controller.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 62 insertions(+), 0 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 4f2326b..2c387a5 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -65,6 +65,8 @@ #include "virfile.h" #include "virpidfile.h" #include "command.h" +#include "processinfo.h" +#include "nodeinfo.h" #define VIR_FROM_THIS VIR_FROM_LXC @@ -323,6 +325,63 @@ static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) #endif +/* + * To be run while still single threaded + */ +static int lxcSetContainerCpuAffinity(virDomainDefPtr def) +{ + int i, hostcpus, maxcpu = CPU_SETSIZE; + virNodeInfo nodeinfo; + unsigned char *cpumap; + int cpumaplen; + + VIR_DEBUG("Setting CPU affinity"); + + if (nodeGetInfo(NULL, &nodeinfo) < 0) + return -1; + + /* setaffinity fails if you set bits for CPUs which + * aren't present, so we have to limit ourselves */ + hostcpus = VIR_NODEINFO_MAXCPUS(nodeinfo); + if (maxcpu > hostcpus) + maxcpu = hostcpus; + + cpumaplen = VIR_CPU_MAPLEN(maxcpu); + if (VIR_ALLOC_N(cpumap, cpumaplen) < 0) { + virReportOOMError(); + return -1; + } + + if (def->cpumask) { + /* XXX why don't we keep 'cpumask' in the libvirt cpumap + * format to start with ?!?! */ + for (i = 0 ; i < maxcpu && i < def->cpumasklen ; i++) + if (def->cpumask[i]) + VIR_USE_CPU(cpumap, i); + } else { + /* You may think this is redundant, but we can't assume libvirtd + * itself is running on all pCPUs, so we need to explicitly set + * the spawned QEMU instance to all pCPUs if no map is given in + * its config file */ + for (i = 0 ; i < maxcpu ; i++) + VIR_USE_CPU(cpumap, i); + } + + /* We are pressuming we are running between fork/exec of QEMU + * so use '0' to indicate our own process ID. No threads are + * running at this point + */ + if (virProcessInfoSetAffinity(0, /* Self */ + cpumap, cpumaplen, maxcpu) < 0) { + VIR_FREE(cpumap); + return -1; + } + VIR_FREE(cpumap); + + return 0; +} + + static int lxcSetContainerCpuTune(virCgroupPtr cgroup, virDomainDefPtr def) { int ret = -1; @@ -541,6 +600,9 @@ static int lxcSetContainerResources(virDomainDefPtr def) * the cgroup, otherwise the 'cpuset' controller * will reset the values we've just defined! */ + if (lxcSetContainerCpuAffinity(def) < 0) + goto cleanup; + if (lxcSetContainerNUMAPolicy(def) < 0) goto cleanup; -- 1.7.6.4

From: "Daniel P. Berrange" <berrange@redhat.com> * src/lxc/lxc_driver.c: Support changing quota/period for LXC containers * src/lxc/lxc_controller.c: Set initial quota/period at startup --- src/lxc/lxc_controller.c | 18 ++ src/lxc/lxc_driver.c | 459 ++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 425 insertions(+), 52 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 2c387a5..dd4a334 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -394,6 +394,24 @@ static int lxcSetContainerCpuTune(virCgroupPtr cgroup, virDomainDefPtr def) goto cleanup; } } + if (def->cputune.quota != 0) { + int rc = virCgroupSetCpuCfsQuota(cgroup, def->cputune.quota); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to set io cpu quota for domain %s"), + def->name); + goto cleanup; + } + } + if (def->cputune.period != 0) { + int rc = virCgroupSetCpuCfsPeriod(cgroup, def->cputune.period); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to set io cpu period for domain %s"), + def->name); + goto cleanup; + } + } ret = 0; cleanup: return ret; diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index f945e2a..356a175 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -2493,84 +2493,324 @@ static int lxcVersion(virConnectPtr conn ATTRIBUTE_UNUSED, unsigned long *versio return 0; } -static char *lxcGetSchedulerType(virDomainPtr domain ATTRIBUTE_UNUSED, + +/* + * check whether the host supports CFS bandwidth + * + * Return 1 when CFS bandwidth is supported, 0 when CFS bandwidth is not + * supported, -1 on error. + */ +static int lxcGetCpuBWStatus(virCgroupPtr cgroup) +{ + char *cfs_period_path = NULL; + int ret = -1; + + if (!cgroup) + return 0; + + if (virCgroupPathOfController(cgroup, VIR_CGROUP_CONTROLLER_CPU, + "cpu.cfs_period_us", &cfs_period_path) < 0) { + VIR_INFO("cannot get the path of cgroup CPU controller"); + ret = 0; + goto cleanup; + } + + if (access(cfs_period_path, F_OK) < 0) { + ret = 0; + } else { + ret = 1; + } + +cleanup: + VIR_FREE(cfs_period_path); + return ret; +} + + +static bool lxcCgroupControllerActive(lxc_driver_t *driver, + int controller) +{ + if (driver->cgroup == NULL) + return false; + if (controller < 0 || controller >= VIR_CGROUP_CONTROLLER_LAST) + return false; + if (!virCgroupMounted(driver->cgroup, controller)) + return false; + return true; +} + + + +static char *lxcGetSchedulerType(virDomainPtr domain, int *nparams) { - char *schedulerType = NULL; + lxc_driver_t *driver = domain->conn->privateData; + char *ret = NULL; + int rc; - if (nparams) - *nparams = 1; + lxcDriverLock(driver); + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("cgroup CPU controller is not mounted")); + goto cleanup; + } - schedulerType = strdup("posix"); + if (nparams) { + rc = lxcGetCpuBWStatus(driver->cgroup); + if (rc < 0) + goto cleanup; + else if (rc == 0) + *nparams = 1; + else + *nparams = 3; + } - if (schedulerType == NULL) + ret = strdup("posix"); + if (!ret) virReportOOMError(); - return schedulerType; +cleanup: + lxcDriverUnlock(driver); + return ret; +} + + +static int +lxcGetVcpuBWLive(virCgroupPtr cgroup, unsigned long long *period, + long long *quota) +{ + int rc; + + rc = virCgroupGetCpuCfsPeriod(cgroup, period); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu bandwidth period tunable")); + return -1; + } + + rc = virCgroupGetCpuCfsQuota(cgroup, quota); + if (rc < 0) { + virReportSystemError(-rc, "%s", 
+ _("unable to get cpu bandwidth tunable")); + return -1; + } + + return 0; +} + + +static int lxcSetVcpuBWLive(virCgroupPtr cgroup, unsigned long long period, + long long quota) +{ + int rc; + unsigned long long old_period; + + if (period == 0 && quota == 0) + return 0; + + if (period) { + /* get old period, and we can rollback if set quota failed */ + rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period); + if (rc < 0) { + virReportSystemError(-rc, + "%s", _("Unable to get cpu bandwidth period")); + return -1; + } + + rc = virCgroupSetCpuCfsPeriod(cgroup, period); + if (rc < 0) { + virReportSystemError(-rc, + "%s", _("Unable to set cpu bandwidth period")); + return -1; + } + } + + if (quota) { + rc = virCgroupSetCpuCfsQuota(cgroup, quota); + if (rc < 0) { + virReportSystemError(-rc, + "%s", _("Unable to set cpu bandwidth quota")); + goto cleanup; + } + } + + return 0; + +cleanup: + if (period) { + rc = virCgroupSetCpuCfsPeriod(cgroup, old_period); + if (rc < 0) + virReportSystemError(-rc, + _("%s"), + "Unable to rollback cpu bandwidth period"); + } + + return -1; } + static int -lxcSetSchedulerParametersFlags(virDomainPtr domain, +lxcSetSchedulerParametersFlags(virDomainPtr dom, virTypedParameterPtr params, int nparams, unsigned int flags) { - lxc_driver_t *driver = domain->conn->privateData; + lxc_driver_t *driver = dom->conn->privateData; int i; virCgroupPtr group = NULL; virDomainObjPtr vm = NULL; + virDomainDefPtr vmdef = NULL; int ret = -1; + bool isActive; + int rc; - virCheckFlags(0, -1); - - if (driver->cgroup == NULL) - return -1; + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); lxcDriverLock(driver); - vm = virDomainFindByUUID(&driver->domains, domain->uuid); + + vm = virDomainFindByUUID(&driver->domains, dom->uuid); if (vm == NULL) { - char uuidstr[VIR_UUID_STRING_BUFLEN]; - virUUIDFormat(domain->uuid, uuidstr); - lxcError(VIR_ERR_NO_DOMAIN, - _("No domain with matching uuid '%s'"), uuidstr); + lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); goto cleanup; } - if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) - goto cleanup; + isActive = virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot change persistent config of a transient domain")); + goto cleanup; + } + + /* Make a copy for updated domain. 
*/ + vmdef = virDomainObjCopyPersistentDef(driver->caps, vm); + if (!vmdef) + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("cgroup CPU controller is not mounted")); + goto cleanup; + } + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), + vm->def->name); + goto cleanup; + } + } for (i = 0; i < nparams; i++) { virTypedParameterPtr param = ¶ms[i]; - if (STRNEQ(param->field, VIR_DOMAIN_SCHEDULER_CPU_SHARES)) { + if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_CPU_SHARES)) { + if (param->type != VIR_TYPED_PARAM_ULLONG) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for cpu_shares tunable, expected a 'ullong'")); + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = virCgroupSetCpuShares(group, params[i].value.ul); + if (rc != 0) { + virReportSystemError(-rc, "%s", + _("unable to set cpu shares tunable")); + goto cleanup; + } + + vm->def->cputune.shares = params[i].value.ul; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.shares = params[i].value.ul; + } + } else if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_VCPU_PERIOD)) { + if (param->type != VIR_TYPED_PARAM_ULLONG) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for vcpu_period tunable," + " expected a 'ullong'")); + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = lxcSetVcpuBWLive(group, params[i].value.ul, 0); + if (rc != 0) + goto cleanup; + + if (params[i].value.ul) + vm->def->cputune.period = params[i].value.ul; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.period = params[i].value.ul; + } + } else if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_VCPU_QUOTA)) { + if (param->type != VIR_TYPED_PARAM_LLONG) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for vcpu_quota tunable," + " expected a 'llong'")); + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = lxcSetVcpuBWLive(group, 0, params[i].value.l); + if (rc != 0) + goto cleanup; + + if (params[i].value.l) + vm->def->cputune.quota = params[i].value.l; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.quota = params[i].value.l; + } + } else { lxcError(VIR_ERR_INVALID_ARG, _("Invalid parameter `%s'"), param->field); goto cleanup; } + } - if (param->type != VIR_TYPED_PARAM_ULLONG) { - lxcError(VIR_ERR_INVALID_ARG, "%s", - _("Invalid type for cpu_shares tunable, expected a 'ullong'")); - goto cleanup; - } + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) + goto cleanup; - int rc = virCgroupSetCpuShares(group, params[i].value.ul); - if (rc != 0) { - virReportSystemError(-rc, _("failed to set cpu_shares=%llu"), - params[i].value.ul); + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + rc = virDomainSaveConfig(driver->configDir, vmdef); + if (rc < 0) goto cleanup; - } - vm->def->cputune.shares = params[i].value.ul; + virDomainObjAssignDef(vm, vmdef, false); + vmdef = NULL; } + ret = 0; cleanup: - lxcDriverUnlock(driver); + virDomainDefFree(vmdef); virCgroupFree(&group); if (vm) virDomainObjUnlock(vm); + lxcDriverUnlock(driver); return ret; } @@ -2583,55 +2823,170 @@ lxcSetSchedulerParameters(virDomainPtr domain, } static int -lxcGetSchedulerParametersFlags(virDomainPtr domain, 
+lxcGetSchedulerParametersFlags(virDomainPtr dom, virTypedParameterPtr params, int *nparams, unsigned int flags) { - lxc_driver_t *driver = domain->conn->privateData; + lxc_driver_t *driver = dom->conn->privateData; virCgroupPtr group = NULL; virDomainObjPtr vm = NULL; - unsigned long long val; + unsigned long long shares = 0; + unsigned long long period = 0; + long long quota = 0; int ret = -1; + int rc; + bool isActive; + bool cpu_bw_status = false; + int saved_nparams = 0; - virCheckFlags(0, -1); - - if (driver->cgroup == NULL) - return -1; + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); lxcDriverLock(driver); - vm = virDomainFindByUUID(&driver->domains, domain->uuid); + + if ((flags & (VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG)) == + (VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG)) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("cannot query live and config together")); + goto cleanup; + } + + if (*nparams > 1) { + rc = lxcGetCpuBWStatus(driver->cgroup); + if (rc < 0) + goto cleanup; + cpu_bw_status = !!rc; + } + + vm = virDomainFindByUUID(&driver->domains, dom->uuid); if (vm == NULL) { - char uuidstr[VIR_UUID_STRING_BUFLEN]; - virUUIDFormat(domain->uuid, uuidstr); - lxcError(VIR_ERR_NO_DOMAIN, - _("No domain with matching uuid '%s'"), uuidstr); + lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); goto cleanup; } - if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) + isActive = virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot query persistent config of a transient domain")); + goto cleanup; + } + + if (isActive) { + virDomainDefPtr persistentDef; + + persistentDef = virDomainObjGetPersistentDef(driver->caps, vm); + if (!persistentDef) { + lxcError(VIR_ERR_INTERNAL_ERROR, "%s", + _("can't get persistentDef")); + goto cleanup; + } + shares = persistentDef->cputune.shares; + if (*nparams > 1 && cpu_bw_status) { + period = persistentDef->cputune.period; + quota = persistentDef->cputune.quota; + } + } else { + shares = vm->def->cputune.shares; + if (*nparams > 1 && cpu_bw_status) { + period = vm->def->cputune.period; + quota = vm->def->cputune.quota; + } + } + goto out; + } + + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("domain is not running")); goto cleanup; + } - if (virCgroupGetCpuShares(group, &val) != 0) + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("cgroup CPU controller is not mounted")); goto cleanup; - params[0].value.ul = val; + } + + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), vm->def->name); + goto cleanup; + } + + rc = virCgroupGetCpuShares(group, &shares); + if (rc != 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu shares tunable")); + goto cleanup; + } + + if (*nparams > 1 && cpu_bw_status) { + rc = lxcGetVcpuBWLive(group, &period, "a); + if (rc != 0) + goto cleanup; + } +out: + params[0].value.ul = shares; + params[0].type = VIR_TYPED_PARAM_ULLONG; if (virStrcpyStatic(params[0].field, VIR_DOMAIN_SCHEDULER_CPU_SHARES) == NULL) { lxcError(VIR_ERR_INTERNAL_ERROR, - "%s", _("Field cpu_shares too big for destination")); + _("Field name '%s' too long"), + 
VIR_DOMAIN_SCHEDULER_CPU_SHARES); goto cleanup; } - params[0].type = VIR_TYPED_PARAM_ULLONG; - *nparams = 1; + saved_nparams++; + + if (cpu_bw_status) { + if (*nparams > saved_nparams) { + params[1].value.ul = period; + params[1].type = VIR_TYPED_PARAM_ULLONG; + if (virStrcpyStatic(params[1].field, + VIR_DOMAIN_SCHEDULER_VCPU_PERIOD) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_SCHEDULER_VCPU_PERIOD); + goto cleanup; + } + saved_nparams++; + } + + if (*nparams > saved_nparams) { + params[2].value.ul = quota; + params[2].type = VIR_TYPED_PARAM_LLONG; + if (virStrcpyStatic(params[2].field, + VIR_DOMAIN_SCHEDULER_VCPU_QUOTA) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_SCHEDULER_VCPU_QUOTA); + goto cleanup; + } + saved_nparams++; + } + } + + *nparams = saved_nparams; + ret = 0; cleanup: - lxcDriverUnlock(driver); virCgroupFree(&group); if (vm) virDomainObjUnlock(vm); + lxcDriverUnlock(driver); return ret; } -- 1.7.6.4
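For testing, the new tunables are reachable through the regular public API. A minimal client sketch (the connection URI is real, but the domain name and values are illustrative):

  #include <libvirt/libvirt.h>
  #include <string.h>
  #include <stdio.h>

  int main(void)
  {
      virConnectPtr conn = virConnectOpen("lxc:///");
      virDomainPtr dom;
      virTypedParameter params[2];

      if (!conn)
          return 1;
      if (!(dom = virDomainLookupByName(conn, "mycontainer"))) {
          virConnectClose(conn);
          return 1;
      }

      /* Raise the shares and cap the CFS quota of the running container */
      memset(params, 0, sizeof(params));
      strcpy(params[0].field, VIR_DOMAIN_SCHEDULER_CPU_SHARES);
      params[0].type = VIR_TYPED_PARAM_ULLONG;
      params[0].value.ul = 2048;
      strcpy(params[1].field, VIR_DOMAIN_SCHEDULER_VCPU_QUOTA);
      params[1].type = VIR_TYPED_PARAM_LLONG;
      params[1].value.l = 50000;

      if (virDomainSetSchedulerParametersFlags(dom, params, 2,
                                               VIR_DOMAIN_AFFECT_LIVE) < 0)
          fprintf(stderr, "failed to set scheduler params\n");

      virDomainFree(dom);
      virConnectClose(conn);
      return 0;
  }

The virsh equivalent, with a virsh new enough to pass the affect flags, is along the lines of 'virsh -c lxc:/// schedinfo mycontainer --live --set vcpu_quota=50000'.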

From: "Daniel P. Berrange" <berrange@redhat.com> * src/lxc/lxc_controller.c: Refactor setting of initial blkio tuning parameters * src/lxc/lxc_driver.c: Enable live change of blkio tuning --- src/lxc/lxc_driver.c | 283 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 283 insertions(+), 0 deletions(-) diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 356a175..0edc703 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -2998,6 +2998,287 @@ lxcGetSchedulerParameters(virDomainPtr domain, return lxcGetSchedulerParametersFlags(domain, params, nparams, 0); } + +static int lxcDomainSetBlkioParameters(virDomainPtr dom, + virTypedParameterPtr params, + int nparams, + unsigned int flags) +{ + lxc_driver_t *driver = dom->conn->privateData; + int i; + virCgroupPtr group = NULL; + virDomainObjPtr vm = NULL; + virDomainDefPtr persistentDef = NULL; + int ret = -1; + bool isActive; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + lxcDriverLock(driver); + + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + + if (vm == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + isActive = virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_BLKIO)) { + lxcError(VIR_ERR_OPERATION_INVALID, _("blkio cgroup isn't mounted")); + goto cleanup; + } + + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), vm->def->name); + goto cleanup; + } + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot change persistent config of a transient domain")); + goto cleanup; + } + if (!(persistentDef = virDomainObjGetPersistentDef(driver->caps, vm))) + goto cleanup; + } + + ret = 0; + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + for (i = 0; i < nparams; i++) { + virTypedParameterPtr param = ¶ms[i]; + + if (STREQ(param->field, VIR_DOMAIN_BLKIO_WEIGHT)) { + int rc; + if (param->type != VIR_TYPED_PARAM_UINT) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for blkio weight tunable, expected a 'unsigned int'")); + ret = -1; + continue; + } + + if (params[i].value.ui > 1000 || params[i].value.ui < 100) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("out of blkio weight range.")); + ret = -1; + continue; + } + + rc = virCgroupSetBlkioWeight(group, params[i].value.ui); + if (rc != 0) { + virReportSystemError(-rc, "%s", + _("unable to set blkio weight tunable")); + ret = -1; + } + } else { + lxcError(VIR_ERR_INVALID_ARG, + _("Parameter `%s' not supported"), param->field); + ret = -1; + } + } + } else if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + /* Clang can't see that if we get here, persistentDef was set. 
*/ + sa_assert(persistentDef); + + for (i = 0; i < nparams; i++) { + virTypedParameterPtr param = ¶ms[i]; + + if (STREQ(param->field, VIR_DOMAIN_BLKIO_WEIGHT)) { + if (param->type != VIR_TYPED_PARAM_UINT) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for blkio weight tunable, expected a 'unsigned int'")); + ret = -1; + continue; + } + + if (params[i].value.ui > 1000 || params[i].value.ui < 100) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("out of blkio weight range.")); + ret = -1; + continue; + } + + persistentDef->blkio.weight = params[i].value.ui; + } else { + lxcError(VIR_ERR_INVALID_ARG, + _("Parameter `%s' not supported"), param->field); + ret = -1; + } + } + + if (virDomainSaveConfig(driver->configDir, persistentDef) < 0) + ret = -1; + } + +cleanup: + virCgroupFree(&group); + if (vm) + virDomainObjUnlock(vm); + lxcDriverUnlock(driver); + return ret; +} + + +#define LXC_NB_BLKIO_PARAM 1 +static int lxcDomainGetBlkioParameters(virDomainPtr dom, + virTypedParameterPtr params, + int *nparams, + unsigned int flags) +{ + lxc_driver_t *driver = dom->conn->privateData; + int i; + virCgroupPtr group = NULL; + virDomainObjPtr vm = NULL; + virDomainDefPtr persistentDef = NULL; + unsigned int val; + int ret = -1; + int rc; + bool isActive; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + lxcDriverLock(driver); + + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + + if (vm == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + if ((*nparams) == 0) { + /* Current number of blkio parameters supported by cgroups */ + *nparams = LXC_NB_BLKIO_PARAM; + ret = 0; + goto cleanup; + } + + isActive = virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_BLKIO)) { + lxcError(VIR_ERR_OPERATION_INVALID, _("blkio cgroup isn't mounted")); + goto cleanup; + } + + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), vm->def->name); + goto cleanup; + } + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot change persistent config of a transient domain")); + goto cleanup; + } + if (!(persistentDef = virDomainObjGetPersistentDef(driver->caps, vm))) + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + for (i = 0; i < *nparams && i < LXC_NB_BLKIO_PARAM; i++) { + virTypedParameterPtr param = ¶ms[i]; + val = 0; + param->value.ui = 0; + param->type = VIR_TYPED_PARAM_UINT; + + switch (i) { + case 0: /* fill blkio weight here */ + rc = virCgroupGetBlkioWeight(group, &val); + if (rc != 0) { + virReportSystemError(-rc, "%s", + _("unable to get blkio weight")); + goto cleanup; + } + if (virStrcpyStatic(param->field, VIR_DOMAIN_BLKIO_WEIGHT) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_BLKIO_WEIGHT); + goto cleanup; + } + param->value.ui = val; + break; + + default: + break; + /* should not hit here */ + } + } + } else if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + for (i = 0; i < *nparams && i < LXC_NB_BLKIO_PARAM; i++) { + virTypedParameterPtr param = ¶ms[i]; + val = 0; + 
param->value.ui = 0; + param->type = VIR_TYPED_PARAM_UINT; + + switch (i) { + case 0: /* fill blkio weight here */ + if (virStrcpyStatic(param->field, VIR_DOMAIN_BLKIO_WEIGHT) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_BLKIO_WEIGHT); + goto cleanup; + } + param->value.ui = persistentDef->blkio.weight; + break; + + default: + break; + /* should not hit here */ + } + } + } + + if (LXC_NB_BLKIO_PARAM < *nparams) + *nparams = LXC_NB_BLKIO_PARAM; + ret = 0; + +cleanup: + if (group) + virCgroupFree(&group); + if (vm) + virDomainObjUnlock(vm); + lxcDriverUnlock(driver); + return ret; +} + + #ifdef __linux__ static int lxcDomainInterfaceStats(virDomainPtr dom, @@ -3493,6 +3774,8 @@ static virDriver lxcDriver = { .domainSetMemory = lxcDomainSetMemory, /* 0.7.2 */ .domainSetMemoryParameters = lxcDomainSetMemoryParameters, /* 0.8.5 */ .domainGetMemoryParameters = lxcDomainGetMemoryParameters, /* 0.8.5 */ + .domainSetBlkioParameters = lxcDomainSetBlkioParameters, /* 0.9.8 */ + .domainGetBlkioParameters = lxcDomainGetBlkioParameters, /* 0.9.8 */ .domainGetInfo = lxcDomainGetInfo, /* 0.4.2 */ .domainGetState = lxcDomainGetState, /* 0.9.2 */ .domainGetXMLDesc = lxcDomainGetXMLDesc, /* 0.4.2 */ -- 1.7.6.4
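Once applied, these entry points line up with virsh's existing blkiotune command, so a quick smoke test looks something like the following (domain name illustrative, and assuming a virsh new enough to pass the affect flags):

  # query the current weight (also reports the supported parameter count)
  virsh -c lxc:/// blkiotune mycontainer

  # change the weight of the running container only
  virsh -c lxc:/// blkiotune mycontainer --weight 500 --live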