[libvirt] [PATCH 0/5 (repost)] Support Blkio tune, CPU tune, NUMA, CPU affinity in LXC

Re-post of https://www.redhat.com/archives/libvir-list/2011-November/msg00464.html rebased to latest GIT

From: "Daniel P. Berrange" <berrange@redhat.com> Use numactl to set NUMA memory placement for LXC containers * src/lxc/lxc_controller.c: Support NUMA memory placement --- src/lxc/lxc_controller.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 101 insertions(+), 0 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 40047f0..4718fa8 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -48,6 +48,11 @@ # include <cap-ng.h> #endif +#if HAVE_NUMACTL +# define NUMA_VERSION1_COMPATIBILITY 1 +# include <numa.h> +#endif + #include "virterror_internal.h" #include "logging.h" #include "util.h" @@ -224,6 +229,99 @@ cleanup: return ret; } +#if HAVE_NUMACTL +static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) +{ + nodemask_t mask; + int mode = -1; + int node = -1; + int ret = -1; + int i = 0; + int maxnode = 0; + bool warned = false; + + if (!def->numatune.memory.nodemask) + return 0; + + VIR_DEBUG("Setting NUMA memory policy"); + + if (numa_available() < 0) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("Host kernel is not aware of NUMA.")); + return -1; + } + + maxnode = numa_max_node() + 1; + + /* Convert nodemask to NUMA bitmask. 
*/ + nodemask_zero(&mask); + for (i = 0; i < VIR_DOMAIN_CPUMASK_LEN; i++) { + if (def->numatune.memory.nodemask[i]) { + if (i > NUMA_NUM_NODES) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Host cannot support NUMA node %d"), i); + return -1; + } + if (i > maxnode && !warned) { + VIR_WARN("nodeset is out of range, there is only %d NUMA " + "nodes on host", maxnode); + warned = true; + } + nodemask_set(&mask, i); + } + } + + mode = def->numatune.memory.mode; + + if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + numa_set_bind_policy(1); + numa_set_membind(&mask); + numa_set_bind_policy(0); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { + int nnodes = 0; + for (i = 0; i < NUMA_NUM_NODES; i++) { + if (nodemask_isset(&mask, i)) { + node = i; + nnodes++; + } + } + + if (nnodes != 1) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("NUMA memory tuning in 'preferred' mode " + "only supports single node")); + goto cleanup; + } + + numa_set_bind_policy(0); + numa_set_preferred(node); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { + numa_set_interleave_mask(&mask); + } else { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Unable to set NUMA policy %s"), + virDomainNumatuneMemModeTypeToString(mode)); + goto cleanup; + } + + ret = 0; + +cleanup: + return ret; +} +#else +static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) +{ + if (def->numatune.memory.nodemask) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("NUMA policy is not available on this platform")); + return -1; + } + + return 0; +} +#endif + /** * lxcSetContainerResources * @def: pointer to virtual machine structure @@ -249,6 +347,9 @@ static int lxcSetContainerResources(virDomainDefPtr def) {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, {0, 0, 0}}; + if (lxcSetContainerNUMAPolicy(def) < 0) + return -1; + rc = virCgroupForDriver("lxc", &driver, 1, 0); if (rc != 0) { /* Skip all if no driver cgroup is configured */ -- 1.7.6.4

On Thu, Nov 24, 2011 at 11:38:12AM +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
Use numactl to set NUMA memory placement for LXC containers
* src/lxc/lxc_controller.c: Support NUMA memory placement --- src/lxc/lxc_controller.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 101 insertions(+), 0 deletions(-)
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 40047f0..4718fa8 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -48,6 +48,11 @@ # include <cap-ng.h> #endif
+#if HAVE_NUMACTL +# define NUMA_VERSION1_COMPATIBILITY 1 +# include <numa.h> +#endif + #include "virterror_internal.h" #include "logging.h" #include "util.h" @@ -224,6 +229,99 @@ cleanup: return ret; }
+#if HAVE_NUMACTL +static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) +{ + nodemask_t mask; + int mode = -1; + int node = -1; + int ret = -1; + int i = 0; + int maxnode = 0; + bool warned = false; + + if (!def->numatune.memory.nodemask) + return 0; + + VIR_DEBUG("Setting NUMA memory policy"); + + if (numa_available() < 0) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("Host kernel is not aware of NUMA.")); + return -1; + } + + maxnode = numa_max_node() + 1; + + /* Convert nodemask to NUMA bitmask. */ + nodemask_zero(&mask); + for (i = 0; i < VIR_DOMAIN_CPUMASK_LEN; i++) { + if (def->numatune.memory.nodemask[i]) { + if (i > NUMA_NUM_NODES) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Host cannot support NUMA node %d"), i); + return -1; + } + if (i > maxnode && !warned) { + VIR_WARN("nodeset is out of range, there is only %d NUMA " + "nodes on host", maxnode); + warned = true; + }
small indent issue here
+ nodemask_set(&mask, i); + } + } + + mode = def->numatune.memory.mode; + + if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + numa_set_bind_policy(1); + numa_set_membind(&mask); + numa_set_bind_policy(0); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) { + int nnodes = 0; + for (i = 0; i < NUMA_NUM_NODES; i++) { + if (nodemask_isset(&mask, i)) { + node = i; + nnodes++; + } + } + + if (nnodes != 1) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("NUMA memory tuning in 'preferred' mode " + "only supports single node")); + goto cleanup; + } + + numa_set_bind_policy(0); + numa_set_preferred(node); + } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) { + numa_set_interleave_mask(&mask); + } else { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Unable to set NUMA policy %s"), + virDomainNumatuneMemModeTypeToString(mode)); + goto cleanup; + } + + ret = 0; + +cleanup: + return ret; +} +#else +static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) +{ + if (def->numatune.memory.nodemask) { + lxcError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("NUMA policy is not available on this platform")); + return -1; + } + + return 0; +} +#endif + /** * lxcSetContainerResources * @def: pointer to virtual machine structure @@ -249,6 +347,9 @@ static int lxcSetContainerResources(virDomainDefPtr def) {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, {0, 0, 0}};
+ if (lxcSetContainerNUMAPolicy(def) < 0) + return -1; + rc = virCgroupForDriver("lxc", &driver, 1, 0); if (rc != 0) { /* Skip all if no driver cgroup is configured */
ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

From: "Daniel P. Berrange" <berrange@redhat.com> While LXC does not have the concept of VCPUS, so we cann't do per-VCPU pCPU placement, we can support the VM level CPU placement. Todo this simply set the CPU affinity of the LXC controller at startup. All child processes will inherit this affinity. * src/lxc/lxc_controller.c: Set process affinity --- src/lxc/lxc_controller.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 63 insertions(+), 0 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 4718fa8..1db25fb 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -66,6 +66,8 @@ #include "virfile.h" #include "virpidfile.h" #include "command.h" +#include "processinfo.h" +#include "nodeinfo.h" #define VIR_FROM_THIS VIR_FROM_LXC @@ -322,6 +324,64 @@ static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) } #endif + +/* + * To be run while still single threaded + */ +static int lxcSetContainerCpuAffinity(virDomainDefPtr def) +{ + int i, hostcpus, maxcpu = CPU_SETSIZE; + virNodeInfo nodeinfo; + unsigned char *cpumap; + int cpumaplen; + + VIR_DEBUG("Setting CPU affinity"); + + if (nodeGetInfo(NULL, &nodeinfo) < 0) + return -1; + + /* setaffinity fails if you set bits for CPUs which + * aren't present, so we have to limit ourselves */ + hostcpus = VIR_NODEINFO_MAXCPUS(nodeinfo); + if (maxcpu > hostcpus) + maxcpu = hostcpus; + + cpumaplen = VIR_CPU_MAPLEN(maxcpu); + if (VIR_ALLOC_N(cpumap, cpumaplen) < 0) { + virReportOOMError(); + return -1; + } + + if (def->cpumask) { + /* XXX why don't we keep 'cpumask' in the libvirt cpumap + * format to start with ?!?! 
*/ + for (i = 0 ; i < maxcpu && i < def->cpumasklen ; i++) + if (def->cpumask[i]) + VIR_USE_CPU(cpumap, i); + } else { + /* You may think this is redundant, but we can't assume libvirtd + * itself is running on all pCPUs, so we need to explicitly set + * the spawned QEMU instance to all pCPUs if no map is given in + * its config file */ + for (i = 0 ; i < maxcpu ; i++) + VIR_USE_CPU(cpumap, i); + } + + /* We are pressuming we are running between fork/exec of QEMU + * so use '0' to indicate our own process ID. No threads are + * running at this point + */ + if (virProcessInfoSetAffinity(0, /* Self */ + cpumap, cpumaplen, maxcpu) < 0) { + VIR_FREE(cpumap); + return -1; + } + VIR_FREE(cpumap); + + return 0; +} + + /** * lxcSetContainerResources * @def: pointer to virtual machine structure @@ -347,6 +407,9 @@ static int lxcSetContainerResources(virDomainDefPtr def) {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, {0, 0, 0}}; + if (lxcSetContainerCpuAffinity(def) < 0) + return -1; + if (lxcSetContainerNUMAPolicy(def) < 0) return -1; -- 1.7.6.4

On Thu, Nov 24, 2011 at 11:38:13AM +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
While LXC does not have the concept of VCPUS, so we cann't do
s/nn/n/
per-VCPU pCPU placement, we can support the VM level CPU placement. To do this, simply set the CPU affinity of the LXC controller at startup. All child processes will inherit this affinity.
* src/lxc/lxc_controller.c: Set process affinity --- src/lxc/lxc_controller.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 63 insertions(+), 0 deletions(-)
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 4718fa8..1db25fb 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -66,6 +66,8 @@ #include "virfile.h" #include "virpidfile.h" #include "command.h" +#include "processinfo.h" +#include "nodeinfo.h"
#define VIR_FROM_THIS VIR_FROM_LXC
@@ -322,6 +324,64 @@ static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) } #endif
+ +/* + * To be run while still single threaded + */ +static int lxcSetContainerCpuAffinity(virDomainDefPtr def) +{ + int i, hostcpus, maxcpu = CPU_SETSIZE; + virNodeInfo nodeinfo; + unsigned char *cpumap; + int cpumaplen; + + VIR_DEBUG("Setting CPU affinity"); + + if (nodeGetInfo(NULL, &nodeinfo) < 0) + return -1; + + /* setaffinity fails if you set bits for CPUs which + * aren't present, so we have to limit ourselves */ + hostcpus = VIR_NODEINFO_MAXCPUS(nodeinfo); + if (maxcpu > hostcpus) + maxcpu = hostcpus; + + cpumaplen = VIR_CPU_MAPLEN(maxcpu); + if (VIR_ALLOC_N(cpumap, cpumaplen) < 0) { + virReportOOMError(); + return -1; + } + + if (def->cpumask) { + /* XXX why don't we keep 'cpumask' in the libvirt cpumap + * format to start with ?!?! */ + for (i = 0 ; i < maxcpu && i < def->cpumasklen ; i++) + if (def->cpumask[i]) + VIR_USE_CPU(cpumap, i); + } else { + /* You may think this is redundant, but we can't assume libvirtd + * itself is running on all pCPUs, so we need to explicitly set + * the spawned QEMU instance to all pCPUs if no map is given in
Hum you mean LXC container instead of QEmu, right ?
+ * its config file */ + for (i = 0 ; i < maxcpu ; i++) + VIR_USE_CPU(cpumap, i); + } + + /* We are pressuming we are running between fork/exec of QEMU
idem
+ * so use '0' to indicate our own process ID. No threads are + * running at this point + */ + if (virProcessInfoSetAffinity(0, /* Self */ + cpumap, cpumaplen, maxcpu) < 0) { + VIR_FREE(cpumap); + return -1; + } + VIR_FREE(cpumap); + + return 0; +} + + /** * lxcSetContainerResources * @def: pointer to virtual machine structure @@ -347,6 +407,9 @@ static int lxcSetContainerResources(virDomainDefPtr def) {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, {0, 0, 0}};
+ if (lxcSetContainerCpuAffinity(def) < 0) + return -1; + if (lxcSetContainerNUMAPolicy(def) < 0) return -1;
Either I don't understand the comments or they need fixing :) ACK Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On Thu, Nov 24, 2011 at 09:55:26PM +0800, Daniel Veillard wrote:
On Thu, Nov 24, 2011 at 11:38:13AM +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
While LXC does not have the concept of VCPUS, so we cann't do
s/nn/n/
per-VCPU pCPU placement, we can support the VM level CPU placement. To do this, simply set the CPU affinity of the LXC controller at startup. All child processes will inherit this affinity.
* src/lxc/lxc_controller.c: Set process affinity --- src/lxc/lxc_controller.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 63 insertions(+), 0 deletions(-)
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 4718fa8..1db25fb 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -66,6 +66,8 @@ #include "virfile.h" #include "virpidfile.h" #include "command.h" +#include "processinfo.h" +#include "nodeinfo.h"
#define VIR_FROM_THIS VIR_FROM_LXC
@@ -322,6 +324,64 @@ static int lxcSetContainerNUMAPolicy(virDomainDefPtr def) } #endif
+ +/* + * To be run while still single threaded + */ +static int lxcSetContainerCpuAffinity(virDomainDefPtr def) +{ + int i, hostcpus, maxcpu = CPU_SETSIZE; + virNodeInfo nodeinfo; + unsigned char *cpumap; + int cpumaplen; + + VIR_DEBUG("Setting CPU affinity"); + + if (nodeGetInfo(NULL, &nodeinfo) < 0) + return -1; + + /* setaffinity fails if you set bits for CPUs which + * aren't present, so we have to limit ourselves */ + hostcpus = VIR_NODEINFO_MAXCPUS(nodeinfo); + if (maxcpu > hostcpus) + maxcpu = hostcpus; + + cpumaplen = VIR_CPU_MAPLEN(maxcpu); + if (VIR_ALLOC_N(cpumap, cpumaplen) < 0) { + virReportOOMError(); + return -1; + } + + if (def->cpumask) { + /* XXX why don't we keep 'cpumask' in the libvirt cpumap + * format to start with ?!?! */ + for (i = 0 ; i < maxcpu && i < def->cpumasklen ; i++) + if (def->cpumask[i]) + VIR_USE_CPU(cpumap, i); + } else { + /* You may think this is redundant, but we can't assume libvirtd + * itself is running on all pCPUs, so we need to explicitly set + * the spawned QEMU instance to all pCPUs if no map is given in
Hum you mean LXC container instead of QEmu, right ?
+ * its config file */ + for (i = 0 ; i < maxcpu ; i++) + VIR_USE_CPU(cpumap, i); + } + + /* We are pressuming we are running between fork/exec of QEMU
idem
Yes, these should obviously have been s/QEMU/LXC/ :-) Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

From: "Daniel P. Berrange" <berrange@redhat.com> * src/lxc/lxc_driver.c: Support changing quota/period for LXC containers * src/lxc/lxc_controller.c: Set initial quota/period at startup --- src/lxc/lxc_controller.c | 49 ++++- src/lxc/lxc_driver.c | 463 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 450 insertions(+), 62 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 1db25fb..1685e15 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -382,6 +382,42 @@ static int lxcSetContainerCpuAffinity(virDomainDefPtr def) } +static int lxcSetContainerCpuTune(virCgroupPtr cgroup, virDomainDefPtr def) +{ + int ret = -1; + if (def->cputune.shares != 0) { + int rc = virCgroupSetCpuShares(cgroup, def->cputune.shares); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to set io cpu shares for domain %s"), + def->name); + goto cleanup; + } + } + if (def->cputune.quota != 0) { + int rc = virCgroupSetCpuCfsQuota(cgroup, def->cputune.quota); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to set io cpu quota for domain %s"), + def->name); + goto cleanup; + } + } + if (def->cputune.period != 0) { + int rc = virCgroupSetCpuCfsPeriod(cgroup, def->cputune.period); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to set io cpu period for domain %s"), + def->name); + goto cleanup; + } + } + ret = 0; +cleanup: + return ret; +} + + /** * lxcSetContainerResources * @def: pointer to virtual machine structure @@ -432,6 +468,9 @@ static int lxcSetContainerResources(virDomainDefPtr def) goto cleanup; } + if (lxcSetContainerCpuTune(cgroup, def) < 0) + goto cleanup; + if (def->blkio.weight) { rc = virCgroupSetBlkioWeight(cgroup, def->blkio.weight); if (rc != 0) { @@ -442,16 +481,6 @@ static int lxcSetContainerResources(virDomainDefPtr def) } } - if (def->cputune.shares) { - rc = virCgroupSetCpuShares(cgroup, def->cputune.shares); - if (rc != 0) { - virReportSystemError(-rc, - _("Unable to set cpu shares for 
domain %s"), - def->name); - goto cleanup; - } - } - rc = virCgroupSetMemory(cgroup, def->mem.max_balloon); if (rc != 0) { virReportSystemError(-rc, diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 1110c45..4445b5c 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -2596,84 +2596,328 @@ static int lxcVersion(virConnectPtr conn ATTRIBUTE_UNUSED, unsigned long *versio return 0; } -static char *lxcGetSchedulerType(virDomainPtr domain ATTRIBUTE_UNUSED, + +/* + * check whether the host supports CFS bandwidth + * + * Return 1 when CFS bandwidth is supported, 0 when CFS bandwidth is not + * supported, -1 on error. + */ +static int lxcGetCpuBWStatus(virCgroupPtr cgroup) +{ + char *cfs_period_path = NULL; + int ret = -1; + + if (!cgroup) + return 0; + + if (virCgroupPathOfController(cgroup, VIR_CGROUP_CONTROLLER_CPU, + "cpu.cfs_period_us", &cfs_period_path) < 0) { + VIR_INFO("cannot get the path of cgroup CPU controller"); + ret = 0; + goto cleanup; + } + + if (access(cfs_period_path, F_OK) < 0) { + ret = 0; + } else { + ret = 1; + } + +cleanup: + VIR_FREE(cfs_period_path); + return ret; +} + + +static bool lxcCgroupControllerActive(lxc_driver_t *driver, + int controller) +{ + if (driver->cgroup == NULL) + return false; + if (controller < 0 || controller >= VIR_CGROUP_CONTROLLER_LAST) + return false; + if (!virCgroupMounted(driver->cgroup, controller)) + return false; +#if 0 + if (driver->cgroupControllers & (1 << controller)) + return true; +#endif + return false; +} + + + +static char *lxcGetSchedulerType(virDomainPtr domain, int *nparams) { - char *schedulerType = NULL; + lxc_driver_t *driver = domain->conn->privateData; + char *ret = NULL; + int rc; - if (nparams) - *nparams = 1; + lxcDriverLock(driver); + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("cgroup CPU controller is not mounted")); + goto cleanup; + } - schedulerType = strdup("posix"); + if (nparams) { + rc = 
lxcGetCpuBWStatus(driver->cgroup); + if (rc < 0) + goto cleanup; + else if (rc == 0) + *nparams = 1; + else + *nparams = 3; + } - if (schedulerType == NULL) + ret = strdup("posix"); + if (!ret) virReportOOMError(); - return schedulerType; +cleanup: + lxcDriverUnlock(driver); + return ret; } + static int -lxcSetSchedulerParametersFlags(virDomainPtr domain, +lxcGetVcpuBWLive(virCgroupPtr cgroup, unsigned long long *period, + long long *quota) +{ + int rc; + + rc = virCgroupGetCpuCfsPeriod(cgroup, period); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu bandwidth period tunable")); + return -1; + } + + rc = virCgroupGetCpuCfsQuota(cgroup, quota); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu bandwidth tunable")); + return -1; + } + + return 0; +} + + +static int lxcSetVcpuBWLive(virCgroupPtr cgroup, unsigned long long period, + long long quota) +{ + int rc; + unsigned long long old_period; + + if (period == 0 && quota == 0) + return 0; + + if (period) { + /* get old period, and we can rollback if set quota failed */ + rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period); + if (rc < 0) { + virReportSystemError(-rc, + "%s", _("Unable to get cpu bandwidth period")); + return -1; + } + + rc = virCgroupSetCpuCfsPeriod(cgroup, period); + if (rc < 0) { + virReportSystemError(-rc, + "%s", _("Unable to set cpu bandwidth period")); + return -1; + } + } + + if (quota) { + rc = virCgroupSetCpuCfsQuota(cgroup, quota); + if (rc < 0) { + virReportSystemError(-rc, + "%s", _("Unable to set cpu bandwidth quota")); + goto cleanup; + } + } + + return 0; + +cleanup: + if (period) { + rc = virCgroupSetCpuCfsPeriod(cgroup, old_period); + if (rc < 0) + virReportSystemError(-rc, + _("%s"), + "Unable to rollback cpu bandwidth period"); + } + + return -1; +} + + +static int +lxcSetSchedulerParametersFlags(virDomainPtr dom, virTypedParameterPtr params, int nparams, unsigned int flags) { - lxc_driver_t *driver = domain->conn->privateData; + 
lxc_driver_t *driver = dom->conn->privateData; int i; virCgroupPtr group = NULL; virDomainObjPtr vm = NULL; + virDomainDefPtr vmdef = NULL; int ret = -1; + bool isActive; + int rc; - virCheckFlags(0, -1); - - if (driver->cgroup == NULL) - return -1; + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); lxcDriverLock(driver); - vm = virDomainFindByUUID(&driver->domains, domain->uuid); + + vm = virDomainFindByUUID(&driver->domains, dom->uuid); if (vm == NULL) { - char uuidstr[VIR_UUID_STRING_BUFLEN]; - virUUIDFormat(domain->uuid, uuidstr); - lxcError(VIR_ERR_NO_DOMAIN, - _("No domain with matching uuid '%s'"), uuidstr); + lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); goto cleanup; } - if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) - goto cleanup; + isActive = virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot change persistent config of a transient domain")); + goto cleanup; + } + + /* Make a copy for updated domain. 
*/ + vmdef = virDomainObjCopyPersistentDef(driver->caps, vm); + if (!vmdef) + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("cgroup CPU controller is not mounted")); + goto cleanup; + } + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), + vm->def->name); + goto cleanup; + } + } for (i = 0; i < nparams; i++) { virTypedParameterPtr param = ¶ms[i]; - if (STRNEQ(param->field, VIR_DOMAIN_SCHEDULER_CPU_SHARES)) { + if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_CPU_SHARES)) { + if (param->type != VIR_TYPED_PARAM_ULLONG) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for cpu_shares tunable, expected a 'ullong'")); + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = virCgroupSetCpuShares(group, params[i].value.ul); + if (rc != 0) { + virReportSystemError(-rc, "%s", + _("unable to set cpu shares tunable")); + goto cleanup; + } + + vm->def->cputune.shares = params[i].value.ul; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.shares = params[i].value.ul; + } + } else if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_VCPU_PERIOD)) { + if (param->type != VIR_TYPED_PARAM_ULLONG) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for vcpu_period tunable," + " expected a 'ullong'")); + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = lxcSetVcpuBWLive(group, params[i].value.ul, 0); + if (rc != 0) + goto cleanup; + + if (params[i].value.ul) + vm->def->cputune.period = params[i].value.ul; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.period = params[i].value.ul; + } + } else if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_VCPU_QUOTA)) { + if (param->type != 
VIR_TYPED_PARAM_LLONG) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for vcpu_quota tunable," + " expected a 'llong'")); + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = lxcSetVcpuBWLive(group, 0, params[i].value.l); + if (rc != 0) + goto cleanup; + + if (params[i].value.l) + vm->def->cputune.quota = params[i].value.l; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.quota = params[i].value.l; + } + } else { lxcError(VIR_ERR_INVALID_ARG, _("Invalid parameter `%s'"), param->field); goto cleanup; } + } - if (param->type != VIR_TYPED_PARAM_ULLONG) { - lxcError(VIR_ERR_INVALID_ARG, "%s", - _("Invalid type for cpu_shares tunable, expected a 'ullong'")); - goto cleanup; - } + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) + goto cleanup; - int rc = virCgroupSetCpuShares(group, params[i].value.ul); - if (rc != 0) { - virReportSystemError(-rc, _("failed to set cpu_shares=%llu"), - params[i].value.ul); + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + rc = virDomainSaveConfig(driver->configDir, vmdef); + if (rc < 0) goto cleanup; - } - vm->def->cputune.shares = params[i].value.ul; + virDomainObjAssignDef(vm, vmdef, false); + vmdef = NULL; } + ret = 0; cleanup: - lxcDriverUnlock(driver); + virDomainDefFree(vmdef); virCgroupFree(&group); if (vm) virDomainObjUnlock(vm); + lxcDriverUnlock(driver); return ret; } @@ -2686,55 +2930,170 @@ lxcSetSchedulerParameters(virDomainPtr domain, } static int -lxcGetSchedulerParametersFlags(virDomainPtr domain, +lxcGetSchedulerParametersFlags(virDomainPtr dom, virTypedParameterPtr params, int *nparams, unsigned int flags) { - lxc_driver_t *driver = domain->conn->privateData; + lxc_driver_t *driver = dom->conn->privateData; virCgroupPtr group = NULL; virDomainObjPtr vm = NULL; - unsigned long long val; + unsigned long long shares = 0; + unsigned long long period = 0; + long long quota = 0; int ret = -1; + int rc; + bool isActive; + bool cpu_bw_status = false; + int saved_nparams = 
0; - virCheckFlags(0, -1); - - if (driver->cgroup == NULL) - return -1; + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); lxcDriverLock(driver); - vm = virDomainFindByUUID(&driver->domains, domain->uuid); + + if ((flags & (VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG)) == + (VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG)) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("cannot query live and config together")); + goto cleanup; + } + + if (*nparams > 1) { + rc = lxcGetCpuBWStatus(driver->cgroup); + if (rc < 0) + goto cleanup; + cpu_bw_status = !!rc; + } + + vm = virDomainFindByUUID(&driver->domains, dom->uuid); if (vm == NULL) { - char uuidstr[VIR_UUID_STRING_BUFLEN]; - virUUIDFormat(domain->uuid, uuidstr); - lxcError(VIR_ERR_NO_DOMAIN, - _("No domain with matching uuid '%s'"), uuidstr); + lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + isActive = virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot query persistent config of a transient domain")); + goto cleanup; + } + + if (isActive) { + virDomainDefPtr persistentDef; + + persistentDef = virDomainObjGetPersistentDef(driver->caps, vm); + if (!persistentDef) { + lxcError(VIR_ERR_INTERNAL_ERROR, "%s", + _("can't get persistentDef")); + goto cleanup; + } + shares = persistentDef->cputune.shares; + if (*nparams > 1 && cpu_bw_status) { + period = persistentDef->cputune.period; + quota = persistentDef->cputune.quota; + } + } else { + shares = vm->def->cputune.shares; + if (*nparams > 1 && cpu_bw_status) { + period = vm->def->cputune.period; + quota = vm->def->cputune.quota; + } + } + goto out; + } + + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("domain is not running")); + goto cleanup; + 
} + + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("cgroup CPU controller is not mounted")); goto cleanup; } - if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), vm->def->name); goto cleanup; + } - if (virCgroupGetCpuShares(group, &val) != 0) + rc = virCgroupGetCpuShares(group, &shares); + if (rc != 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu shares tunable")); goto cleanup; - params[0].value.ul = val; + } + + if (*nparams > 1 && cpu_bw_status) { + rc = lxcGetVcpuBWLive(group, &period, "a); + if (rc != 0) + goto cleanup; + } +out: + params[0].value.ul = shares; + params[0].type = VIR_TYPED_PARAM_ULLONG; if (virStrcpyStatic(params[0].field, VIR_DOMAIN_SCHEDULER_CPU_SHARES) == NULL) { lxcError(VIR_ERR_INTERNAL_ERROR, - "%s", _("Field cpu_shares too big for destination")); + _("Field name '%s' too long"), + VIR_DOMAIN_SCHEDULER_CPU_SHARES); goto cleanup; } - params[0].type = VIR_TYPED_PARAM_ULLONG; - *nparams = 1; + saved_nparams++; + + if (cpu_bw_status) { + if (*nparams > saved_nparams) { + params[1].value.ul = period; + params[1].type = VIR_TYPED_PARAM_ULLONG; + if (virStrcpyStatic(params[1].field, + VIR_DOMAIN_SCHEDULER_VCPU_PERIOD) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_SCHEDULER_VCPU_PERIOD); + goto cleanup; + } + saved_nparams++; + } + + if (*nparams > saved_nparams) { + params[2].value.ul = quota; + params[2].type = VIR_TYPED_PARAM_LLONG; + if (virStrcpyStatic(params[2].field, + VIR_DOMAIN_SCHEDULER_VCPU_QUOTA) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_SCHEDULER_VCPU_QUOTA); + goto cleanup; + } + saved_nparams++; + } + } + + *nparams = saved_nparams; + ret = 0; cleanup: - lxcDriverUnlock(driver); 
virCgroupFree(&group); if (vm) virDomainObjUnlock(vm); + lxcDriverUnlock(driver); return ret; } -- 1.7.6.4

On Thu, Nov 24, 2011 at 11:38:14AM +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
* src/lxc/lxc_driver.c: Support changing quota/period for LXC containers * src/lxc/lxc_controller.c: Set initial quota/period at startup --- src/lxc/lxc_controller.c | 49 ++++- src/lxc/lxc_driver.c | 463 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 450 insertions(+), 62 deletions(-)
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 1db25fb..1685e15 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -382,6 +382,42 @@ static int lxcSetContainerCpuAffinity(virDomainDefPtr def) }
+static int lxcSetContainerCpuTune(virCgroupPtr cgroup, virDomainDefPtr def) +{ + int ret = -1; + if (def->cputune.shares != 0) { + int rc = virCgroupSetCpuShares(cgroup, def->cputune.shares); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to set io cpu shares for domain %s"), + def->name); + goto cleanup; + } + } + if (def->cputune.quota != 0) { + int rc = virCgroupSetCpuCfsQuota(cgroup, def->cputune.quota); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to set io cpu quota for domain %s"), + def->name); + goto cleanup; + } + } + if (def->cputune.period != 0) { + int rc = virCgroupSetCpuCfsPeriod(cgroup, def->cputune.period); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to set io cpu period for domain %s"), + def->name); + goto cleanup; + } + } + ret = 0; +cleanup: + return ret; +} + + /** * lxcSetContainerResources * @def: pointer to virtual machine structure @@ -432,6 +468,9 @@ static int lxcSetContainerResources(virDomainDefPtr def) goto cleanup; }
+ if (lxcSetContainerCpuTune(cgroup, def) < 0) + goto cleanup; + if (def->blkio.weight) { rc = virCgroupSetBlkioWeight(cgroup, def->blkio.weight); if (rc != 0) { @@ -442,16 +481,6 @@ static int lxcSetContainerResources(virDomainDefPtr def) } }
- if (def->cputune.shares) { - rc = virCgroupSetCpuShares(cgroup, def->cputune.shares); - if (rc != 0) { - virReportSystemError(-rc, - _("Unable to set cpu shares for domain %s"), - def->name); - goto cleanup; - } - } - rc = virCgroupSetMemory(cgroup, def->mem.max_balloon); if (rc != 0) { virReportSystemError(-rc, diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 1110c45..4445b5c 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -2596,84 +2596,328 @@ static int lxcVersion(virConnectPtr conn ATTRIBUTE_UNUSED, unsigned long *versio return 0; }
-static char *lxcGetSchedulerType(virDomainPtr domain ATTRIBUTE_UNUSED, + +/* + * check whether the host supports CFS bandwidth + * + * Return 1 when CFS bandwidth is supported, 0 when CFS bandwidth is not + * supported, -1 on error. + */ +static int lxcGetCpuBWStatus(virCgroupPtr cgroup) +{ + char *cfs_period_path = NULL; + int ret = -1; + + if (!cgroup) + return 0; + + if (virCgroupPathOfController(cgroup, VIR_CGROUP_CONTROLLER_CPU, + "cpu.cfs_period_us", &cfs_period_path) < 0) { + VIR_INFO("cannot get the path of cgroup CPU controller"); + ret = 0; + goto cleanup; + } + + if (access(cfs_period_path, F_OK) < 0) { + ret = 0; + } else { + ret = 1; + } + +cleanup: + VIR_FREE(cfs_period_path); + return ret; +} + + +static bool lxcCgroupControllerActive(lxc_driver_t *driver, + int controller) +{ + if (driver->cgroup == NULL) + return false; + if (controller < 0 || controller >= VIR_CGROUP_CONTROLLER_LAST) + return false; + if (!virCgroupMounted(driver->cgroup, controller)) + return false; +#if 0 + if (driver->cgroupControllers & (1 << controller)) + return true; +#endif + return false; +} + + + +static char *lxcGetSchedulerType(virDomainPtr domain, int *nparams) { - char *schedulerType = NULL; + lxc_driver_t *driver = domain->conn->privateData; + char *ret = NULL; + int rc;
- if (nparams) - *nparams = 1; + lxcDriverLock(driver); + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("cgroup CPU controller is not mounted")); + goto cleanup; + }
- schedulerType = strdup("posix"); + if (nparams) { + rc = lxcGetCpuBWStatus(driver->cgroup); + if (rc < 0) + goto cleanup; + else if (rc == 0) + *nparams = 1; + else + *nparams = 3; + }
- if (schedulerType == NULL) + ret = strdup("posix"); + if (!ret) virReportOOMError();
- return schedulerType; +cleanup: + lxcDriverUnlock(driver); + return ret; }
+ static int -lxcSetSchedulerParametersFlags(virDomainPtr domain, +lxcGetVcpuBWLive(virCgroupPtr cgroup, unsigned long long *period, + long long *quota) +{ + int rc; + + rc = virCgroupGetCpuCfsPeriod(cgroup, period); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu bandwidth period tunable")); + return -1; + } + + rc = virCgroupGetCpuCfsQuota(cgroup, quota); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu bandwidth tunable")); + return -1; + } + + return 0; +} + + +static int lxcSetVcpuBWLive(virCgroupPtr cgroup, unsigned long long period, + long long quota) +{ + int rc; + unsigned long long old_period; + + if (period == 0 && quota == 0) + return 0; + + if (period) { + /* get old period, and we can rollback if set quota failed */ + rc = virCgroupGetCpuCfsPeriod(cgroup, &old_period); + if (rc < 0) { + virReportSystemError(-rc, + "%s", _("Unable to get cpu bandwidth period")); + return -1; + } + + rc = virCgroupSetCpuCfsPeriod(cgroup, period); + if (rc < 0) { + virReportSystemError(-rc, + "%s", _("Unable to set cpu bandwidth period")); + return -1; + } + } + + if (quota) { + rc = virCgroupSetCpuCfsQuota(cgroup, quota); + if (rc < 0) { + virReportSystemError(-rc, + "%s", _("Unable to set cpu bandwidth quota")); + goto cleanup; + } + } + + return 0; + +cleanup: + if (period) { + rc = virCgroupSetCpuCfsPeriod(cgroup, old_period); + if (rc < 0) + virReportSystemError(-rc, + _("%s"), + "Unable to rollback cpu bandwidth period"); + } + + return -1; +} + + +static int +lxcSetSchedulerParametersFlags(virDomainPtr dom, virTypedParameterPtr params, int nparams, unsigned int flags) { - lxc_driver_t *driver = domain->conn->privateData; + lxc_driver_t *driver = dom->conn->privateData; int i; virCgroupPtr group = NULL; virDomainObjPtr vm = NULL; + virDomainDefPtr vmdef = NULL; int ret = -1; + bool isActive; + int rc;
- virCheckFlags(0, -1); - - if (driver->cgroup == NULL) - return -1; + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1);
lxcDriverLock(driver); - vm = virDomainFindByUUID(&driver->domains, domain->uuid); + + vm = virDomainFindByUUID(&driver->domains, dom->uuid);
if (vm == NULL) { - char uuidstr[VIR_UUID_STRING_BUFLEN]; - virUUIDFormat(domain->uuid, uuidstr); - lxcError(VIR_ERR_NO_DOMAIN, - _("No domain with matching uuid '%s'"), uuidstr); + lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); goto cleanup; }
- if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) - goto cleanup; + isActive = virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot change persistent config of a transient domain")); + goto cleanup; + } + + /* Make a copy for updated domain. */ + vmdef = virDomainObjCopyPersistentDef(driver->caps, vm); + if (!vmdef) + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("cgroup CPU controller is not mounted")); + goto cleanup; + } + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), + vm->def->name); + goto cleanup; + } + }
for (i = 0; i < nparams; i++) { virTypedParameterPtr param = ¶ms[i];
- if (STRNEQ(param->field, VIR_DOMAIN_SCHEDULER_CPU_SHARES)) { + if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_CPU_SHARES)) { + if (param->type != VIR_TYPED_PARAM_ULLONG) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for cpu_shares tunable, expected a 'ullong'")); + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = virCgroupSetCpuShares(group, params[i].value.ul); + if (rc != 0) { + virReportSystemError(-rc, "%s", + _("unable to set cpu shares tunable")); + goto cleanup; + } + + vm->def->cputune.shares = params[i].value.ul; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.shares = params[i].value.ul; + } + } else if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_VCPU_PERIOD)) { + if (param->type != VIR_TYPED_PARAM_ULLONG) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for vcpu_period tunable," + " expected a 'ullong'")); + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = lxcSetVcpuBWLive(group, params[i].value.ul, 0); + if (rc != 0) + goto cleanup; + + if (params[i].value.ul) + vm->def->cputune.period = params[i].value.ul; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.period = params[i].value.ul; + } + } else if (STREQ(param->field, VIR_DOMAIN_SCHEDULER_VCPU_QUOTA)) { + if (param->type != VIR_TYPED_PARAM_LLONG) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for vcpu_quota tunable," + " expected a 'llong'")); + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + rc = lxcSetVcpuBWLive(group, 0, params[i].value.l); + if (rc != 0) + goto cleanup; + + if (params[i].value.l) + vm->def->cputune.quota = params[i].value.l; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + vmdef->cputune.quota = params[i].value.l; + } + } else { lxcError(VIR_ERR_INVALID_ARG, _("Invalid parameter `%s'"), param->field); goto cleanup; } + }
- if (param->type != VIR_TYPED_PARAM_ULLONG) { - lxcError(VIR_ERR_INVALID_ARG, "%s", - _("Invalid type for cpu_shares tunable, expected a 'ullong'")); - goto cleanup; - } + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) + goto cleanup;
- int rc = virCgroupSetCpuShares(group, params[i].value.ul); - if (rc != 0) { - virReportSystemError(-rc, _("failed to set cpu_shares=%llu"), - params[i].value.ul); + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + rc = virDomainSaveConfig(driver->configDir, vmdef); + if (rc < 0) goto cleanup; - }
- vm->def->cputune.shares = params[i].value.ul; + virDomainObjAssignDef(vm, vmdef, false); + vmdef = NULL; } + ret = 0;
cleanup: - lxcDriverUnlock(driver); + virDomainDefFree(vmdef); virCgroupFree(&group); if (vm) virDomainObjUnlock(vm); + lxcDriverUnlock(driver); return ret; }
@@ -2686,55 +2930,170 @@ lxcSetSchedulerParameters(virDomainPtr domain, }
static int -lxcGetSchedulerParametersFlags(virDomainPtr domain, +lxcGetSchedulerParametersFlags(virDomainPtr dom, virTypedParameterPtr params, int *nparams, unsigned int flags) { - lxc_driver_t *driver = domain->conn->privateData; + lxc_driver_t *driver = dom->conn->privateData; virCgroupPtr group = NULL; virDomainObjPtr vm = NULL; - unsigned long long val; + unsigned long long shares = 0; + unsigned long long period = 0; + long long quota = 0; int ret = -1; + int rc; + bool isActive; + bool cpu_bw_status = false; + int saved_nparams = 0;
- virCheckFlags(0, -1); - - if (driver->cgroup == NULL) - return -1; + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1);
lxcDriverLock(driver); - vm = virDomainFindByUUID(&driver->domains, domain->uuid); + + if ((flags & (VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG)) == + (VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG)) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("cannot query live and config together")); + goto cleanup; + } + + if (*nparams > 1) { + rc = lxcGetCpuBWStatus(driver->cgroup); + if (rc < 0) + goto cleanup; + cpu_bw_status = !!rc; + } + + vm = virDomainFindByUUID(&driver->domains, dom->uuid);
if (vm == NULL) { - char uuidstr[VIR_UUID_STRING_BUFLEN]; - virUUIDFormat(domain->uuid, uuidstr); - lxcError(VIR_ERR_NO_DOMAIN, - _("No domain with matching uuid '%s'"), uuidstr); + lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + isActive = virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot query persistent config of a transient domain")); + goto cleanup; + } + + if (isActive) { + virDomainDefPtr persistentDef; + + persistentDef = virDomainObjGetPersistentDef(driver->caps, vm); + if (!persistentDef) { + lxcError(VIR_ERR_INTERNAL_ERROR, "%s", + _("can't get persistentDef")); + goto cleanup; + } + shares = persistentDef->cputune.shares; + if (*nparams > 1 && cpu_bw_status) { + period = persistentDef->cputune.period; + quota = persistentDef->cputune.quota; + } + } else { + shares = vm->def->cputune.shares; + if (*nparams > 1 && cpu_bw_status) { + period = vm->def->cputune.period; + quota = vm->def->cputune.quota; + } + } + goto out; + } + + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("domain is not running")); + goto cleanup; + } + + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_CPU)) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("cgroup CPU controller is not mounted")); goto cleanup; }
- if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), vm->def->name); goto cleanup; + }
- if (virCgroupGetCpuShares(group, &val) != 0) + rc = virCgroupGetCpuShares(group, &shares); + if (rc != 0) { + virReportSystemError(-rc, "%s", + _("unable to get cpu shares tunable")); goto cleanup; - params[0].value.ul = val; + } + + if (*nparams > 1 && cpu_bw_status) { + rc = lxcGetVcpuBWLive(group, &period, "a); + if (rc != 0) + goto cleanup; + } +out: + params[0].value.ul = shares; + params[0].type = VIR_TYPED_PARAM_ULLONG; if (virStrcpyStatic(params[0].field, VIR_DOMAIN_SCHEDULER_CPU_SHARES) == NULL) { lxcError(VIR_ERR_INTERNAL_ERROR, - "%s", _("Field cpu_shares too big for destination")); + _("Field name '%s' too long"), + VIR_DOMAIN_SCHEDULER_CPU_SHARES); goto cleanup; } - params[0].type = VIR_TYPED_PARAM_ULLONG;
- *nparams = 1; + saved_nparams++; + + if (cpu_bw_status) { + if (*nparams > saved_nparams) { + params[1].value.ul = period; + params[1].type = VIR_TYPED_PARAM_ULLONG; + if (virStrcpyStatic(params[1].field, + VIR_DOMAIN_SCHEDULER_VCPU_PERIOD) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_SCHEDULER_VCPU_PERIOD); + goto cleanup; + } + saved_nparams++; + } + + if (*nparams > saved_nparams) { + params[2].value.ul = quota; + params[2].type = VIR_TYPED_PARAM_LLONG; + if (virStrcpyStatic(params[2].field, + VIR_DOMAIN_SCHEDULER_VCPU_QUOTA) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_SCHEDULER_VCPU_QUOTA); + goto cleanup; + } + saved_nparams++; + } + } + + *nparams = saved_nparams; + ret = 0;
cleanup: - lxcDriverUnlock(driver); virCgroupFree(&group); if (vm) virDomainObjUnlock(vm); + lxcDriverUnlock(driver); return ret; }
ACK. I tried to look if some of that code could not be moved as generic code under utils/cgroup.[ch], but it's not that easy to share with the qemu driver; it doesn't seem one would gain much. Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On Thu, Nov 24, 2011 at 10:05:50PM +0800, Daniel Veillard wrote:
On Thu, Nov 24, 2011 at 11:38:14AM +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
* src/lxc/lxc_driver.c: Support changing quota/period for LXC containers * src/lxc/lxc_controller.c: Set initial quota/period at startup --- src/lxc/lxc_controller.c | 49 ++++- src/lxc/lxc_driver.c | 463 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 450 insertions(+), 62 deletions(-)
ACK. I tried to look if some of that code could not be moved as generic code under utils/cgroup.[ch], but it's not that easy to share with the qemu driver; it doesn't seem one would gain much.
It can't really go into utils/cgroup.[ch], but I think there is scope for creating a shared API in src/conf/domain_cgroup.[ch], like we did for domain_audit.[ch]. To be investigated later... Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

From: "Daniel P. Berrange" <berrange@redhat.com> * src/lxc/lxc_controller.c: Refactor setting of initial blkio tuning parameters * src/lxc/lxc_driver.c: Enable live change of blkio tuning --- src/lxc/lxc_controller.c | 31 ++++-- src/lxc/lxc_driver.c | 283 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 305 insertions(+), 9 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 1685e15..c17b172 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -418,6 +418,26 @@ cleanup: } +static int lxcSetContainerBlkioTune(virCgroupPtr cgroup, virDomainDefPtr def) +{ + int ret = -1; + + if (def->blkio.weight) { + int rc = virCgroupSetBlkioWeight(cgroup, def->blkio.weight); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to set Blkio weight for domain %s"), + def->name); + goto cleanup; + } + } + + ret = 0; +cleanup: + return ret; +} + + /** * lxcSetContainerResources * @def: pointer to virtual machine structure @@ -471,15 +491,8 @@ static int lxcSetContainerResources(virDomainDefPtr def) if (lxcSetContainerCpuTune(cgroup, def) < 0) goto cleanup; - if (def->blkio.weight) { - rc = virCgroupSetBlkioWeight(cgroup, def->blkio.weight); - if (rc != 0) { - virReportSystemError(-rc, - _("Unable to set Blkio weight for domain %s"), - def->name); - goto cleanup; - } - } + if (lxcSetContainerBlkioTune(cgroup, def) < 0) + goto cleanup; rc = virCgroupSetMemory(cgroup, def->mem.max_balloon); if (rc != 0) { diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 4445b5c..5099517 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -3105,6 +3105,287 @@ lxcGetSchedulerParameters(virDomainPtr domain, return lxcGetSchedulerParametersFlags(domain, params, nparams, 0); } + +static int lxcDomainSetBlkioParameters(virDomainPtr dom, + virTypedParameterPtr params, + int nparams, + unsigned int flags) +{ + lxc_driver_t *driver = dom->conn->privateData; + int i; + virCgroupPtr group = NULL; + virDomainObjPtr vm = 
NULL; + virDomainDefPtr persistentDef = NULL; + int ret = -1; + bool isActive; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + lxcDriverLock(driver); + + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + + if (vm == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + isActive = virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_BLKIO)) { + lxcError(VIR_ERR_OPERATION_INVALID, _("blkio cgroup isn't mounted")); + goto cleanup; + } + + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), vm->def->name); + goto cleanup; + } + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot change persistent config of a transient domain")); + goto cleanup; + } + if (!(persistentDef = virDomainObjGetPersistentDef(driver->caps, vm))) + goto cleanup; + } + + ret = 0; + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + for (i = 0; i < nparams; i++) { + virTypedParameterPtr param = ¶ms[i]; + + if (STREQ(param->field, VIR_DOMAIN_BLKIO_WEIGHT)) { + int rc; + if (param->type != VIR_TYPED_PARAM_UINT) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for blkio weight tunable, expected a 'unsigned int'")); + ret = -1; + continue; + } + + if (params[i].value.ui > 1000 || params[i].value.ui < 100) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("out of blkio weight range.")); + ret = -1; + continue; + } + + rc = virCgroupSetBlkioWeight(group, params[i].value.ui); + if (rc != 0) { + virReportSystemError(-rc, "%s", 
+ _("unable to set blkio weight tunable")); + ret = -1; + } + } else { + lxcError(VIR_ERR_INVALID_ARG, + _("Parameter `%s' not supported"), param->field); + ret = -1; + } + } + } else if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + /* Clang can't see that if we get here, persistentDef was set. */ + sa_assert(persistentDef); + + for (i = 0; i < nparams; i++) { + virTypedParameterPtr param = ¶ms[i]; + + if (STREQ(param->field, VIR_DOMAIN_BLKIO_WEIGHT)) { + if (param->type != VIR_TYPED_PARAM_UINT) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for blkio weight tunable, expected a 'unsigned int'")); + ret = -1; + continue; + } + + if (params[i].value.ui > 1000 || params[i].value.ui < 100) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("out of blkio weight range.")); + ret = -1; + continue; + } + + persistentDef->blkio.weight = params[i].value.ui; + } else { + lxcError(VIR_ERR_INVALID_ARG, + _("Parameter `%s' not supported"), param->field); + ret = -1; + } + } + + if (virDomainSaveConfig(driver->configDir, persistentDef) < 0) + ret = -1; + } + +cleanup: + virCgroupFree(&group); + if (vm) + virDomainObjUnlock(vm); + lxcDriverUnlock(driver); + return ret; +} + + +#define LXC_NB_BLKIO_PARAM 1 +static int lxcDomainGetBlkioParameters(virDomainPtr dom, + virTypedParameterPtr params, + int *nparams, + unsigned int flags) +{ + lxc_driver_t *driver = dom->conn->privateData; + int i; + virCgroupPtr group = NULL; + virDomainObjPtr vm = NULL; + virDomainDefPtr persistentDef = NULL; + unsigned int val; + int ret = -1; + int rc; + bool isActive; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + lxcDriverLock(driver); + + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + + if (vm == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + if ((*nparams) == 0) { + /* Current number of blkio parameters supported by cgroups */ + *nparams = LXC_NB_BLKIO_PARAM; + ret = 0; + goto cleanup; + } + + isActive = 
virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_BLKIO)) { + lxcError(VIR_ERR_OPERATION_INVALID, _("blkio cgroup isn't mounted")); + goto cleanup; + } + + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), vm->def->name); + goto cleanup; + } + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot change persistent config of a transient domain")); + goto cleanup; + } + if (!(persistentDef = virDomainObjGetPersistentDef(driver->caps, vm))) + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + for (i = 0; i < *nparams && i < LXC_NB_BLKIO_PARAM; i++) { + virTypedParameterPtr param = ¶ms[i]; + val = 0; + param->value.ui = 0; + param->type = VIR_TYPED_PARAM_UINT; + + switch (i) { + case 0: /* fill blkio weight here */ + rc = virCgroupGetBlkioWeight(group, &val); + if (rc != 0) { + virReportSystemError(-rc, "%s", + _("unable to get blkio weight")); + goto cleanup; + } + if (virStrcpyStatic(param->field, VIR_DOMAIN_BLKIO_WEIGHT) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_BLKIO_WEIGHT); + goto cleanup; + } + param->value.ui = val; + break; + + default: + break; + /* should not hit here */ + } + } + } else if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + for (i = 0; i < *nparams && i < LXC_NB_BLKIO_PARAM; i++) { + virTypedParameterPtr param = ¶ms[i]; + val = 0; + param->value.ui = 0; + param->type = VIR_TYPED_PARAM_UINT; + + switch (i) { + case 0: /* fill blkio weight here */ + if (virStrcpyStatic(param->field, 
VIR_DOMAIN_BLKIO_WEIGHT) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_BLKIO_WEIGHT); + goto cleanup; + } + param->value.ui = persistentDef->blkio.weight; + break; + + default: + break; + /* should not hit here */ + } + } + } + + if (LXC_NB_BLKIO_PARAM < *nparams) + *nparams = LXC_NB_BLKIO_PARAM; + ret = 0; + +cleanup: + if (group) + virCgroupFree(&group); + if (vm) + virDomainObjUnlock(vm); + lxcDriverUnlock(driver); + return ret; +} + + #ifdef __linux__ static int lxcDomainInterfaceStats(virDomainPtr dom, @@ -3600,6 +3881,8 @@ static virDriver lxcDriver = { .domainSetMemory = lxcDomainSetMemory, /* 0.7.2 */ .domainSetMemoryParameters = lxcDomainSetMemoryParameters, /* 0.8.5 */ .domainGetMemoryParameters = lxcDomainGetMemoryParameters, /* 0.8.5 */ + .domainSetBlkioParameters = lxcDomainSetBlkioParameters, /* 0.9.8 */ + .domainGetBlkioParameters = lxcDomainGetBlkioParameters, /* 0.9.8 */ .domainGetInfo = lxcDomainGetInfo, /* 0.4.2 */ .domainGetState = lxcDomainGetState, /* 0.9.2 */ .domainGetXMLDesc = lxcDomainGetXMLDesc, /* 0.4.2 */ -- 1.7.6.4

On Thu, Nov 24, 2011 at 11:38:15AM +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
* src/lxc/lxc_controller.c: Refactor setting of initial blkio tuning parameters * src/lxc/lxc_driver.c: Enable live change of blkio tuning --- src/lxc/lxc_controller.c | 31 ++++-- src/lxc/lxc_driver.c | 283 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 305 insertions(+), 9 deletions(-)
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 1685e15..c17b172 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -418,6 +418,26 @@ cleanup: }
+static int lxcSetContainerBlkioTune(virCgroupPtr cgroup, virDomainDefPtr def) +{ + int ret = -1; + + if (def->blkio.weight) { + int rc = virCgroupSetBlkioWeight(cgroup, def->blkio.weight); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to set Blkio weight for domain %s"), + def->name); + goto cleanup; + } + } + + ret = 0; +cleanup: + return ret; +} + + /** * lxcSetContainerResources * @def: pointer to virtual machine structure @@ -471,15 +491,8 @@ static int lxcSetContainerResources(virDomainDefPtr def) if (lxcSetContainerCpuTune(cgroup, def) < 0) goto cleanup;
- if (def->blkio.weight) { - rc = virCgroupSetBlkioWeight(cgroup, def->blkio.weight); - if (rc != 0) { - virReportSystemError(-rc, - _("Unable to set Blkio weight for domain %s"), - def->name); - goto cleanup; - } - } + if (lxcSetContainerBlkioTune(cgroup, def) < 0) + goto cleanup;
rc = virCgroupSetMemory(cgroup, def->mem.max_balloon); if (rc != 0) { diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 4445b5c..5099517 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -3105,6 +3105,287 @@ lxcGetSchedulerParameters(virDomainPtr domain, return lxcGetSchedulerParametersFlags(domain, params, nparams, 0); }
+ +static int lxcDomainSetBlkioParameters(virDomainPtr dom, + virTypedParameterPtr params, + int nparams, + unsigned int flags) +{ + lxc_driver_t *driver = dom->conn->privateData; + int i; + virCgroupPtr group = NULL; + virDomainObjPtr vm = NULL; + virDomainDefPtr persistentDef = NULL; + int ret = -1; + bool isActive; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + lxcDriverLock(driver); + + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + + if (vm == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + isActive = virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_BLKIO)) { + lxcError(VIR_ERR_OPERATION_INVALID, _("blkio cgroup isn't mounted")); + goto cleanup; + } + + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), vm->def->name); + goto cleanup; + } + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot change persistent config of a transient domain")); + goto cleanup; + } + if (!(persistentDef = virDomainObjGetPersistentDef(driver->caps, vm))) + goto cleanup; + } + + ret = 0; + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + for (i = 0; i < nparams; i++) { + virTypedParameterPtr param = ¶ms[i]; + + if (STREQ(param->field, VIR_DOMAIN_BLKIO_WEIGHT)) { + int rc; + if (param->type != VIR_TYPED_PARAM_UINT) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for blkio weight tunable, expected a 'unsigned int'")); + ret = -1; + continue; + } + + if (params[i].value.ui > 1000 || 
params[i].value.ui < 100) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("out of blkio weight range.")); + ret = -1; + continue; + } + + rc = virCgroupSetBlkioWeight(group, params[i].value.ui); + if (rc != 0) { + virReportSystemError(-rc, "%s", + _("unable to set blkio weight tunable")); + ret = -1; + } + } else { + lxcError(VIR_ERR_INVALID_ARG, + _("Parameter `%s' not supported"), param->field); + ret = -1; + } + } + } else if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + /* Clang can't see that if we get here, persistentDef was set. */ + sa_assert(persistentDef); + + for (i = 0; i < nparams; i++) { + virTypedParameterPtr param = ¶ms[i]; + + if (STREQ(param->field, VIR_DOMAIN_BLKIO_WEIGHT)) { + if (param->type != VIR_TYPED_PARAM_UINT) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("invalid type for blkio weight tunable, expected a 'unsigned int'")); + ret = -1; + continue; + } + + if (params[i].value.ui > 1000 || params[i].value.ui < 100) { + lxcError(VIR_ERR_INVALID_ARG, "%s", + _("out of blkio weight range.")); + ret = -1; + continue; + } + + persistentDef->blkio.weight = params[i].value.ui; + } else { + lxcError(VIR_ERR_INVALID_ARG, + _("Parameter `%s' not supported"), param->field); + ret = -1; + } + } + + if (virDomainSaveConfig(driver->configDir, persistentDef) < 0) + ret = -1; + } + +cleanup: + virCgroupFree(&group); + if (vm) + virDomainObjUnlock(vm); + lxcDriverUnlock(driver); + return ret; +} + + +#define LXC_NB_BLKIO_PARAM 1 +static int lxcDomainGetBlkioParameters(virDomainPtr dom, + virTypedParameterPtr params, + int *nparams, + unsigned int flags) +{ + lxc_driver_t *driver = dom->conn->privateData; + int i; + virCgroupPtr group = NULL; + virDomainObjPtr vm = NULL; + virDomainDefPtr persistentDef = NULL; + unsigned int val; + int ret = -1; + int rc; + bool isActive; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + lxcDriverLock(driver); + + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + + if (vm == NULL) { + 
lxcError(VIR_ERR_INTERNAL_ERROR, + _("No such domain %s"), dom->uuid); + goto cleanup; + } + + if ((*nparams) == 0) { + /* Current number of blkio parameters supported by cgroups */ + *nparams = LXC_NB_BLKIO_PARAM; + ret = 0; + goto cleanup; + } + + isActive = virDomainObjIsActive(vm); + + if (flags == VIR_DOMAIN_AFFECT_CURRENT) { + if (isActive) + flags = VIR_DOMAIN_AFFECT_LIVE; + else + flags = VIR_DOMAIN_AFFECT_CONFIG; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + if (!isActive) { + lxcError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + if (!lxcCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_BLKIO)) { + lxcError(VIR_ERR_OPERATION_INVALID, _("blkio cgroup isn't mounted")); + goto cleanup; + } + + if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("cannot find cgroup for domain %s"), vm->def->name); + goto cleanup; + } + } + + if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + if (!vm->persistent) { + lxcError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot change persistent config of a transient domain")); + goto cleanup; + } + if (!(persistentDef = virDomainObjGetPersistentDef(driver->caps, vm))) + goto cleanup; + } + + if (flags & VIR_DOMAIN_AFFECT_LIVE) { + for (i = 0; i < *nparams && i < LXC_NB_BLKIO_PARAM; i++) { + virTypedParameterPtr param = ¶ms[i]; + val = 0; + param->value.ui = 0; + param->type = VIR_TYPED_PARAM_UINT; + + switch (i) { + case 0: /* fill blkio weight here */ + rc = virCgroupGetBlkioWeight(group, &val); + if (rc != 0) { + virReportSystemError(-rc, "%s", + _("unable to get blkio weight")); + goto cleanup; + } + if (virStrcpyStatic(param->field, VIR_DOMAIN_BLKIO_WEIGHT) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_BLKIO_WEIGHT); + goto cleanup; + } + param->value.ui = val; + break; + + default: + break; + /* should not hit here */ + } + } + } else if (flags & VIR_DOMAIN_AFFECT_CONFIG) { + for (i = 0; 
i < *nparams && i < LXC_NB_BLKIO_PARAM; i++) { + virTypedParameterPtr param = &params[i]; + val = 0; + param->value.ui = 0; + param->type = VIR_TYPED_PARAM_UINT; + + switch (i) { + case 0: /* fill blkio weight here */ + if (virStrcpyStatic(param->field, VIR_DOMAIN_BLKIO_WEIGHT) == NULL) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("Field name '%s' too long"), + VIR_DOMAIN_BLKIO_WEIGHT); + goto cleanup; + } + param->value.ui = persistentDef->blkio.weight; + break; + + default: + break; + /* should not hit here */ + } + } + } + + if (LXC_NB_BLKIO_PARAM < *nparams) + *nparams = LXC_NB_BLKIO_PARAM; + ret = 0; + +cleanup: + if (group) + virCgroupFree(&group); + if (vm) + virDomainObjUnlock(vm); + lxcDriverUnlock(driver); + return ret; +} + + #ifdef __linux__ static int lxcDomainInterfaceStats(virDomainPtr dom, @@ -3600,6 +3881,8 @@ static virDriver lxcDriver = { .domainSetMemory = lxcDomainSetMemory, /* 0.7.2 */ .domainSetMemoryParameters = lxcDomainSetMemoryParameters, /* 0.8.5 */ .domainGetMemoryParameters = lxcDomainGetMemoryParameters, /* 0.8.5 */ + .domainSetBlkioParameters = lxcDomainSetBlkioParameters, /* 0.9.8 */ + .domainGetBlkioParameters = lxcDomainGetBlkioParameters, /* 0.9.8 */ .domainGetInfo = lxcDomainGetInfo, /* 0.4.2 */ .domainGetState = lxcDomainGetState, /* 0.9.2 */ .domainGetXMLDesc = lxcDomainGetXMLDesc, /* 0.4.2 */
ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

From: "Daniel P. Berrange" <berrange@redhat.com> To make lxcSetContainerResources smaller, pull the mem tune and device ACL setup code out into separate methods * src/lxc/lxc_controller.c: Introduce lxcSetContainerMemTune and lxcSetContainerDeviceACL --- src/lxc/lxc_controller.c | 138 +++++++++++++++++++++++++++------------------ 1 files changed, 83 insertions(+), 55 deletions(-) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index c17b172..f5e38a7 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -438,61 +438,10 @@ cleanup: } -/** - * lxcSetContainerResources - * @def: pointer to virtual machine structure - * - * Creates a cgroup for the container, moves the task inside, - * and sets resource limits - * - * Returns 0 on success or -1 in case of error - */ -static int lxcSetContainerResources(virDomainDefPtr def) +static int lxcSetContainerMemTune(virCgroupPtr cgroup, virDomainDefPtr def) { - virCgroupPtr driver; - virCgroupPtr cgroup; - int rc = -1; - int i; - struct cgroup_device_policy devices[] = { - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM}, - {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY}, - {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, - {0, 0, 0}}; - - if (lxcSetContainerCpuAffinity(def) < 0) - return -1; - - if (lxcSetContainerNUMAPolicy(def) < 0) - return -1; - - rc = virCgroupForDriver("lxc", &driver, 1, 0); - if (rc != 0) { - /* Skip all if no driver cgroup is configured */ - if (rc == -ENXIO || rc == -ENOENT) - return 0; - - virReportSystemError(-rc, "%s", - _("Unable to get cgroup for driver")); - return rc; - } - - rc = virCgroupForDomain(driver, def->name, &cgroup, 1); - if (rc != 0) { - virReportSystemError(-rc, - _("Unable to create cgroup for domain %s"), - def->name); - goto cleanup; - } - - if (lxcSetContainerCpuTune(cgroup, 
def) < 0) - goto cleanup; - - if (lxcSetContainerBlkioTune(cgroup, def) < 0) - goto cleanup; + int ret = -1; + int rc; rc = virCgroupSetMemory(cgroup, def->mem.max_balloon); if (rc != 0) { @@ -532,6 +481,27 @@ static int lxcSetContainerResources(virDomainDefPtr def) } } + ret = 0; +cleanup: + return ret; +} + + +static int lxcSetContainerDeviceACL(virCgroupPtr cgroup, virDomainDefPtr def) +{ + int ret = -1; + int rc; + size_t i; + static const struct cgroup_device_policy devices[] = { + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM}, + {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY}, + {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, + {0, 0, 0}}; + rc = virCgroupDenyAllDevices(cgroup); if (rc != 0) { virReportSystemError(-rc, @@ -541,7 +511,7 @@ static int lxcSetContainerResources(virDomainDefPtr def) } for (i = 0; devices[i].type != 0; i++) { - struct cgroup_device_policy *dev = &devices[i]; + const struct cgroup_device_policy *dev = &devices[i]; rc = virCgroupAllowDevice(cgroup, dev->type, dev->major, @@ -581,6 +551,64 @@ static int lxcSetContainerResources(virDomainDefPtr def) goto cleanup; } + ret = 0; +cleanup: + return ret; +} + + +/** + * lxcSetContainerResources + * @def: pointer to virtual machine structure + * + * Creates a cgroup for the container, moves the task inside, + * and sets resource limits + * + * Returns 0 on success or -1 in case of error + */ +static int lxcSetContainerResources(virDomainDefPtr def) +{ + virCgroupPtr driver; + virCgroupPtr cgroup; + int rc = -1; + + if (lxcSetContainerCpuAffinity(def) < 0) + return -1; + + if (lxcSetContainerNUMAPolicy(def) < 0) + return -1; + + rc = virCgroupForDriver("lxc", &driver, 1, 0); + if (rc != 0) { + /* Skip all if no driver cgroup is configured */ + if (rc == -ENXIO || rc == -ENOENT) + return 0; + + virReportSystemError(-rc, 
"%s", + _("Unable to get cgroup for driver")); + return rc; + } + + rc = virCgroupForDomain(driver, def->name, &cgroup, 1); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for domain %s"), + def->name); + goto cleanup; + } + + if (lxcSetContainerCpuTune(cgroup, def) < 0) + goto cleanup; + + if (lxcSetContainerBlkioTune(cgroup, def) < 0) + goto cleanup; + + if (lxcSetContainerMemTune(cgroup, def) < 0) + goto cleanup; + + if (lxcSetContainerDeviceACL(cgroup, def) < 0) + goto cleanup; + rc = virCgroupAddTask(cgroup, getpid()); if (rc != 0) { virReportSystemError(-rc, -- 1.7.6.4

On Thu, Nov 24, 2011 at 11:38:16AM +0000, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
To make lxcSetContainerResources smaller, pull the mem tune and device ACL setup code out into separate methods
* src/lxc/lxc_controller.c: Introduce lxcSetContainerMemTune and lxcSetContainerDeviceACL --- src/lxc/lxc_controller.c | 138 +++++++++++++++++++++++++++------------------ 1 files changed, 83 insertions(+), 55 deletions(-)
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index c17b172..f5e38a7 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -438,61 +438,10 @@ cleanup: }
-/** - * lxcSetContainerResources - * @def: pointer to virtual machine structure - * - * Creates a cgroup for the container, moves the task inside, - * and sets resource limits - * - * Returns 0 on success or -1 in case of error - */ -static int lxcSetContainerResources(virDomainDefPtr def) +static int lxcSetContainerMemTune(virCgroupPtr cgroup, virDomainDefPtr def) { - virCgroupPtr driver; - virCgroupPtr cgroup; - int rc = -1; - int i; - struct cgroup_device_policy devices[] = { - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM}, - {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM}, - {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY}, - {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, - {0, 0, 0}}; - - if (lxcSetContainerCpuAffinity(def) < 0) - return -1; - - if (lxcSetContainerNUMAPolicy(def) < 0) - return -1; - - rc = virCgroupForDriver("lxc", &driver, 1, 0); - if (rc != 0) { - /* Skip all if no driver cgroup is configured */ - if (rc == -ENXIO || rc == -ENOENT) - return 0; - - virReportSystemError(-rc, "%s", - _("Unable to get cgroup for driver")); - return rc; - } - - rc = virCgroupForDomain(driver, def->name, &cgroup, 1); - if (rc != 0) { - virReportSystemError(-rc, - _("Unable to create cgroup for domain %s"), - def->name); - goto cleanup; - } - - if (lxcSetContainerCpuTune(cgroup, def) < 0) - goto cleanup; - - if (lxcSetContainerBlkioTune(cgroup, def) < 0) - goto cleanup; + int ret = -1; + int rc;
rc = virCgroupSetMemory(cgroup, def->mem.max_balloon); if (rc != 0) { @@ -532,6 +481,27 @@ static int lxcSetContainerResources(virDomainDefPtr def) } }
+ ret = 0; +cleanup: + return ret; +} + + +static int lxcSetContainerDeviceACL(virCgroupPtr cgroup, virDomainDefPtr def) +{ + int ret = -1; + int rc; + size_t i; + static const struct cgroup_device_policy devices[] = { + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM}, + {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM}, + {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY}, + {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX}, + {0, 0, 0}}; + rc = virCgroupDenyAllDevices(cgroup); if (rc != 0) { virReportSystemError(-rc, @@ -541,7 +511,7 @@ static int lxcSetContainerResources(virDomainDefPtr def) }
for (i = 0; devices[i].type != 0; i++) { - struct cgroup_device_policy *dev = &devices[i]; + const struct cgroup_device_policy *dev = &devices[i]; rc = virCgroupAllowDevice(cgroup, dev->type, dev->major, @@ -581,6 +551,64 @@ static int lxcSetContainerResources(virDomainDefPtr def) goto cleanup; }
+ ret = 0; +cleanup: + return ret; +} + + +/** + * lxcSetContainerResources + * @def: pointer to virtual machine structure + * + * Creates a cgroup for the container, moves the task inside, + * and sets resource limits + * + * Returns 0 on success or -1 in case of error + */ +static int lxcSetContainerResources(virDomainDefPtr def) +{ + virCgroupPtr driver; + virCgroupPtr cgroup; + int rc = -1; + + if (lxcSetContainerCpuAffinity(def) < 0) + return -1; + + if (lxcSetContainerNUMAPolicy(def) < 0) + return -1; + + rc = virCgroupForDriver("lxc", &driver, 1, 0); + if (rc != 0) { + /* Skip all if no driver cgroup is configured */ + if (rc == -ENXIO || rc == -ENOENT) + return 0; + + virReportSystemError(-rc, "%s", + _("Unable to get cgroup for driver")); + return rc; + } + + rc = virCgroupForDomain(driver, def->name, &cgroup, 1); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for domain %s"), + def->name); + goto cleanup; + } + + if (lxcSetContainerCpuTune(cgroup, def) < 0) + goto cleanup; + + if (lxcSetContainerBlkioTune(cgroup, def) < 0) + goto cleanup; + + if (lxcSetContainerMemTune(cgroup, def) < 0) + goto cleanup; + + if (lxcSetContainerDeviceACL(cgroup, def) < 0) + goto cleanup; + rc = virCgroupAddTask(cgroup, getpid()); if (rc != 0) { virReportSystemError(-rc,
ACK, it doesn't look like the refactoring changes the order of the operations, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/
participants (2)
-
Daniel P. Berrange
-
Daniel Veillard