[libvirt PATCH v3 00/13] cgroup and thread management in ch driver.

This patchset adds support for cgroup management of ch threads. This version correctly manages cgroups for vcpu and emulator threads created by ch. cgroup management for iothreads is not yet supported. Along with cgroup management, this patchset also enables support for pinning vcpu and emulator threads to selected host cpus. v3: * addrressed all the formatting comments in v2 patch set * dropped indentation patches are they do not adhere to libvirt coding style * fixed build issue in qemu driver that was introduced in v2 Praveen K Paladugu (5): util: Helper functions to get process info ch_driver,ch_domain: vcpu info getter callbacks qemu,hypervisor: refactor some cgroup mgmt methods ch_process: Setup emulator and iothread settings ch_driver: emulator threadinfo & pinning callbacks Vineeth Pillai (8): ch_domain: add virCHDomainGetMonitor helper method ch_domain: add methods to manage private vcpu data ch_driver: domainGetVcpuPinInfo and nodeGetCPUMap ch_monitor: Get nicindexes in prep for cgroup mgmt ch: methods for cgroup mgmt in ch driver ch_driver,ch_domain: vcpupin callback in ch driver ch_driver: enable typed param string for numatune ch_driver: add numatune callbacks for CH driver src/ch/ch_conf.c | 2 + src/ch/ch_conf.h | 6 +- src/ch/ch_domain.c | 172 ++++++- src/ch/ch_domain.h | 32 +- src/ch/ch_driver.c | 789 +++++++++++++++++++++++++++++++++ src/ch/ch_monitor.c | 341 +++++++++++--- src/ch/ch_monitor.h | 60 ++- src/ch/ch_process.c | 385 +++++++++++++++- src/ch/ch_process.h | 3 + src/ch/meson.build | 1 + src/hypervisor/domain_cgroup.c | 426 +++++++++++++++++- src/hypervisor/domain_cgroup.h | 52 +++ src/libvirt_private.syms | 15 + src/qemu/qemu_cgroup.c | 410 +---------------- src/qemu/qemu_cgroup.h | 11 - src/qemu/qemu_driver.c | 130 +----- src/qemu/qemu_hotplug.c | 7 +- src/qemu/qemu_process.c | 20 +- src/util/virprocess.c | 108 +++++ src/util/virprocess.h | 5 + 20 files changed, 2357 insertions(+), 618 deletions(-) -- 2.27.0

Move qemuGetProcessInfo and qemuGetSchedInfo methods to util and share them with ch driver. Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/libvirt_private.syms | 2 + src/qemu/qemu_driver.c | 116 ++------------------------------------- src/util/virprocess.c | 108 ++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 5 ++ 4 files changed, 120 insertions(+), 111 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index da27ee7b53..56adc192cd 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3101,8 +3101,10 @@ virProcessGetAffinity; virProcessGetMaxMemLock; virProcessGetNamespaces; virProcessGetPids; +virProcessGetSchedInfo; virProcessGetStartTime; virProcessGetStat; +virProcessGetStatInfo; virProcessGroupGet; virProcessGroupKill; virProcessKill; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e444ad2d45..ba3efef42b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1317,113 +1317,6 @@ qemuGetSchedstatDelay(unsigned long long *cpudelay, return 0; } - -static int -qemuGetSchedInfo(unsigned long long *cpuWait, - pid_t pid, pid_t tid) -{ - g_autofree char *proc = NULL; - g_autofree char *data = NULL; - g_auto(GStrv) lines = NULL; - size_t i; - double val; - - *cpuWait = 0; - - /* In general, we cannot assume pid_t fits in int; but /proc parsing - * is specific to Linux where int works fine. */ - if (tid) - proc = g_strdup_printf("/proc/%d/task/%d/sched", (int)pid, (int)tid); - else - proc = g_strdup_printf("/proc/%d/sched", (int)pid); - if (!proc) - return -1; - - /* The file is not guaranteed to exist (needs CONFIG_SCHED_DEBUG) */ - if (access(proc, R_OK) < 0) { - return 0; - } - - if (virFileReadAll(proc, (1<<16), &data) < 0) - return -1; - - lines = g_strsplit(data, "\n", 0); - if (!lines) - return -1; - - for (i = 0; lines[i] != NULL; i++) { - const char *line = lines[i]; - - /* Needs CONFIG_SCHEDSTATS. The second check - * is the old name the kernel used in past */ - if (STRPREFIX(line, "se.statistics.wait_sum") || - STRPREFIX(line, "se.wait_sum")) { - line = strchr(line, ':'); - if (!line) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Missing separator in sched info '%s'"), - lines[i]); - return -1; - } - line++; - while (*line == ' ') - line++; - - if (virStrToDouble(line, NULL, &val) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Unable to parse sched info value '%s'"), - line); - return -1; - } - - *cpuWait = (unsigned long long)(val * 1000000); - break; - } - } - - return 0; -} - - -static int -qemuGetProcessInfo(unsigned long long *cpuTime, int *lastCpu, long *vm_rss, - pid_t pid, pid_t tid) -{ - g_auto(GStrv) proc_stat = virProcessGetStat(pid, tid); - unsigned long long usertime = 0, systime = 0; - long rss = 0; - int cpu = 0; - - if (!proc_stat || - virStrToLong_ullp(proc_stat[VIR_PROCESS_STAT_UTIME], NULL, 10, &usertime) < 0 || - virStrToLong_ullp(proc_stat[VIR_PROCESS_STAT_STIME], NULL, 10, &systime) < 0 || - virStrToLong_l(proc_stat[VIR_PROCESS_STAT_RSS], NULL, 10, &rss) < 0 || - virStrToLong_i(proc_stat[VIR_PROCESS_STAT_PROCESSOR], NULL, 10, &cpu) < 0) { - VIR_WARN("cannot parse process status data"); - } - - /* We got jiffies - * We want nanoseconds - * _SC_CLK_TCK is jiffies per second - * So calculate thus.... - */ - if (cpuTime) - *cpuTime = 1000ull * 1000ull * 1000ull * (usertime + systime) - / (unsigned long long)sysconf(_SC_CLK_TCK); - if (lastCpu) - *lastCpu = cpu; - - if (vm_rss) - *vm_rss = rss * virGetSystemPageSizeKB(); - - - VIR_DEBUG("Got status for %d/%d user=%llu sys=%llu cpu=%d rss=%ld", - (int)pid, tid, usertime, systime, cpu, rss); - - return 0; -} - - static int qemuDomainHelperGetVcpus(virDomainObj *vm, virVcpuInfoPtr info, @@ -1463,7 +1356,7 @@ qemuDomainHelperGetVcpus(virDomainObj *vm, vcpuinfo->number = i; vcpuinfo->state = VIR_VCPU_RUNNING; - if (qemuGetProcessInfo(&vcpuinfo->cpuTime, + if (virProcessGetStatInfo(&vcpuinfo->cpuTime, &vcpuinfo->cpu, NULL, vm->pid, vcpupid) < 0) { virReportSystemError(errno, "%s", @@ -1483,7 +1376,7 @@ qemuDomainHelperGetVcpus(virDomainObj *vm, } if (cpuwait) { - if (qemuGetSchedInfo(&(cpuwait[ncpuinfo]), vm->pid, vcpupid) < 0) + if (virProcessGetSchedInfo(&(cpuwait[ncpuinfo]), vm->pid, vcpupid) < 0) return -1; } @@ -2626,7 +2519,8 @@ qemuDomainGetInfo(virDomainPtr dom, } if (virDomainObjIsActive(vm)) { - if (qemuGetProcessInfo(&(info->cpuTime), NULL, NULL, vm->pid, 0) < 0) { + if (virProcessGetStatInfo(&(info->cpuTime), NULL, NULL, + vm->pid, 0) < 0) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("cannot read cputime for domain")); goto cleanup; @@ -10623,7 +10517,7 @@ qemuDomainMemoryStatsInternal(virQEMUDriver *driver, ret = 0; } - if (qemuGetProcessInfo(NULL, NULL, &rss, vm->pid, 0) < 0) { + if (virProcessGetStatInfo(NULL, NULL, &rss, vm->pid, 0) < 0) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("cannot get RSS for domain")); } else { diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 7b0ad9c97b..3be9080b67 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1764,3 +1764,111 @@ virProcessGetStat(pid_t pid, return ret; } + + +int +virProcessGetStatInfo(unsigned long long *cpuTime, int *lastCpu, long *vm_rss, + pid_t pid, pid_t tid) +{ + g_auto(GStrv) proc_stat = virProcessGetStat(pid, tid); + unsigned long long usertime = 0, systime = 0; + long rss = 0; + int cpu = 0; + + if (!proc_stat || + virStrToLong_ullp(proc_stat[VIR_PROCESS_STAT_UTIME], NULL, 10, + &usertime) < 0 + || virStrToLong_ullp(proc_stat[VIR_PROCESS_STAT_STIME], NULL, 10, + &systime) < 0 + || virStrToLong_l(proc_stat[VIR_PROCESS_STAT_RSS], NULL, 10, &rss) < 0 + || virStrToLong_i(proc_stat[VIR_PROCESS_STAT_PROCESSOR], NULL, 10, + &cpu) < 0) { + VIR_WARN("cannot parse process status data"); + } + + /* We got jiffies + * We want nanoseconds + * _SC_CLK_TCK is jiffies per second + * So calculate thus.... + */ + if (cpuTime) + *cpuTime = 1000ull * 1000ull * 1000ull * (usertime + systime) + / (unsigned long long) sysconf(_SC_CLK_TCK); + if (lastCpu) + *lastCpu = cpu; + + if (vm_rss) + *vm_rss = rss * virGetSystemPageSizeKB(); + + + VIR_DEBUG("Got status for %d/%d user=%llu sys=%llu cpu=%d rss=%ld", + (int) pid, tid, usertime, systime, cpu, rss); + + return 0; +} + +int +virProcessGetSchedInfo(unsigned long long *cpuWait, pid_t pid, pid_t tid) +{ + g_autofree char *proc = NULL; + g_autofree char *data = NULL; + + g_auto(GStrv) lines = NULL; + size_t i; + double val; + + *cpuWait = 0; + + /* In general, we cannot assume pid_t fits in int; but /proc parsing + * is specific to Linux where int works fine. */ + if (tid) + proc = g_strdup_printf("/proc/%d/task/%d/sched", (int) pid, (int) tid); + else + proc = g_strdup_printf("/proc/%d/sched", (int) pid); + if (!proc) + return -1; + + /* The file is not guaranteed to exist (needs CONFIG_SCHED_DEBUG) */ + if (access(proc, R_OK) < 0) { + return 0; + } + + if (virFileReadAll(proc, (1 << 16), &data) < 0) + return -1; + + lines = g_strsplit(data, "\n", 0); + if (!lines) + return -1; + + for (i = 0; lines[i] != NULL; i++) { + const char *line = lines[i]; + + /* Needs CONFIG_SCHEDSTATS. The second check + * is the old name the kernel used in past */ + if (STRPREFIX(line, "se.statistics.wait_sum") || + STRPREFIX(line, "se.wait_sum")) { + line = strchr(line, ':'); + if (!line) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Missing separator in sched info '%s'"), + lines[i]); + return -1; + } + line++; + while (*line == ' ') + line++; + + if (virStrToDouble(line, NULL, &val) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to parse sched info value '%s'"), + line); + return -1; + } + + *cpuWait = (unsigned long long) (val * 1000000); + break; + } + } + + return 0; +} diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 82b7403964..d9d27c29b8 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -193,3 +193,8 @@ typedef enum { } virProcessNamespaceFlags; int virProcessNamespaceAvailable(unsigned int ns); + +int virProcessGetStatInfo(unsigned long long *cpuTime, int *lastCpu, + long *vm_rss, pid_t pid, pid_t tid); +int virProcessGetSchedInfo(unsigned long long *cpuWait, pid_t pid, + pid_t tid); -- 2.27.0

On 12/10/21 21:34, Praveen K Paladugu wrote:
Move qemuGetProcessInfo and qemuGetSchedInfo methods to util and share them with ch driver.
Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/libvirt_private.syms | 2 + src/qemu/qemu_driver.c | 116 ++------------------------------------- src/util/virprocess.c | 108 ++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 5 ++ 4 files changed, 120 insertions(+), 111 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index da27ee7b53..56adc192cd 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3101,8 +3101,10 @@ virProcessGetAffinity; virProcessGetMaxMemLock; virProcessGetNamespaces; virProcessGetPids; +virProcessGetSchedInfo; virProcessGetStartTime; virProcessGetStat; +virProcessGetStatInfo; virProcessGroupGet; virProcessGroupKill; virProcessKill; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e444ad2d45..ba3efef42b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c
@@ -1463,7 +1356,7 @@ qemuDomainHelperGetVcpus(virDomainObj *vm, vcpuinfo->number = i; vcpuinfo->state = VIR_VCPU_RUNNING;
- if (qemuGetProcessInfo(&vcpuinfo->cpuTime, + if (virProcessGetStatInfo(&vcpuinfo->cpuTime, &vcpuinfo->cpu, NULL, vm->pid, vcpupid) < 0) {
Indentation.
virReportSystemError(errno, "%s", @@ -1483,7 +1376,7 @@ qemuDomainHelperGetVcpus(virDomainObj *vm, }
if (cpuwait) { - if (qemuGetSchedInfo(&(cpuwait[ncpuinfo]), vm->pid, vcpupid) < 0) + if (virProcessGetSchedInfo(&(cpuwait[ncpuinfo]), vm->pid, vcpupid) < 0) return -1; }
@@ -2626,7 +2519,8 @@ qemuDomainGetInfo(virDomainPtr dom, }
if (virDomainObjIsActive(vm)) { - if (qemuGetProcessInfo(&(info->cpuTime), NULL, NULL, vm->pid, 0) < 0) { + if (virProcessGetStatInfo(&(info->cpuTime), NULL, NULL, + vm->pid, 0) < 0) {
And here too.
virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("cannot read cputime for domain")); goto cleanup; @@ -10623,7 +10517,7 @@ qemuDomainMemoryStatsInternal(virQEMUDriver *driver, ret = 0; }
- if (qemuGetProcessInfo(NULL, NULL, &rss, vm->pid, 0) < 0) { + if (virProcessGetStatInfo(NULL, NULL, &rss, vm->pid, 0) < 0) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("cannot get RSS for domain")); } else { diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 7b0ad9c97b..3be9080b67 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1764,3 +1764,111 @@ virProcessGetStat(pid_t pid,
return ret; } + + +int +virProcessGetStatInfo(unsigned long long *cpuTime, int *lastCpu, long *vm_rss, + pid_t pid, pid_t tid)
Since you are touching this, it would be nice if these arguments are on separate lines. This applies here and to the rest of the patches. The idea behind is that when a new argument is introduced a smaller diff can be produced, which in turn opens a possibility of less conflicts during backports. I'm not going to raise this in the rest of my review. Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Michal

From: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 6 ++++++ src/ch/ch_domain.h | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index dd4de9e1ea..bf4ce83595 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -290,3 +290,9 @@ virDomainDefParserConfig virCHDriverDomainDefParserConfig = { .domainPostParseCallback = virCHDomainDefPostParse, .deviceValidateCallback = chValidateDomainDeviceDef, }; + +virCHMonitor * +virCHDomainGetMonitor(virDomainObj *vm) +{ + return CH_DOMAIN_PRIVATE(vm)->monitor; +} diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 61b34b0467..c053b25c65 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -57,6 +57,11 @@ struct _virCHDomainObjPrivate { virChrdevs *chrdevs; }; +#define CH_DOMAIN_PRIVATE(vm) \ + ((virCHDomainObjPrivate*)(vm)->privateData) + +virCHMonitor *virCHDomainGetMonitor(virDomainObj *vm); + extern virDomainXMLPrivateDataCallbacks virCHDriverPrivateDataCallbacks; extern virDomainDefParserConfig virCHDriverDomainDefParserConfig; -- 2.27.0

On 12/10/21 21:34, Praveen K Paladugu wrote:
From: Vineeth Pillai <viremana@linux.microsoft.com>
Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 6 ++++++ src/ch/ch_domain.h | 5 +++++ 2 files changed, 11 insertions(+)
Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Michal

From: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 50 +++++++++++++++++++++++++++++++++++++++++----- src/ch/ch_domain.h | 11 ++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index bf4ce83595..36333a8a3f 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -164,11 +164,6 @@ virCHDomainObjPrivateFree(void *data) g_free(priv); } -virDomainXMLPrivateDataCallbacks virCHDriverPrivateDataCallbacks = { - .alloc = virCHDomainObjPrivateAlloc, - .free = virCHDomainObjPrivateFree, -}; - static int virCHDomainDefPostParseBasic(virDomainDef *def, void *opaque G_GNUC_UNUSED) @@ -185,6 +180,45 @@ virCHDomainDefPostParseBasic(virDomainDef *def, return 0; } +static virClass *virCHDomainVcpuPrivateClass; +static void virCHDomainVcpuPrivateDispose(void *obj); + +static int +virCHDomainVcpuPrivateOnceInit(void) +{ + if (!VIR_CLASS_NEW(virCHDomainVcpuPrivate, virClassForObject())) + return -1; + + return 0; +} + +VIR_ONCE_GLOBAL_INIT(virCHDomainVcpuPrivate); + +static virObject * +virCHDomainVcpuPrivateNew(void) +{ + virCHDomainVcpuPrivate *priv; + + if (virCHDomainVcpuPrivateInitialize() < 0) + return NULL; + + if (!(priv = virObjectNew(virCHDomainVcpuPrivateClass))) + return NULL; + + return (virObject *) priv; +} + + +static void +virCHDomainVcpuPrivateDispose(void *obj) +{ + virCHDomainVcpuPrivate *priv = obj; + + priv->tid = 0; + + return; +} + static int virCHDomainDefPostParse(virDomainDef *def, unsigned int parseFlags G_GNUC_UNUSED, @@ -203,6 +237,12 @@ virCHDomainDefPostParse(virDomainDef *def, return 0; } +virDomainXMLPrivateDataCallbacks virCHDriverPrivateDataCallbacks = { + .alloc = virCHDomainObjPrivateAlloc, + .free = virCHDomainObjPrivateFree, + .vcpuNew = virCHDomainVcpuPrivateNew, +}; + static int chValidateDomainDeviceDef(const virDomainDeviceDef *dev, const virDomainDef *def G_GNUC_UNUSED, diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index c053b25c65..f01c0e5ad0 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -62,6 +62,17 @@ struct _virCHDomainObjPrivate { virCHMonitor *virCHDomainGetMonitor(virDomainObj *vm); +typedef struct _virCHDomainVcpuPrivate virCHDomainVcpuPrivate; +struct _virCHDomainVcpuPrivate { + virObject parent; + + pid_t tid; /* vcpu thread id */ + virTristateBool halted; +}; + +#define CH_DOMAIN_VCPU_PRIVATE(vcpu) \ + ((virCHDomainVcpuPrivate *) (vcpu)->privateData) + extern virDomainXMLPrivateDataCallbacks virCHDriverPrivateDataCallbacks; extern virDomainDefParserConfig virCHDriverDomainDefParserConfig; -- 2.27.0

On 12/10/21 21:34, Praveen K Paladugu wrote:
From: Vineeth Pillai <viremana@linux.microsoft.com>
Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 50 +++++++++++++++++++++++++++++++++++++++++----- src/ch/ch_domain.h | 11 ++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-)
diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index bf4ce83595..36333a8a3f 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -164,11 +164,6 @@ virCHDomainObjPrivateFree(void *data) g_free(priv); }
-virDomainXMLPrivateDataCallbacks virCHDriverPrivateDataCallbacks = { - .alloc = virCHDomainObjPrivateAlloc, - .free = virCHDomainObjPrivateFree, -}; - static int virCHDomainDefPostParseBasic(virDomainDef *def, void *opaque G_GNUC_UNUSED) @@ -185,6 +180,45 @@ virCHDomainDefPostParseBasic(virDomainDef *def, return 0; }
+static virClass *virCHDomainVcpuPrivateClass; +static void virCHDomainVcpuPrivateDispose(void *obj);
This forward declaration could be replaced with the actual implementation.
+ +static int +virCHDomainVcpuPrivateOnceInit(void) +{ + if (!VIR_CLASS_NEW(virCHDomainVcpuPrivate, virClassForObject())) + return -1; + + return 0; +} + +VIR_ONCE_GLOBAL_INIT(virCHDomainVcpuPrivate); + +static virObject * +virCHDomainVcpuPrivateNew(void) +{ + virCHDomainVcpuPrivate *priv; + + if (virCHDomainVcpuPrivateInitialize() < 0) + return NULL; + + if (!(priv = virObjectNew(virCHDomainVcpuPrivateClass))) + return NULL; + + return (virObject *) priv; +} + + +static void +virCHDomainVcpuPrivateDispose(void *obj) +{ + virCHDomainVcpuPrivate *priv = obj; + + priv->tid = 0; + + return;
This is basically a NOP. Nothing here deallocates any memory and clearing our memory to zero is needless.
+}
Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Michal

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 25 +++++++++ src/ch/ch_domain.h | 4 ++ src/ch/ch_driver.c | 134 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+) diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index 36333a8a3f..3f34e87e04 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -336,3 +336,28 @@ virCHDomainGetMonitor(virDomainObj *vm) { return CH_DOMAIN_PRIVATE(vm)->monitor; } + +pid_t +virCHDomainGetVcpuPid(virDomainObj *vm, unsigned int vcpuid) +{ + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, vcpuid); + + return CH_DOMAIN_VCPU_PRIVATE(vcpu)->tid; +} + +bool +virCHDomainHasVcpuPids(virDomainObj *vm) +{ + size_t i; + size_t maxvcpus = virDomainDefGetVcpusMax(vm->def); + virDomainVcpuDef *vcpu; + + for (i = 0; i < maxvcpus; i++) { + vcpu = virDomainDefGetVcpu(vm->def, i); + + if (CH_DOMAIN_VCPU_PRIVATE(vcpu)->tid > 0) + return true; + } + + return false; +} diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index f01c0e5ad0..1ec7643216 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -82,3 +82,7 @@ virCHDomainObjBeginJob(virDomainObj *obj, enum virCHDomainJob job) void virCHDomainObjEndJob(virDomainObj *obj); + +int virCHDomainRefreshVcpuInfo(virDomainObj *vm); +pid_t virCHDomainGetVcpuPid(virDomainObj *vm, unsigned int vcpuid); +bool virCHDomainHasVcpuPids(virDomainObj *vm); diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 108644e503..31e9de7f6a 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -942,6 +942,137 @@ static int chStateInitialize(bool privileged, return ret; } +static int +chDomainGetVcpusFlags(virDomainPtr dom, unsigned int flags) +{ + virDomainObj *vm; + virDomainDef *def; + int ret = -1; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG | + VIR_DOMAIN_VCPU_MAXIMUM | VIR_DOMAIN_VCPU_GUEST, -1); + + if (!(vm = chDomObjFromDomain(dom))) + return -1; + + if (virDomainGetVcpusFlagsEnsureACL(dom->conn, vm->def, flags) < 0) + goto cleanup; + + if (!(def = virDomainObjGetOneDef(vm, flags))) + goto cleanup; + + if (flags & VIR_DOMAIN_VCPU_MAXIMUM) + ret = virDomainDefGetVcpusMax(def); + else + ret = virDomainDefGetVcpus(def); + + + cleanup: + virDomainObjEndAPI(&vm); + return ret; +} + +static int +chDomainGetMaxVcpus(virDomainPtr dom) +{ + return chDomainGetVcpusFlags(dom, + (VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_VCPU_MAXIMUM)); +} + +static int +chDomainHelperGetVcpus(virDomainObj *vm, + virVcpuInfoPtr info, + unsigned long long *cpuwait, + int maxinfo, unsigned char *cpumaps, int maplen) +{ + size_t ncpuinfo = 0; + size_t i; + + if (maxinfo == 0) + return 0; + + if (!virCHDomainHasVcpuPids(vm)) { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("cpu affinity is not supported")); + return -1; + } + + if (info) + memset(info, 0, sizeof(*info) * maxinfo); + + if (cpumaps) + memset(cpumaps, 0, sizeof(*cpumaps) * maxinfo); + + for (i = 0; i < virDomainDefGetVcpusMax(vm->def) && ncpuinfo < maxinfo; i++) { + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); + pid_t vcpupid = virCHDomainGetVcpuPid(vm, i); + virVcpuInfoPtr vcpuinfo = info + ncpuinfo; + + if (!vcpu->online) + continue; + + if (info) { + vcpuinfo->number = i; + vcpuinfo->state = VIR_VCPU_RUNNING; + if (virProcessGetStatInfo(&vcpuinfo->cpuTime, + &vcpuinfo->cpu, NULL, + vm->pid, vcpupid) < 0) { + virReportSystemError(errno, "%s", + _("cannot get vCPU placement & pCPU time")); + return -1; + } + } + + if (cpumaps) { + unsigned char *cpumap = VIR_GET_CPUMAP(cpumaps, maplen, ncpuinfo); + g_autoptr(virBitmap) map = NULL; + + if (!(map = virProcessGetAffinity(vcpupid))) + return -1; + + virBitmapToDataBuf(map, cpumap, maplen); + } + + if (cpuwait) { + if (virProcessGetSchedInfo(&(cpuwait[ncpuinfo]), vm->pid, vcpupid) < 0) + return -1; + } + + ncpuinfo++; + } + + return ncpuinfo; +} + +static int +chDomainGetVcpus(virDomainPtr dom, + virVcpuInfoPtr info, + int maxinfo, unsigned char *cpumaps, int maplen) +{ + virDomainObj *vm; + int ret = -1; + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainGetVcpusEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (virDomainObjCheckActive(vm) < 0) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot retrieve vcpu information for inactive domain")); + goto cleanup; + } + + ret = chDomainHelperGetVcpus(vm, info, NULL, maxinfo, cpumaps, maplen); + + cleanup: + virDomainObjEndAPI(&vm); + return ret; +} + /* Function Tables */ static virHypervisorDriver chHypervisorDriver = { .name = "CH", @@ -978,6 +1109,9 @@ static virHypervisorDriver chHypervisorDriver = { .domainIsActive = chDomainIsActive, /* 7.5.0 */ .domainOpenConsole = chDomainOpenConsole, /* 7.8.0 */ .nodeGetInfo = chNodeGetInfo, /* 7.5.0 */ + .domainGetVcpus = chDomainGetVcpus, /* 8.0.0 */ + .domainGetVcpusFlags = chDomainGetVcpusFlags, /* 8.0.0 */ + .domainGetMaxVcpus = chDomainGetMaxVcpus, /* 8.0.0 */ }; static virConnectDriver chConnectDriver = { -- 2.27.0

On 12/10/21 21:34, Praveen K Paladugu wrote:
Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 25 +++++++++ src/ch/ch_domain.h | 4 ++ src/ch/ch_driver.c | 134 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+)
Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Michal

From: Vineeth Pillai <viremana@linux.microsoft.com> Add domainGetVcpuPinInfo and nodeGetCPUMap callbacks to ch driver Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.h | 10 +++++++-- src/ch/ch_driver.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 1ec7643216..c36405808b 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -23,6 +23,7 @@ #include "ch_conf.h" #include "ch_monitor.h" #include "virchrdev.h" +#include "vircgroup.h" /* Give up waiting for mutex after 30 seconds */ #define CH_JOB_WAIT_TIME (1000ull * 30) @@ -52,9 +53,14 @@ typedef struct _virCHDomainObjPrivate virCHDomainObjPrivate; struct _virCHDomainObjPrivate { struct virCHDomainJobObj job; + virChrdevs *chrdevs; + virCgroup *cgroup; + virCHDriver *driver; virCHMonitor *monitor; - - virChrdevs *chrdevs; + char *machineName; + virBitmap *autoNodeset; + virBitmap *autoCpuset; + virChrdevs *devs; }; #define CH_DOMAIN_PRIVATE(vm) \ diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 31e9de7f6a..ad7383e96e 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -981,6 +981,55 @@ chDomainGetMaxVcpus(virDomainPtr dom) VIR_DOMAIN_VCPU_MAXIMUM)); } +static int +chDomainGetVcpuPinInfo(virDomain *dom, + int ncpumaps, + unsigned char *cpumaps, int maplen, unsigned int flags) +{ + virDomainObj *vm = NULL; + virDomainDef *def; + bool live; + int ret = -1; + + g_autoptr(virBitmap) hostcpus = NULL; + virBitmap *autoCpuset = NULL; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_AFFECT_CONFIG, -1); + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainGetVcpuPinInfoEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (!(def = virDomainObjGetOneDefState(vm, flags, &live))) + goto cleanup; + + if (!(hostcpus = virHostCPUGetAvailableCPUsBitmap())) + goto cleanup; + + if (live) + autoCpuset = CH_DOMAIN_PRIVATE(vm)->autoCpuset; + + ret = virDomainDefGetVcpuPinInfoHelper(def, maplen, ncpumaps, cpumaps, + hostcpus, autoCpuset); + cleanup: + virDomainObjEndAPI(&vm); + return ret; +} + +static int +chNodeGetCPUMap(virConnectPtr conn, + unsigned char **cpumap, + unsigned int *online, unsigned int flags) +{ + if (virNodeGetCPUMapEnsureACL(conn) < 0) + return -1; + + return virHostCPUGetMap(cpumap, online, flags); +} + + static int chDomainHelperGetVcpus(virDomainObj *vm, virVcpuInfoPtr info, @@ -1112,6 +1161,8 @@ static virHypervisorDriver chHypervisorDriver = { .domainGetVcpus = chDomainGetVcpus, /* 8.0.0 */ .domainGetVcpusFlags = chDomainGetVcpusFlags, /* 8.0.0 */ .domainGetMaxVcpus = chDomainGetMaxVcpus, /* 8.0.0 */ + .domainGetVcpuPinInfo = chDomainGetVcpuPinInfo, /* 8.0.0 */ + .nodeGetCPUMap = chNodeGetCPUMap, /* 8.0.0 */ }; static virConnectDriver chConnectDriver = { -- 2.27.0

On 12/10/21 21:34, Praveen K Paladugu wrote:
From: Vineeth Pillai <viremana@linux.microsoft.com>
Add domainGetVcpuPinInfo and nodeGetCPUMap callbacks to ch driver
Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.h | 10 +++++++-- src/ch/ch_driver.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 1ec7643216..c36405808b 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -23,6 +23,7 @@ #include "ch_conf.h" #include "ch_monitor.h" #include "virchrdev.h" +#include "vircgroup.h"
/* Give up waiting for mutex after 30 seconds */ #define CH_JOB_WAIT_TIME (1000ull * 30) @@ -52,9 +53,14 @@ typedef struct _virCHDomainObjPrivate virCHDomainObjPrivate; struct _virCHDomainObjPrivate { struct virCHDomainJobObj job;
+ virChrdevs *chrdevs; + virCgroup *cgroup; + virCHDriver *driver; virCHMonitor *monitor; - - virChrdevs *chrdevs; + char *machineName; + virBitmap *autoNodeset; + virBitmap *autoCpuset; + virChrdevs *devs;
Some of these members are not used in this patch. It would be better if they were introduced in the same patch that needs them. Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Michal

From: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_conf.h | 2 + src/ch/ch_domain.c | 27 ++++++- src/ch/ch_domain.h | 2 + src/ch/ch_driver.c | 1 + src/ch/ch_monitor.c | 185 ++++++++++++++++++++++++++++++-------------- src/ch/ch_monitor.h | 14 +++- src/ch/ch_process.c | 6 +- src/ch/meson.build | 1 + 8 files changed, 179 insertions(+), 59 deletions(-) diff --git a/src/ch/ch_conf.h b/src/ch/ch_conf.h index 37c36d9a09..8fe69c8545 100644 --- a/src/ch/ch_conf.h +++ b/src/ch/ch_conf.h @@ -44,6 +44,8 @@ struct _virCHDriver { virMutex lock; + bool privileged; + /* Require lock to get a reference on the object, * lockless access thereafter */ virCaps *caps; diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index 3f34e87e04..3a32ac63d9 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -21,10 +21,12 @@ #include <config.h> #include "ch_domain.h" +#include "domain_driver.h" #include "viralloc.h" #include "virchrdev.h" #include "virlog.h" #include "virtime.h" +#include "virsystemd.h" #define VIR_FROM_THIS VIR_FROM_CH @@ -134,7 +136,7 @@ virCHDomainObjEndJob(virDomainObj *obj) } static void * -virCHDomainObjPrivateAlloc(void *opaque G_GNUC_UNUSED) +virCHDomainObjPrivateAlloc(void *opaque) { virCHDomainObjPrivate *priv; @@ -150,6 +152,7 @@ virCHDomainObjPrivateAlloc(void *opaque G_GNUC_UNUSED) g_free(priv); return NULL; } + priv->driver = opaque; return priv; } @@ -361,3 +364,25 @@ virCHDomainHasVcpuPids(virDomainObj *vm) return false; } + +char * +virCHDomainGetMachineName(virDomainObj *vm) +{ + virCHDomainObjPrivate *priv = CH_DOMAIN_PRIVATE(vm); + virCHDriver *driver = priv->driver; + char *ret = NULL; + + if (vm->pid > 0) { + ret = virSystemdGetMachineNameByPID(vm->pid); + if (!ret) + virResetLastError(); + } + + if (!ret) + ret = virDomainDriverGenerateMachineName("ch", + NULL, + vm->def->id, vm->def->name, + driver->privileged); + + return ret; +} diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index c36405808b..4fc6251380 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -92,3 +92,5 @@ virCHDomainObjEndJob(virDomainObj *obj); int virCHDomainRefreshVcpuInfo(virDomainObj *vm); pid_t virCHDomainGetVcpuPid(virDomainObj *vm, unsigned int vcpuid); bool virCHDomainHasVcpuPids(virDomainObj *vm); + +char *virCHDomainGetMachineName(virDomainObj *vm); diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index ad7383e96e..97fbd76959 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -934,6 +934,7 @@ static int chStateInitialize(bool privileged, goto cleanup; } + ch_driver->privileged = privileged; ret = VIR_DRV_STATE_INIT_COMPLETE; cleanup: diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c index 12c10da874..d48b057814 100644 --- a/src/ch/ch_monitor.c +++ b/src/ch/ch_monitor.c @@ -226,7 +226,8 @@ virCHMonitorBuildDisksJson(virJSONValue *content, virDomainDef *vmdef) } static int -virCHMonitorBuildNetJson(virJSONValue *nets, virDomainNetDef *netdef) +virCHMonitorBuildNetJson(virJSONValue *nets, virDomainNetDef *netdef, + size_t *nnicindexes, int **nicindexes) { virDomainNetType netType = virDomainNetGetActualType(netdef); char macaddr[VIR_MAC_STRING_BUFLEN]; @@ -236,59 +237,72 @@ virCHMonitorBuildNetJson(virJSONValue *nets, virDomainNetDef *netdef) net = virJSONValueNewObject(); switch (netType) { - case VIR_DOMAIN_NET_TYPE_ETHERNET: - if (netdef->guestIP.nips == 1) { - const virNetDevIPAddr *ip = netdef->guestIP.ips[0]; - g_autofree char *addr = NULL; - virSocketAddr netmask; - g_autofree char *netmaskStr = NULL; - if (!(addr = virSocketAddrFormat(&ip->address))) - return -1; - if (virJSONValueObjectAppendString(net, "ip", addr) < 0) - return -1; + case VIR_DOMAIN_NET_TYPE_ETHERNET: + if (netdef->guestIP.nips == 1) { + const virNetDevIPAddr *ip = netdef->guestIP.ips[0]; + g_autofree char *addr = NULL; + virSocketAddr netmask; + g_autofree char *netmaskStr = NULL; + + if (!(addr = virSocketAddrFormat(&ip->address))) + return -1; + if (virJSONValueObjectAppendString(net, "ip", addr) < 0) + return -1; + + if (virSocketAddrPrefixToNetmask(ip->prefix, &netmask, AF_INET) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to translate net prefix %d to netmask"), + ip->prefix); + return -1; + } + if (!(netmaskStr = virSocketAddrFormat(&netmask))) + return -1; + if (virJSONValueObjectAppendString(net, "mask", netmaskStr) < 0) + return -1; + } else if (netdef->guestIP.nips > 1) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("ethernet type supports a single guest ip")); + } - if (virSocketAddrPrefixToNetmask(ip->prefix, &netmask, AF_INET) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Failed to translate net prefix %d to netmask"), - ip->prefix); - return -1; + /* network and bridge use a tap device, and direct uses a + * macvtap device + */ + if (nicindexes && nnicindexes && netdef->ifname) { + int nicindex = 0; + + if (virNetDevGetIndex(netdef->ifname, &nicindex) < 0) + return -1; + + VIR_APPEND_ELEMENT(*nicindexes, *nnicindexes, nicindex); } - if (!(netmaskStr = virSocketAddrFormat(&netmask))) - return -1; - if (virJSONValueObjectAppendString(net, "mask", netmaskStr) < 0) - return -1; - } else if (netdef->guestIP.nips > 1) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("ethernet type supports a single guest ip")); - } - break; - case VIR_DOMAIN_NET_TYPE_VHOSTUSER: - if ((virDomainChrType)netdef->data.vhostuser->type != VIR_DOMAIN_CHR_TYPE_UNIX) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + break; + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + if ((virDomainChrType)netdef->data.vhostuser->type != VIR_DOMAIN_CHR_TYPE_UNIX) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("vhost_user type support UNIX socket in this CH")); - return -1; - } else { - if (virJSONValueObjectAppendString(net, "vhost_socket", netdef->data.vhostuser->data.nix.path) < 0) - return -1; - if (virJSONValueObjectAppendBoolean(net, "vhost_user", true) < 0) return -1; - } - break; - case VIR_DOMAIN_NET_TYPE_BRIDGE: - case VIR_DOMAIN_NET_TYPE_NETWORK: - case VIR_DOMAIN_NET_TYPE_DIRECT: - case VIR_DOMAIN_NET_TYPE_USER: - case VIR_DOMAIN_NET_TYPE_SERVER: - case VIR_DOMAIN_NET_TYPE_CLIENT: - case VIR_DOMAIN_NET_TYPE_MCAST: - case VIR_DOMAIN_NET_TYPE_INTERNAL: - case VIR_DOMAIN_NET_TYPE_HOSTDEV: - case VIR_DOMAIN_NET_TYPE_UDP: - case VIR_DOMAIN_NET_TYPE_VDPA: - case VIR_DOMAIN_NET_TYPE_LAST: - default: - virReportEnumRangeError(virDomainNetType, netType); - return -1; + } else { + if (virJSONValueObjectAppendString(net, "vhost_socket", netdef->data.vhostuser->data.nix.path) < 0) + return -1; + if (virJSONValueObjectAppendBoolean(net, "vhost_user", true) < 0) + return -1; + } + break; + case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_NETWORK: + case VIR_DOMAIN_NET_TYPE_DIRECT: + case VIR_DOMAIN_NET_TYPE_USER: + case VIR_DOMAIN_NET_TYPE_SERVER: + case VIR_DOMAIN_NET_TYPE_CLIENT: + case VIR_DOMAIN_NET_TYPE_MCAST: + case VIR_DOMAIN_NET_TYPE_INTERNAL: + case VIR_DOMAIN_NET_TYPE_HOSTDEV: + case VIR_DOMAIN_NET_TYPE_UDP: + case VIR_DOMAIN_NET_TYPE_VDPA: + case VIR_DOMAIN_NET_TYPE_LAST: + default: + virReportEnumRangeError(virDomainNetType, netType); + return -1; } if (netdef->ifname != NULL) { @@ -329,7 +343,8 @@ virCHMonitorBuildNetJson(virJSONValue *nets, virDomainNetDef *netdef) } static int -virCHMonitorBuildNetsJson(virJSONValue *content, virDomainDef *vmdef) +virCHMonitorBuildNetsJson(virJSONValue *content, virDomainDef *vmdef, + size_t *nnicindexes, int **nicindexes) { g_autoptr(virJSONValue) nets = NULL; size_t i; @@ -338,7 +353,8 @@ virCHMonitorBuildNetsJson(virJSONValue *content, virDomainDef *vmdef) nets = virJSONValueNewArray(); for (i = 0; i < vmdef->nnets; i++) { - if (virCHMonitorBuildNetJson(nets, vmdef->nets[i]) < 0) + if (virCHMonitorBuildNetJson(nets, vmdef->nets[i], + nnicindexes, nicindexes) < 0) return -1; } if (virJSONValueObjectAppend(content, "net", &nets) < 0) @@ -349,7 +365,59 @@ virCHMonitorBuildNetsJson(virJSONValue *content, virDomainDef *vmdef) } static int -virCHMonitorBuildVMJson(virDomainDef *vmdef, char **jsonstr) +virCHMonitorBuildDeviceJson(virJSONValue *devices, + virDomainHostdevDef *hostdevdef) +{ + g_autoptr(virJSONValue) device = NULL; + + + if (hostdevdef->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdevdef->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + g_autofree char *name = NULL; + g_autofree char *path = NULL; + virDomainHostdevSubsysPCI *pcisrc = &hostdevdef->source.subsys.u.pci; + + device = virJSONValueNewObject(); + name = virPCIDeviceAddressAsString(&pcisrc->addr); + path = g_strdup_printf("/sys/bus/pci/devices/%s/", name); + if (!virFileExists(path)) { + virReportError(VIR_ERR_DEVICE_MISSING, + _("host pci device %s not found"), path); + return -1; + } + if (virJSONValueObjectAppendString(device, "path", path) < 0) + return -1; + if (virJSONValueArrayAppend(devices, &device) < 0) + return -1; + } + + return 0; +} + +static int +virCHMonitorBuildDevicesJson(virJSONValue *content, virDomainDef *vmdef) +{ + size_t i; + + g_autoptr(virJSONValue) devices = NULL; + + if (vmdef->nhostdevs == 0) + return 0; + + devices = virJSONValueNewArray(); + for (i = 0; i < vmdef->nhostdevs; i++) { + if (virCHMonitorBuildDeviceJson(devices, vmdef->hostdevs[i]) < 0) + return -1; + } + if (virJSONValueObjectAppend(content, "devices", &devices) < 0) + return -1; + + return 0; +} + +static int +virCHMonitorBuildVMJson(virDomainDef *vmdef, char **jsonstr, + size_t *nnicindexes, int **nicindexes) { g_autoptr(virJSONValue) content = virJSONValueNewObject(); @@ -374,7 +442,11 @@ virCHMonitorBuildVMJson(virDomainDef *vmdef, char **jsonstr) if (virCHMonitorBuildDisksJson(content, vmdef) < 0) return -1; - if (virCHMonitorBuildNetsJson(content, vmdef) < 0) + + if (virCHMonitorBuildNetsJson(content, vmdef, nnicindexes, nicindexes) < 0) + return -1; + + if (virCHMonitorBuildDevicesJson(content, vmdef) < 0) return -1; if (!(*jsonstr = virJSONValueToString(content, false))) @@ -671,7 +743,7 @@ virCHMonitorShutdownVMM(virCHMonitor *mon) } int -virCHMonitorCreateVM(virCHMonitor *mon) +virCHMonitorCreateVM(virCHMonitor *mon, size_t *nnicindexes, int **nicindexes) { g_autofree char *url = NULL; int responseCode = 0; @@ -683,7 +755,8 @@ virCHMonitorCreateVM(virCHMonitor *mon) headers = curl_slist_append(headers, "Accept: application/json"); headers = curl_slist_append(headers, "Content-Type: application/json"); - if (virCHMonitorBuildVMJson(mon->vm->def, &payload) != 0) + if (virCHMonitorBuildVMJson(mon->vm->def, &payload, + nnicindexes, nicindexes) != 0) return -1; virObjectLock(mon); diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h index 0f684ca583..8ca9e17a9a 100644 --- a/src/ch/ch_monitor.h +++ b/src/ch/ch_monitor.h @@ -55,10 +55,22 @@ virCHMonitor *virCHMonitorNew(virDomainObj *vm, const char *socketdir); void virCHMonitorClose(virCHMonitor *mon); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virCHMonitor, virCHMonitorClose); -int virCHMonitorCreateVM(virCHMonitor *mon); + +int virCHMonitorCreateVM(virCHMonitor *mon, + size_t *nnicindexes, int **nicindexes); int virCHMonitorBootVM(virCHMonitor *mon); int virCHMonitorShutdownVM(virCHMonitor *mon); int virCHMonitorRebootVM(virCHMonitor *mon); int virCHMonitorSuspendVM(virCHMonitor *mon); int virCHMonitorResumeVM(virCHMonitor *mon); int virCHMonitorGetInfo(virCHMonitor *mon, virJSONValue **info); + +typedef struct _virCHMonitorCPUInfo virCHMonitorCPUInfo; +struct _virCHMonitorCPUInfo { + pid_t tid; + bool online; +}; +void virCHMonitorCPUInfoFree(virCHMonitorCPUInfo *cpus); +int virCHMonitorGetCPUInfo(virCHMonitor *mon, + virCHMonitorCPUInfo **vcpus, + size_t maxvcpus); diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index 9f82e04485..65ca03cfaf 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -147,6 +147,8 @@ int virCHProcessStart(virCHDriver *driver, { int ret = -1; virCHDomainObjPrivate *priv = vm->privateData; + g_autofree int *nicindexes = NULL; + size_t nnicindexes = 0; if (!priv->monitor) { /* And we can get the first monitor connection now too */ @@ -156,7 +158,8 @@ int virCHProcessStart(virCHDriver *driver, goto cleanup; } - if (virCHMonitorCreateVM(priv->monitor) < 0) { + if (virCHMonitorCreateVM(priv->monitor, + &nnicindexes, &nicindexes) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("failed to create guest VM")); goto cleanup; @@ -169,6 +172,7 @@ int virCHProcessStart(virCHDriver *driver, goto cleanup; } + priv->machineName = virCHDomainGetMachineName(vm); vm->pid = priv->monitor->pid; vm->def->id = vm->pid; diff --git a/src/ch/meson.build b/src/ch/meson.build index e34974d56c..e0afdb390a 100644 --- a/src/ch/meson.build +++ b/src/ch/meson.build @@ -29,6 +29,7 @@ if conf.has('WITH_CH') ], include_directories: [ conf_inc_dir, + hypervisor_inc_dir, ], ) -- 2.27.0

On 12/10/21 21:34, Praveen K Paladugu wrote:
From: Vineeth Pillai <viremana@linux.microsoft.com>
Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_conf.h | 2 + src/ch/ch_domain.c | 27 ++++++- src/ch/ch_domain.h | 2 + src/ch/ch_driver.c | 1 + src/ch/ch_monitor.c | 185 ++++++++++++++++++++++++++++++-------------- src/ch/ch_monitor.h | 14 +++- src/ch/ch_process.c | 6 +- src/ch/meson.build | 1 + 8 files changed, 179 insertions(+), 59 deletions(-)
diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index 9f82e04485..65ca03cfaf 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c
@@ -169,6 +172,7 @@ int virCHProcessStart(virCHDriver *driver, goto cleanup; }
+ priv->machineName = virCHDomainGetMachineName(vm);
This needs to be coupled with corresponding free call in virCHProcessStop() and virCHDomainObjPrivateFree() (the latter one I'm not 100% sure, but it's better to be safe than sorry). Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Michal

Refactor some cgroup management methods from qemu into hypervisor. These methods will be shared with ch driver for cgroup management. Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/hypervisor/domain_cgroup.c | 426 ++++++++++++++++++++++++++++++++- src/hypervisor/domain_cgroup.h | 52 ++++ src/libvirt_private.syms | 13 + src/qemu/qemu_cgroup.c | 410 +------------------------------ src/qemu/qemu_cgroup.h | 11 - src/qemu/qemu_driver.c | 14 +- src/qemu/qemu_hotplug.c | 7 +- src/qemu/qemu_process.c | 20 +- 8 files changed, 522 insertions(+), 431 deletions(-) diff --git a/src/hypervisor/domain_cgroup.c b/src/hypervisor/domain_cgroup.c index 61b54f071c..0a1b3867b1 100644 --- a/src/hypervisor/domain_cgroup.c +++ b/src/hypervisor/domain_cgroup.c @@ -22,11 +22,12 @@ #include "domain_cgroup.h" #include "domain_driver.h" - +#include "util/virnuma.h" +#include "virlog.h" #include "virutil.h" #define VIR_FROM_THIS VIR_FROM_DOMAIN - +VIR_LOG_INIT("domain.cgroup"); int virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio) @@ -269,3 +270,424 @@ virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup, return 0; } + + +int +virSetupBlkioCgroup(virDomainObj *vm, virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_BLKIO)) { + if (vm->def->blkio.weight || vm->def->blkio.ndevices) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Block I/O tuning is not available on this host")); + return -1; + } else { + return 0; + } + } + + return virDomainCgroupSetupBlkio(cgroup, vm->def->blkio); +} + + +int +virSetupMemoryCgroup(virDomainObj *vm, virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) { + if (virMemoryLimitIsSet(vm->def->mem.hard_limit) || + virMemoryLimitIsSet(vm->def->mem.soft_limit) || + virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Memory cgroup is not available on this host")); + return -1; + } else { + return 0; + } + } + + return virDomainCgroupSetupMemtune(cgroup, vm->def->mem); +} + + +int +virSetupCpusetCgroup(virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + if (virCgroupSetCpusetMemoryMigrate(cgroup, true) < 0) + return -1; + + return 0; +} + + +int +virSetupCpuCgroup(virDomainObj *vm, virCgroup *cgroup) +{ + + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + if (vm->def->cputune.sharesSpecified) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("CPU tuning is not available on this host")); + return -1; + } else { + return 0; + } + } + + if (vm->def->cputune.sharesSpecified) { + + if (virCgroupSetCpuShares(cgroup, vm->def->cputune.shares) < 0) + return -1; + + } + + return 0; +} + + +int +virInitCgroup(const char *prefix, + virDomainObj * vm, + size_t nnicindexes, int *nicindexes, + virCgroup * cgroup, int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, char *machineName) +{ + if (!privileged) + return 0; + + if (!virCgroupAvailable()) + return 0; + + virCgroupFree(cgroup); + cgroup = NULL; + + if (!vm->def->resource) + vm->def->resource = g_new0(virDomainResourceDef, 1); + + if (!vm->def->resource->partition) + vm->def->resource->partition = g_strdup("/machine"); + + if (!g_path_is_absolute(vm->def->resource->partition)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource partition '%s' must start with '/'"), + vm->def->resource->partition); + return -1; + } + + if (virCgroupNewMachine(machineName, + prefix, + vm->def->uuid, + NULL, + vm->pid, + false, + nnicindexes, nicindexes, + vm->def->resource->partition, + cgroupControllers, + maxThreadsPerProc, &cgroup) < 0) { + if (virCgroupNewIgnoreError()) + return 0; + + return -1; + } + + return 0; +} + + +void +virRestoreCgroupState(virDomainObj *vm, virCgroup *cgroup) +{ + g_autofree char *mem_mask = NULL; + size_t i = 0; + + g_autoptr(virBitmap) all_nodes = NULL; + + if (!virNumaIsAvailable() || + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return; + + if (!(all_nodes = virNumaGetHostMemoryNodeset())) + goto error; + + if (!(mem_mask = virBitmapFormat(all_nodes))) + goto error; + + if (virCgroupHasEmptyTasks(cgroup, VIR_CGROUP_CONTROLLER_CPUSET) <= 0) + goto error; + + if (virCgroupSetCpusetMems(cgroup, mem_mask) < 0) + goto error; + + for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) { + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (virRestoreCgroupThread(cgroup, VIR_CGROUP_THREAD_VCPU, i) < 0) + return; + } + + for (i = 0; i < vm->def->niothreadids; i++) { + if (virRestoreCgroupThread(cgroup, VIR_CGROUP_THREAD_IOTHREAD, + vm->def->iothreadids[i]->iothread_id) < 0) + return; + } + + if (virRestoreCgroupThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0) < 0) + return; + + return; + + error: + virResetLastError(); + VIR_DEBUG("Couldn't restore cgroups to meaningful state"); + return; +} + + +int +virRestoreCgroupThread(virCgroup *cgroup, virCgroupThreadName thread, int id) +{ + g_autoptr(virCgroup) cgroup_temp = NULL; + g_autofree char *nodeset = NULL; + + if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0) + return -1; + + if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0) + return -1; + + if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0) + return -1; + + if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + return -1; + + return 0; +} + + +int +virConnectCgroup(const char *prefix, virDomainObj *vm, virCgroup *cgroup, + int cgroupControllers, bool privileged, char *machineName) +{ + if (privileged) + return 0; + + if (!virCgroupAvailable()) + return 0; + + virCgroupFree(cgroup); + + if (virCgroupNewDetectMachine(vm->def->name, + prefix, + vm->pid, + cgroupControllers, machineName, &cgroup) < 0) + return -1; + + virRestoreCgroupState(vm, cgroup); + return 0; +} + + +int +virSetupCgroup(const char *prefix, + virDomainObj * vm, + size_t nnicindexes, int *nicindexes, + virCgroup * cgroup, + int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, char *machineName) +{ + if (!vm->pid) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot setup cgroups until process is started")); + return -1; + } + + if (virInitCgroup(prefix, vm, nnicindexes, nicindexes, + cgroup, cgroupControllers, + maxThreadsPerProc, privileged, machineName) < 0) + return -1; + + if (!cgroup) + return 0; + + if (virSetupBlkioCgroup(vm, cgroup) < 0) + return -1; + + if (virSetupMemoryCgroup(vm, cgroup) < 0) + return -1; + + if (virSetupCpuCgroup(vm, cgroup) < 0) + return -1; + + if (virSetupCpusetCgroup(cgroup) < 0) + return -1; + + return 0; +} + + +int +virSetupCgroupVcpuBW(virCgroup *cgroup, + unsigned long long period, long long quota) +{ + return virCgroupSetupCpuPeriodQuota(cgroup, period, quota); +} + + +int +virSetupCgroupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask) +{ + return virCgroupSetupCpusetCpus(cgroup, cpumask); +} + + +int +virSetupGlobalCpuCgroup(virDomainObj *vm, virCgroup *cgroup, + virBitmap *autoNodeset) +{ + unsigned long long period = vm->def->cputune.global_period; + long long quota = vm->def->cputune.global_quota; + g_autofree char *mem_mask = NULL; + virDomainNumatuneMemMode mem_mode; + + if ((period || quota) && + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cgroup cpu is required for scheduler tuning")); + return -1; + } + + /* + * If CPU cgroup controller is not initialized here, then we need + * neither period nor quota settings. And if CPUSET controller is + * not initialized either, then there's nothing to do anyway. + */ + if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU) && + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + virDomainNumatuneMaybeFormatNodeset(vm->def->numa, + autoNodeset, &mem_mask, -1) < 0) + return -1; + + if (period || quota) { + if (virSetupCgroupVcpuBW(cgroup, period, quota) < 0) + return -1; + } + + return 0; +} + + +int +virRemoveCgroup(virDomainObj *vm, virCgroup *cgroup, char *machineName) +{ + if (cgroup == NULL) + return 0; /* Not supported, so claim success */ + + if (virCgroupTerminateMachine(machineName) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + + return virCgroupRemove(cgroup); +} + + +void +virCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData *data) +{ + if (!data) + return; + + virCgroupFree(data->emulatorCgroup); + g_free(data->emulatorMemMask); + g_free(data); +} + + +/** + * virCgroupEmulatorAllNodesAllow: + * @cgroup: domain cgroup pointer + * @retData: filled with structure used to roll back the operation + * + * Allows all NUMA nodes for the cloud hypervisor thread temporarily. This is + * necessary when hotplugging cpus since it requires memory allocated in the + * DMA region. Afterwards the operation can be reverted by + * virCgroupEmulatorAllNodesRestore. + * + * Returns 0 on success -1 on error + */ +int +virCgroupEmulatorAllNodesAllow(virCgroup *cgroup, + virCgroupEmulatorAllNodesData **retData) +{ + virCgroupEmulatorAllNodesData *data = NULL; + g_autofree char *all_nodes_str = NULL; + + g_autoptr(virBitmap) all_nodes = NULL; + int ret = -1; + + if (!virNumaIsAvailable() || + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + if (!(all_nodes = virNumaGetHostMemoryNodeset())) + goto cleanup; + + if (!(all_nodes_str = virBitmapFormat(all_nodes))) + goto cleanup; + + data = g_new0(virCgroupEmulatorAllNodesData, 1); + + if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, + false, &data->emulatorCgroup) < 0) + goto cleanup; + + if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 + || virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0) + goto cleanup; + + *retData = g_steal_pointer(&data); + ret = 0; + + cleanup: + virCgroupEmulatorAllNodesDataFree(data); + + return ret; +} + + +/** + * virCgroupEmulatorAllNodesRestore: + * @data: data structure created by virCgroupEmulatorAllNodesAllow + * + * Rolls back the setting done by virCgroupEmulatorAllNodesAllow and frees the + * associated data. + */ +void +virCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData *data) +{ + virError *err; + + if (!data) + return; + + virErrorPreserveLast(&err); + virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask); + virErrorRestore(&err); + + virCgroupEmulatorAllNodesDataFree(data); +} diff --git a/src/hypervisor/domain_cgroup.h b/src/hypervisor/domain_cgroup.h index f93e5f74fe..fd8b74a4ee 100644 --- a/src/hypervisor/domain_cgroup.h +++ b/src/hypervisor/domain_cgroup.h @@ -23,6 +23,11 @@ #include "vircgroup.h" #include "domain_conf.h" +typedef struct _virCgroupEmulatorAllNodesData virCgroupEmulatorAllNodesData; +struct _virCgroupEmulatorAllNodesData { + virCgroup *emulatorCgroup; + char *emulatorMemMask; +}; int virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio); int virDomainCgroupSetupMemtune(virCgroup *cgroup, virDomainMemtune mem); @@ -36,3 +41,50 @@ int virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup, virDomainDef *persistentDef, virTypedParameterPtr params, int nparams); +int +virSetupBlkioCgroup(virDomainObj * vm, virCgroup *cgroup); +int +virSetupMemoryCgroup(virDomainObj * vm, virCgroup *cgroup); +int +virSetupCpusetCgroup(virCgroup *cgroup); +int +virSetupCpuCgroup(virDomainObj * vm, virCgroup *cgroup); +int +virInitCgroup(const char *prefix, virDomainObj * vm, + size_t nnicindexes, int *nicindexes, + virCgroup *cgroup, int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, + char *machineName); +void +virRestoreCgroupState(virDomainObj * vm, virCgroup *cgroup); +int +virConnectCgroup(const char *prefix, virDomainObj * vm, virCgroup *cgroup, + int cgroupControllers, bool privileged, char *machineName); +int +virSetupCgroup(const char *prefix, virDomainObj * vm, + size_t nnicindexes, int *nicindexes, + virCgroup *cgroup, int cgroupControllers, + unsigned int maxThreadsPerProc, + bool privileged, + char *machineName); +void +virCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData * data); +int +virCgroupEmulatorAllNodesAllow(virCgroup * cgroup, + virCgroupEmulatorAllNodesData ** retData); +void +virCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData * data); +int +virSetupCgroupVcpuBW(virCgroup * cgroup, + unsigned long long period, long long quota); +int +virSetupCgroupCpusetCpus(virCgroup * cgroup, virBitmap * cpumask); +int +virSetupGlobalCpuCgroup(virDomainObj * vm, virCgroup * cgroup, + virBitmap *autoNodeset); +int +virRemoveCgroup(virDomainObj * vm, virCgroup * cgroup, char *machineName); +int +virRestoreCgroupThread(virCgroup *cgroup, virCgroupThreadName thread, + int id); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 56adc192cd..09b1fbb8c4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1543,10 +1543,23 @@ virSetConnectStorage; # hypervisor/domain_cgroup.h +virCgroupEmulatorAllNodesAllow; +virCgroupEmulatorAllNodesRestore; +virConnectCgroup; virDomainCgroupSetMemoryLimitParameters; virDomainCgroupSetupBlkio; virDomainCgroupSetupDomainBlkioParameters; virDomainCgroupSetupMemtune; +virInitCgroup; +virRemoveCgroup; +virSetupBlkioCgroup; +virSetupCgroup; +virSetupCgroupCpusetCpus; +virSetupCgroupVcpuBW; +virSetupCpuCgroup; +virSetupCpusetCgroup; +virSetupGlobalCpuCgroup; +virSetupMemoryCgroup; # hypervisor/domain_driver.h diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 308be5b00f..b1d2875959 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -593,46 +593,6 @@ qemuSetupVideoCgroup(virDomainObj *vm, return ret; } - -static int -qemuSetupBlkioCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, - VIR_CGROUP_CONTROLLER_BLKIO)) { - if (vm->def->blkio.weight || vm->def->blkio.ndevices) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Block I/O tuning is not available on this host")); - return -1; - } - return 0; - } - - return virDomainCgroupSetupBlkio(priv->cgroup, vm->def->blkio); -} - - -static int -qemuSetupMemoryCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) { - if (virMemoryLimitIsSet(vm->def->mem.hard_limit) || - virMemoryLimitIsSet(vm->def->mem.soft_limit) || - virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Memory cgroup is not available on this host")); - return -1; - } - return 0; - } - - return virDomainCgroupSetupMemtune(priv->cgroup, vm->def->mem); -} - - static int qemuSetupFirmwareCgroup(virDomainObj *vm) { @@ -861,44 +821,6 @@ qemuSetupDevicesCgroup(virDomainObj *vm) } -static int -qemuSetupCpusetCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return 0; - - if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0) - return -1; - - return 0; -} - - -static int -qemuSetupCpuCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { - if (vm->def->cputune.sharesSpecified) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("CPU tuning is not available on this host")); - return -1; - } - return 0; - } - - if (vm->def->cputune.sharesSpecified) { - if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) < 0) - return -1; - } - - return 0; -} - - static int qemuSetupCgroupAppid(virDomainObj *vm) { @@ -927,166 +849,13 @@ qemuSetupCgroupAppid(virDomainObj *vm) } -static int -qemuInitCgroup(virDomainObj *vm, - size_t nnicindexes, - int *nicindexes) -{ - qemuDomainObjPrivate *priv = vm->privateData; - g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); - - if (!priv->driver->privileged) - return 0; - - if (!virCgroupAvailable()) - return 0; - - virCgroupFree(priv->cgroup); - priv->cgroup = NULL; - - if (!vm->def->resource) - vm->def->resource = g_new0(virDomainResourceDef, 1); - - if (!vm->def->resource->partition) - vm->def->resource->partition = g_strdup("/machine"); - - if (!g_path_is_absolute(vm->def->resource->partition)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Resource partition '%s' must start with '/'"), - vm->def->resource->partition); - return -1; - } - - if (virCgroupNewMachine(priv->machineName, - "qemu", - vm->def->uuid, - NULL, - vm->pid, - false, - nnicindexes, nicindexes, - vm->def->resource->partition, - cfg->cgroupControllers, - cfg->maxThreadsPerProc, - &priv->cgroup) < 0) { - if (virCgroupNewIgnoreError()) - return 0; - - return -1; - } - - return 0; -} - -static int -qemuRestoreCgroupThread(virCgroup *cgroup, - virCgroupThreadName thread, - int id) -{ - g_autoptr(virCgroup) cgroup_temp = NULL; - g_autofree char *nodeset = NULL; - - if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0) - return -1; - - if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0) - return -1; - - if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0) - return -1; - - if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) - return -1; - - return 0; -} - -static void -qemuRestoreCgroupState(virDomainObj *vm) -{ - g_autofree char *mem_mask = NULL; - qemuDomainObjPrivate *priv = vm->privateData; - size_t i = 0; - g_autoptr(virBitmap) all_nodes = NULL; - - if (!virNumaIsAvailable() || - !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return; - - if (!(all_nodes = virNumaGetHostMemoryNodeset())) - goto error; - - if (!(mem_mask = virBitmapFormat(all_nodes))) - goto error; - - if (virCgroupHasEmptyTasks(priv->cgroup, - VIR_CGROUP_CONTROLLER_CPUSET) <= 0) - goto error; - - if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) - goto error; - - for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) { - virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); - - if (!vcpu->online) - continue; - - if (qemuRestoreCgroupThread(priv->cgroup, - VIR_CGROUP_THREAD_VCPU, i) < 0) - return; - } - - for (i = 0; i < vm->def->niothreadids; i++) { - if (qemuRestoreCgroupThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD, - vm->def->iothreadids[i]->iothread_id) < 0) - return; - } - - if (qemuRestoreCgroupThread(priv->cgroup, - VIR_CGROUP_THREAD_EMULATOR, 0) < 0) - return; - - return; - - error: - virResetLastError(); - VIR_DEBUG("Couldn't restore cgroups to meaningful state"); - return; -} - -int -qemuConnectCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); - - if (!priv->driver->privileged) - return 0; - - if (!virCgroupAvailable()) - return 0; - - virCgroupFree(priv->cgroup); - priv->cgroup = NULL; - - if (virCgroupNewDetectMachine(vm->def->name, - "qemu", - vm->pid, - cfg->cgroupControllers, - priv->machineName, - &priv->cgroup) < 0) - return -1; - - qemuRestoreCgroupState(vm); - return 0; -} - int qemuSetupCgroup(virDomainObj *vm, size_t nnicindexes, int *nicindexes) { qemuDomainObjPrivate *priv = vm->privateData; + g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver); if (!vm->pid) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -1094,7 +863,12 @@ qemuSetupCgroup(virDomainObj *vm, return -1; } - if (qemuInitCgroup(vm, nnicindexes, nicindexes) < 0) + if (virInitCgroup("qemu", vm, nnicindexes, nicindexes, + priv->cgroup, + cfg->cgroupControllers, + cfg->maxThreadsPerProc, + priv->driver->privileged, + priv->machineName) < 0) return -1; if (!priv->cgroup) @@ -1103,16 +877,16 @@ qemuSetupCgroup(virDomainObj *vm, if (qemuSetupDevicesCgroup(vm) < 0) return -1; - if (qemuSetupBlkioCgroup(vm) < 0) + if (virSetupBlkioCgroup(vm, priv->cgroup) < 0) return -1; - if (qemuSetupMemoryCgroup(vm) < 0) + if (virSetupMemoryCgroup(vm, priv->cgroup) < 0) return -1; - if (qemuSetupCpuCgroup(vm) < 0) + if (virSetupCpuCgroup(vm, priv->cgroup) < 0) return -1; - if (qemuSetupCpusetCgroup(vm) < 0) + if (virSetupCpusetCgroup(priv->cgroup) < 0) return -1; if (qemuSetupCgroupAppid(vm) < 0) @@ -1121,23 +895,6 @@ qemuSetupCgroup(virDomainObj *vm, return 0; } -int -qemuSetupCgroupVcpuBW(virCgroup *cgroup, - unsigned long long period, - long long quota) -{ - return virCgroupSetupCpuPeriodQuota(cgroup, period, quota); -} - - -int -qemuSetupCgroupCpusetCpus(virCgroup *cgroup, - virBitmap *cpumask) -{ - return virCgroupSetupCpusetCpus(cgroup, cpumask); -} - - int qemuSetupCgroupForExtDevices(virDomainObj *vm, virQEMUDriver *driver) @@ -1164,148 +921,3 @@ qemuSetupCgroupForExtDevices(virDomainObj *vm, return qemuExtDevicesSetupCgroup(driver, vm, cgroup_temp); } - - -int -qemuSetupGlobalCpuCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - unsigned long long period = vm->def->cputune.global_period; - long long quota = vm->def->cputune.global_quota; - g_autofree char *mem_mask = NULL; - virDomainNumatuneMemMode mem_mode; - - if ((period || quota) && - !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("cgroup cpu is required for scheduler tuning")); - return -1; - } - - /* - * If CPU cgroup controller is not initialized here, then we need - * neither period nor quota settings. And if CPUSET controller is - * not initialized either, then there's nothing to do anyway. - */ - if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) && - !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return 0; - - - if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && - mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && - virDomainNumatuneMaybeFormatNodeset(vm->def->numa, - priv->autoNodeset, - &mem_mask, -1) < 0) - return -1; - - if (period || quota) { - if (qemuSetupCgroupVcpuBW(priv->cgroup, period, quota) < 0) - return -1; - } - - return 0; -} - - -int -qemuRemoveCgroup(virDomainObj *vm) -{ - qemuDomainObjPrivate *priv = vm->privateData; - - if (priv->cgroup == NULL) - return 0; /* Not supported, so claim success */ - - if (virCgroupTerminateMachine(priv->machineName) < 0) { - if (!virCgroupNewIgnoreError()) - VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); - } - - return virCgroupRemove(priv->cgroup); -} - - -static void -qemuCgroupEmulatorAllNodesDataFree(qemuCgroupEmulatorAllNodesData *data) -{ - if (!data) - return; - - virCgroupFree(data->emulatorCgroup); - g_free(data->emulatorMemMask); - g_free(data); -} - - -/** - * qemuCgroupEmulatorAllNodesAllow: - * @cgroup: domain cgroup pointer - * @retData: filled with structure used to roll back the operation - * - * Allows all NUMA nodes for the qemu emulator thread temporarily. This is - * necessary when hotplugging cpus since it requires memory allocated in the - * DMA region. Afterwards the operation can be reverted by - * qemuCgroupEmulatorAllNodesRestore. - * - * Returns 0 on success -1 on error - */ -int -qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup, - qemuCgroupEmulatorAllNodesData **retData) -{ - qemuCgroupEmulatorAllNodesData *data = NULL; - g_autofree char *all_nodes_str = NULL; - g_autoptr(virBitmap) all_nodes = NULL; - int ret = -1; - - if (!virNumaIsAvailable() || - !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) - return 0; - - if (!(all_nodes = virNumaGetHostMemoryNodeset())) - goto cleanup; - - if (!(all_nodes_str = virBitmapFormat(all_nodes))) - goto cleanup; - - data = g_new0(qemuCgroupEmulatorAllNodesData, 1); - - if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, - false, &data->emulatorCgroup) < 0) - goto cleanup; - - if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 || - virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0) - goto cleanup; - - *retData = g_steal_pointer(&data); - ret = 0; - - cleanup: - qemuCgroupEmulatorAllNodesDataFree(data); - - return ret; -} - - -/** - * qemuCgroupEmulatorAllNodesRestore: - * @data: data structure created by qemuCgroupEmulatorAllNodesAllow - * - * Rolls back the setting done by qemuCgroupEmulatorAllNodesAllow and frees the - * associated data. - */ -void -qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data) -{ - virErrorPtr err; - - if (!data) - return; - - virErrorPreserveLast(&err); - virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask); - virErrorRestore(&err); - - qemuCgroupEmulatorAllNodesDataFree(data); -} diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index cd537ebd82..f09134947f 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -56,18 +56,11 @@ int qemuSetupChardevCgroup(virDomainObj *vm, virDomainChrDef *dev); int qemuTeardownChardevCgroup(virDomainObj *vm, virDomainChrDef *dev); -int qemuConnectCgroup(virDomainObj *vm); int qemuSetupCgroup(virDomainObj *vm, size_t nnicindexes, int *nicindexes); -int qemuSetupCgroupVcpuBW(virCgroup *cgroup, - unsigned long long period, - long long quota); -int qemuSetupCgroupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask); -int qemuSetupGlobalCpuCgroup(virDomainObj *vm); int qemuSetupCgroupForExtDevices(virDomainObj *vm, virQEMUDriver *driver); -int qemuRemoveCgroup(virDomainObj *vm); typedef struct _qemuCgroupEmulatorAllNodesData qemuCgroupEmulatorAllNodesData; struct _qemuCgroupEmulatorAllNodesData { @@ -75,8 +68,4 @@ struct _qemuCgroupEmulatorAllNodesData { char *emulatorMemMask; }; -int qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup, - qemuCgroupEmulatorAllNodesData **data); -void qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data); - extern const char *const defaultDeviceACL[]; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index ba3efef42b..d365036743 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -4424,7 +4424,7 @@ qemuDomainPinVcpuLive(virDomainObj *vm, if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, vcpu, false, &cgroup_vcpu) < 0) goto cleanup; - if (qemuSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0) + if (virSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0) goto cleanup; } @@ -4633,7 +4633,7 @@ qemuDomainPinEmulator(virDomainPtr dom, 0, false, &cgroup_emulator) < 0) goto endjob; - if (qemuSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) { + if (virSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("failed to set cpuset.cpus in cgroup" " for emulator threads")); @@ -5038,7 +5038,7 @@ qemuDomainPinIOThread(virDomainPtr dom, if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD, iothread_id, false, &cgroup_iothread) < 0) goto endjob; - if (qemuSetupCgroupCpusetCpus(cgroup_iothread, pcpumap) < 0) { + if (virSetupCgroupCpusetCpus(cgroup_iothread, pcpumap) < 0) { virReportError(VIR_ERR_OPERATION_INVALID, _("failed to set cpuset.cpus in cgroup" " for iothread %d"), iothread_id); @@ -8926,7 +8926,7 @@ qemuSetGlobalBWLive(virCgroup *cgroup, unsigned long long period, if (period == 0 && quota == 0) return 0; - if (qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0) + if (virSetupCgroupVcpuBW(cgroup, period, quota) < 0) return -1; return 0; @@ -9121,7 +9121,7 @@ qemuSetVcpusBWLive(virDomainObj *vm, virCgroup *cgroup, false, &cgroup_vcpu) < 0) return -1; - if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) + if (virSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0) return -1; } @@ -9142,7 +9142,7 @@ qemuSetEmulatorBandwidthLive(virCgroup *cgroup, false, &cgroup_emulator) < 0) return -1; - if (qemuSetupCgroupVcpuBW(cgroup_emulator, period, quota) < 0) + if (virSetupCgroupVcpuBW(cgroup_emulator, period, quota) < 0) return -1; return 0; @@ -9169,7 +9169,7 @@ qemuSetIOThreadsBWLive(virDomainObj *vm, virCgroup *cgroup, false, &cgroup_iothread) < 0) return -1; - if (qemuSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0) + if (virSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0) return -1; } diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 04c6600f26..30f04aac57 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -37,6 +37,7 @@ #include "qemu_snapshot.h" #include "qemu_virtiofs.h" #include "domain_audit.h" +#include "domain_cgroup.h" #include "netdev_bandwidth_conf.h" #include "domain_nwfilter.h" #include "virlog.h" @@ -6551,11 +6552,11 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver, bool enable) { qemuDomainObjPrivate *priv = vm->privateData; - qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL; + virCgroupEmulatorAllNodesData *emulatorCgroup = NULL; ssize_t nextvcpu = -1; int ret = -1; - if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) + if (virCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) goto cleanup; if (enable) { @@ -6576,7 +6577,7 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver, ret = 0; cleanup: - qemuCgroupEmulatorAllNodesRestore(emulatorCgroup); + virCgroupEmulatorAllNodesRestore(emulatorCgroup); return ret; } diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 4963ce383f..21c41e83c9 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -73,6 +73,7 @@ #include "virpidfile.h" #include "virhostcpu.h" #include "domain_audit.h" +#include "domain_cgroup.h" #include "domain_nwfilter.h" #include "domain_validate.h" #include "locking/domain_lock.h" @@ -2744,7 +2745,7 @@ qemuProcessSetupPid(virDomainObj *vm, if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { if (use_cpumask && - qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0) + virSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0) goto cleanup; if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0) @@ -2753,7 +2754,7 @@ qemuProcessSetupPid(virDomainObj *vm, } if ((period || quota) && - qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0) + virSetupCgroupVcpuBW(cgroup, period, quota) < 0) goto cleanup; /* Move the thread to the sub dir */ @@ -6027,7 +6028,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver, { unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def); qemuDomainObjPrivate *priv = vm->privateData; - qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL; + virCgroupEmulatorAllNodesData *emulatorCgroup = NULL; virDomainVcpuDef *vcpu; qemuDomainVcpuPrivate *vcpupriv; size_t i; @@ -6055,7 +6056,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver, qsort(bootHotplug, nbootHotplug, sizeof(*bootHotplug), qemuProcessVcpusSortOrder); - if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) + if (virCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0) goto cleanup; for (i = 0; i < nbootHotplug; i++) { @@ -6079,7 +6080,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver, ret = 0; cleanup: - qemuCgroupEmulatorAllNodesRestore(emulatorCgroup); + virCgroupEmulatorAllNodesRestore(emulatorCgroup); return ret; } @@ -7061,7 +7062,7 @@ qemuProcessPrepareHost(virQEMUDriver *driver, /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(vm); + virRemoveCgroup(vm, priv->cgroup, priv->machineName); if (g_mkdir_with_parents(cfg->logDir, 0777) < 0) { virReportSystemError(errno, @@ -7678,7 +7679,7 @@ qemuProcessLaunch(virConnectPtr conn, goto cleanup; VIR_DEBUG("Setting global CPU cgroup (if required)"); - if (qemuSetupGlobalCpuCgroup(vm) < 0) + if (virSetupGlobalCpuCgroup(vm, priv->cgroup, priv->autoNodeset) < 0) goto cleanup; VIR_DEBUG("Setting vCPU tuning/settings"); @@ -8290,7 +8291,7 @@ void qemuProcessStop(virQEMUDriver *driver, } retry: - if ((ret = qemuRemoveCgroup(vm)) < 0) { + if ((ret = virRemoveCgroup(vm, priv->cgroup, priv->machineName)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { g_usleep(200*1000); goto retry; @@ -8849,7 +8850,8 @@ qemuProcessReconnect(void *opaque) if (!priv->machineName) goto error; - if (qemuConnectCgroup(obj) < 0) + if (virConnectCgroup("qemu", obj, priv->cgroup, cfg->cgroupControllers, + priv->driver->privileged, priv->machineName) < 0) goto error; if (qemuDomainPerfRestart(obj) < 0) -- 2.27.0

On 12/10/21 21:34, Praveen K Paladugu wrote:
Refactor some cgroup management methods from qemu into hypervisor. These methods will be shared with ch driver for cgroup management.
Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/hypervisor/domain_cgroup.c | 426 ++++++++++++++++++++++++++++++++- src/hypervisor/domain_cgroup.h | 52 ++++ src/libvirt_private.syms | 13 + src/qemu/qemu_cgroup.c | 410 +------------------------------ src/qemu/qemu_cgroup.h | 11 - src/qemu/qemu_driver.c | 14 +- src/qemu/qemu_hotplug.c | 7 +- src/qemu/qemu_process.c | 20 +- 8 files changed, 522 insertions(+), 431 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 56adc192cd..09b1fbb8c4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1543,10 +1543,23 @@ virSetConnectStorage;
# hypervisor/domain_cgroup.h +virCgroupEmulatorAllNodesAllow; +virCgroupEmulatorAllNodesRestore; +virConnectCgroup; virDomainCgroupSetMemoryLimitParameters; virDomainCgroupSetupBlkio; virDomainCgroupSetupDomainBlkioParameters; virDomainCgroupSetupMemtune; +virInitCgroup; +virRemoveCgroup; +virSetupBlkioCgroup; +virSetupCgroup; +virSetupCgroupCpusetCpus; +virSetupCgroupVcpuBW; +virSetupCpuCgroup; +virSetupCpusetCgroup; +virSetupGlobalCpuCgroup; +virSetupMemoryCgroup;
Almost. Notice how pre-exisitng APIs have "virDomainCgroup" prefix? Those function you are moving should have that too. This is where I'm stopping my review. Let me merge patches I've already acked and look forward to v4. Michal

From: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_conf.c | 2 + src/ch/ch_conf.h | 4 +- src/ch/ch_domain.c | 34 +++++ src/ch/ch_domain.h | 3 +- src/ch/ch_monitor.c | 96 ++++++++++++++ src/ch/ch_monitor.h | 54 +++++++- src/ch/ch_process.c | 304 ++++++++++++++++++++++++++++++++++++++++++-- src/ch/ch_process.h | 3 + 8 files changed, 483 insertions(+), 17 deletions(-) diff --git a/src/ch/ch_conf.c b/src/ch/ch_conf.c index ef6f4b5ba8..cfc1174354 100644 --- a/src/ch/ch_conf.c +++ b/src/ch/ch_conf.c @@ -129,6 +129,8 @@ virCHDriverConfigNew(bool privileged) if (!(cfg = virObjectNew(virCHDriverConfigClass))) return NULL; + cfg->cgroupControllers = -1; /* Auto detect */ + if (privileged) { if (virGetUserID(CH_USER, &cfg->user) < 0) return NULL; diff --git a/src/ch/ch_conf.h b/src/ch/ch_conf.h index 8fe69c8545..1790295ede 100644 --- a/src/ch/ch_conf.h +++ b/src/ch/ch_conf.h @@ -35,11 +35,13 @@ struct _virCHDriverConfig { char *stateDir; char *logDir; - + int cgroupControllers; uid_t user; gid_t group; }; +G_DEFINE_AUTOPTR_CLEANUP_FUNC(virCHDriverConfig, virObjectUnref); + struct _virCHDriver { virMutex lock; diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index 3a32ac63d9..42cb4e7dcf 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -324,6 +324,40 @@ chValidateDomainDeviceDef(const virDomainDeviceDef *dev, _("Serial can only be enabled for a PTY")); return -1; } + return 0; +} + +int +virCHDomainRefreshThreadInfo(virDomainObj *vm) +{ + size_t maxvcpus = virDomainDefGetVcpusMax(vm->def); + virCHMonitorThreadInfo *info = NULL; + size_t nthreads, ncpus = 0; + size_t i; + + nthreads = virCHMonitorGetThreadInfo(virCHDomainGetMonitor(vm), + true, &info); + + for (i = 0; i < nthreads; i++) { + virCHDomainVcpuPrivate *vcpupriv; + virDomainVcpuDef *vcpu; + virCHMonitorCPUInfo *vcpuInfo; + + if (info[i].type != virCHThreadTypeVcpu) + continue; + + /* TODO: hotplug support */ + vcpuInfo = &info[i].vcpuInfo; + vcpu = virDomainDefGetVcpu(vm->def, vcpuInfo->cpuid); + vcpupriv = CH_DOMAIN_VCPU_PRIVATE(vcpu); + vcpupriv->tid = vcpuInfo->tid; + ncpus++; + } + + /* TODO: Remove the warning when hotplug is implemented.*/ + if (ncpus != maxvcpus) + VIR_WARN("Mismatch in the number of cpus, expected: %ld, actual: %ld", + maxvcpus, ncpus); return 0; } diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 4fc6251380..7cffe2b4fb 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -89,7 +89,8 @@ virCHDomainObjBeginJob(virDomainObj *obj, enum virCHDomainJob job) void virCHDomainObjEndJob(virDomainObj *obj); -int virCHDomainRefreshVcpuInfo(virDomainObj *vm); +int virCHDomainRefreshThreadInfo(virDomainObj *vm); + pid_t virCHDomainGetVcpuPid(virDomainObj *vm, unsigned int vcpuid); bool virCHDomainHasVcpuPids(virDomainObj *vm); diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c index d48b057814..f4e0f88817 100644 --- a/src/ch/ch_monitor.c +++ b/src/ch/ch_monitor.c @@ -41,6 +41,7 @@ VIR_LOG_INIT("ch.ch_monitor"); static virClass *virCHMonitorClass; static void virCHMonitorDispose(void *obj); +static void virCHMonitorThreadInfoFree(virCHMonitor * mon); static int virCHMonitorOnceInit(void) { @@ -571,6 +572,7 @@ static void virCHMonitorDispose(void *opaque) virCHMonitor *mon = opaque; VIR_DEBUG("mon=%p", mon); + virCHMonitorThreadInfoFree(mon); virObjectUnref(mon->vm); } @@ -736,6 +738,100 @@ virCHMonitorGet(virCHMonitor *mon, const char *endpoint, virJSONValue **response return ret; } +static void +virCHMonitorThreadInfoFree(virCHMonitor *mon) +{ + mon->nthreads = 0; + if (mon->threads) + VIR_FREE(mon->threads); +} + +static size_t +virCHMonitorRefreshThreadInfo(virCHMonitor *mon) +{ + virCHMonitorThreadInfo *info = NULL; + g_autofree pid_t *tids = NULL; + virDomainObj *vm = mon->vm; + size_t ntids = 0; + size_t i; + + + virCHMonitorThreadInfoFree(mon); + if (virProcessGetPids(vm->pid, &ntids, &tids) < 0) { + mon->threads = NULL; + return 0; + } + + info = g_new0(virCHMonitorThreadInfo, ntids); + for (i = 0; i < ntids; i++) { + g_autofree char *proc = NULL; + g_autofree char *data = NULL; + + proc = g_strdup_printf("/proc/%d/task/%d/comm", + (int)vm->pid, (int)tids[i]); + + if (virFileReadAll(proc, (1 << 16), &data) < 0) { + continue; + } + + VIR_DEBUG("VM PID: %d, TID %d, COMM: %s", + (int)vm->pid, (int)tids[i], data); + if (STRPREFIX(data, "vcpu")) { + int cpuid; + char *tmp; + + if (virStrToLong_i(data + 4, &tmp, 0, &cpuid) < 0) { + VIR_WARN("Index is not specified correctly"); + continue; + } + info[i].type = virCHThreadTypeVcpu; + info[i].vcpuInfo.tid = tids[i]; + info[i].vcpuInfo.online = true; + info[i].vcpuInfo.cpuid = cpuid; + VIR_DEBUG("vcpu%d -> tid: %d", cpuid, tids[i]); + } else if (STRPREFIX(data, "_disk") || STRPREFIX(data, "_net") || + STRPREFIX(data, "_rng")) { + /* Prefixes used by cloud-hypervisor for IO Threads are captured at + * https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/vmm/src/devic... */ + info[i].type = virCHThreadTypeIO; + info[i].ioInfo.tid = tids[i]; + virStrcpy(info[i].ioInfo.thrName, data, VIRCH_THREAD_NAME_LEN - 1); + } else { + info[i].type = virCHThreadTypeEmulator; + info[i].emuInfo.tid = tids[i]; + virStrcpy(info[i].emuInfo.thrName, data, VIRCH_THREAD_NAME_LEN - 1); + } + mon->nthreads++; + + } + mon->threads = info; + + return mon->nthreads; +} + +/** + * virCHMonitorGetThreadInfo: + * @mon: Pointer to the monitor + * @refresh: Refresh thread info or not + * + * Retrive thread info and store to @threads + * + * Returns count of threads on success. + */ +size_t +virCHMonitorGetThreadInfo(virCHMonitor *mon, bool refresh, + virCHMonitorThreadInfo **threads) +{ + int nthreads = 0; + + if (refresh) + nthreads = virCHMonitorRefreshThreadInfo(mon); + + *threads = mon->threads; + + return nthreads; +} + int virCHMonitorShutdownVMM(virCHMonitor *mon) { diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h index 8ca9e17a9a..f8c3fa75e8 100644 --- a/src/ch/ch_monitor.h +++ b/src/ch/ch_monitor.h @@ -37,6 +37,50 @@ #define URL_VM_RESUME "vm.resume" #define URL_VM_INFO "vm.info" +#define VIRCH_THREAD_NAME_LEN 16 + +typedef enum { + virCHThreadTypeEmulator, + virCHThreadTypeVcpu, + virCHThreadTypeIO, + virCHThreadTypeMax +} virCHThreadType; + +typedef struct _virCHMonitorCPUInfo virCHMonitorCPUInfo; + +struct _virCHMonitorCPUInfo { + int cpuid; + pid_t tid; + + bool online; +}; + +typedef struct _virCHMonitorEmuThreadInfo virCHMonitorEmuThreadInfo; + +struct _virCHMonitorEmuThreadInfo { + char thrName[VIRCH_THREAD_NAME_LEN]; + pid_t tid; +}; + +typedef struct _virCHMonitorIOThreadInfo virCHMonitorIOThreadInfo; + +struct _virCHMonitorIOThreadInfo { + char thrName[VIRCH_THREAD_NAME_LEN]; + pid_t tid; +}; + +typedef struct _virCHMonitorThreadInfo virCHMonitorThreadInfo; + +struct _virCHMonitorThreadInfo { + virCHThreadType type; + + union { + virCHMonitorCPUInfo vcpuInfo; + virCHMonitorEmuThreadInfo emuInfo; + virCHMonitorIOThreadInfo ioInfo; + }; +}; + typedef struct _virCHMonitor virCHMonitor; struct _virCHMonitor { @@ -49,6 +93,9 @@ struct _virCHMonitor { pid_t pid; virDomainObj *vm; + + size_t nthreads; + virCHMonitorThreadInfo *threads; }; virCHMonitor *virCHMonitorNew(virDomainObj *vm, const char *socketdir); @@ -65,12 +112,9 @@ int virCHMonitorSuspendVM(virCHMonitor *mon); int virCHMonitorResumeVM(virCHMonitor *mon); int virCHMonitorGetInfo(virCHMonitor *mon, virJSONValue **info); -typedef struct _virCHMonitorCPUInfo virCHMonitorCPUInfo; -struct _virCHMonitorCPUInfo { - pid_t tid; - bool online; -}; void virCHMonitorCPUInfoFree(virCHMonitorCPUInfo *cpus); int virCHMonitorGetCPUInfo(virCHMonitor *mon, virCHMonitorCPUInfo **vcpus, size_t maxvcpus); +size_t virCHMonitorGetThreadInfo(virCHMonitor *mon, bool refresh, + virCHMonitorThreadInfo **threads); diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index 65ca03cfaf..56c8d4216d 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -26,6 +26,8 @@ #include "ch_domain.h" #include "ch_monitor.h" #include "ch_process.h" +#include "domain_cgroup.h" +#include "virnuma.h" #include "viralloc.h" #include "virerror.h" #include "virjson.h" @@ -131,6 +133,254 @@ virCHProcessUpdateInfo(virDomainObj *vm) return 0; } +static int +virCHProcessGetAllCpuAffinity(virBitmap **cpumapRet) +{ + *cpumapRet = NULL; + + if (!virHostCPUHasBitmap()) + return 0; + + if (!(*cpumapRet = virHostCPUGetOnlineBitmap())) + return -1; + + return 0; +} + +#if defined(WITH_SCHED_GETAFFINITY) || defined(WITH_BSD_CPU_AFFINITY) +static int +virCHProcessInitCpuAffinity(virDomainObj *vm) +{ + g_autoptr(virBitmap) cpumapToSet = NULL; + virDomainNumatuneMemMode mem_mode; + virCHDomainObjPrivate *priv = vm->privateData; + + if (!vm->pid) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot setup CPU affinity until process is started")); + return -1; + } + + if (virDomainNumaGetNodeCount(vm->def->numa) <= 1 && + virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + virBitmap *nodeset = NULL; + + if (virDomainNumatuneMaybeGetNodeset(vm->def->numa, + priv->autoNodeset, + &nodeset, -1) < 0) + return -1; + + if (virNumaNodesetToCPUset(nodeset, &cpumapToSet) < 0) + return -1; + } else if (vm->def->cputune.emulatorpin) { + if (!(cpumapToSet = virBitmapNewCopy(vm->def->cputune.emulatorpin))) + return -1; + } else { + if (virCHProcessGetAllCpuAffinity(&cpumapToSet) < 0) + return -1; + } + + if (cpumapToSet && virProcessSetAffinity(vm->pid, cpumapToSet, false) < 0) { + return -1; + } + + return 0; +} +#else /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */ +static int +virCHProcessInitCpuAffinity(virDomainObj *vm G_GNUC_UNUSED) +{ + return 0; +} +#endif /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */ + +/** + * virCHProcessSetupPid: + * + * This function sets resource properties (affinity, cgroups, + * scheduler) for any PID associated with a domain. It should be used + * to set up emulator PIDs as well as vCPU and I/O thread pids to + * ensure they are all handled the same way. + * + * Returns 0 on success, -1 on error. + */ +static int +virCHProcessSetupPid(virDomainObj *vm, + pid_t pid, + virCgroupThreadName nameval, + int id, + virBitmap *cpumask, + unsigned long long period, + long long quota, + virDomainThreadSchedParam *sched) +{ + virCHDomainObjPrivate *priv = vm->privateData; + virDomainNumatuneMemMode mem_mode; + virCgroup *cgroup = NULL; + virBitmap *use_cpumask = NULL; + virBitmap *affinity_cpumask = NULL; + g_autoptr(virBitmap) hostcpumap = NULL; + g_autofree char *mem_mask = NULL; + int ret = -1; + + if ((period || quota) && + !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cgroup cpu is required for scheduler tuning")); + goto cleanup; + } + + /* Infer which cpumask shall be used. */ + if (cpumask) { + use_cpumask = cpumask; + } else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) { + use_cpumask = priv->autoCpuset; + } else if (vm->def->cpumask) { + use_cpumask = vm->def->cpumask; + } else { + /* we can't assume cloud-hypervisor itself is running on all pCPUs, + * so we need to explicitly set the spawned instance to all pCPUs. */ + if (virCHProcessGetAllCpuAffinity(&hostcpumap) < 0) + goto cleanup; + affinity_cpumask = hostcpumap; + } + + /* + * If CPU cgroup controller is not initialized here, then we need + * neither period nor quota settings. And if CPUSET controller is + * not initialized either, then there's nothing to do anyway. + */ + if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) || + virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + virDomainNumatuneMaybeFormatNodeset(vm->def->numa, + priv->autoNodeset, + &mem_mask, -1) < 0) + goto cleanup; + + if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0) + goto cleanup; + + if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + if (use_cpumask && + virSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0) + goto cleanup; + + if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0) + goto cleanup; + + } + + if ((period || quota) && + virSetupCgroupVcpuBW(cgroup, period, quota) < 0) + goto cleanup; + + /* Move the thread to the sub dir */ + VIR_INFO("Adding pid %d to cgroup", pid); + if (virCgroupAddThread(cgroup, pid) < 0) + goto cleanup; + + } + + if (!affinity_cpumask) + affinity_cpumask = use_cpumask; + + /* Setup legacy affinity. */ + if (affinity_cpumask + && virProcessSetAffinity(pid, affinity_cpumask, false) < 0) + goto cleanup; + + /* Set scheduler type and priority, but not for the main thread. */ + if (sched && + nameval != VIR_CGROUP_THREAD_EMULATOR && + virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) + goto cleanup; + + ret = 0; + cleanup: + if (cgroup) { + if (ret < 0) + virCgroupRemove(cgroup); + virCgroupFree(cgroup); + } + + return ret; +} + +/** + * virCHProcessSetupVcpu: + * @vm: domain object + * @vcpuid: id of VCPU to set defaults + * + * This function sets resource properties (cgroups, affinity, scheduler) for a + * vCPU. This function expects that the vCPU is online and the vCPU pids were + * correctly detected at the point when it's called. + * + * Returns 0 on success, -1 on error. + */ +int +virCHProcessSetupVcpu(virDomainObj *vm, unsigned int vcpuid) +{ + pid_t vcpupid = virCHDomainGetVcpuPid(vm, vcpuid); + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, vcpuid); + + return virCHProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU, + vcpuid, vcpu->cpumask, + vm->def->cputune.period, + vm->def->cputune.quota, &vcpu->sched); +} + +static int +virCHProcessSetupVcpus(virDomainObj *vm) +{ + virDomainVcpuDef *vcpu; + unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def); + size_t i; + + if ((vm->def->cputune.period || vm->def->cputune.quota) && + !virCgroupHasController(((virCHDomainObjPrivate *) vm->privateData)-> + cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cgroup cpu is required for scheduler tuning")); + return -1; + } + + if (!virCHDomainHasVcpuPids(vm)) { + /* If any CPU has custom affinity that differs from the + * VM default affinity, we must reject it */ + for (i = 0; i < maxvcpus; i++) { + vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (vcpu->cpumask && + !virBitmapEqual(vm->def->cpumask, vcpu->cpumask)) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("cpu affinity is not supported")); + return -1; + } + } + + return 0; + } + + for (i = 0; i < maxvcpus; i++) { + vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (virCHProcessSetupVcpu(vm, i) < 0) + return -1; + } + + return 0; +} + /** * virCHProcessStart: * @driver: pointer to driver structure @@ -141,12 +391,13 @@ virCHProcessUpdateInfo(virDomainObj *vm) * * Returns 0 on success or -1 in case of error */ -int virCHProcessStart(virCHDriver *driver, - virDomainObj *vm, - virDomainRunningReason reason) +int +virCHProcessStart(virCHDriver *driver, + virDomainObj *vm, virDomainRunningReason reason) { int ret = -1; virCHDomainObjPrivate *priv = vm->privateData; + g_autoptr(virCHDriverConfig) cfg = virCHDriverGetConfig(priv->driver); g_autofree int *nicindexes = NULL; size_t nnicindexes = 0; @@ -166,18 +417,39 @@ int virCHProcessStart(virCHDriver *driver, } } + vm->pid = priv->monitor->pid; + vm->def->id = vm->pid; + priv->machineName = virCHDomainGetMachineName(vm); + + if (virSetupCgroup("ch", vm, + nnicindexes, nicindexes, + priv->cgroup, + cfg->cgroupControllers, + 0, /*maxThreadsPerProc*/ + priv->driver->privileged, + priv->machineName) < 0) + goto cleanup; + + if (virCHProcessInitCpuAffinity(vm) < 0) + goto cleanup; + if (virCHMonitorBootVM(priv->monitor) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("failed to boot guest VM")); goto cleanup; } - priv->machineName = virCHDomainGetMachineName(vm); - vm->pid = priv->monitor->pid; - vm->def->id = vm->pid; + virCHDomainRefreshThreadInfo(vm); - virCHProcessUpdateInfo(vm); + VIR_DEBUG("Setting global CPU cgroup (if required)"); + if (virSetupGlobalCpuCgroup(vm, priv->cgroup, priv->autoNodeset) < 0) + goto cleanup; + + VIR_DEBUG("Setting vCPU tuning/settings"); + if (virCHProcessSetupVcpus(vm) < 0) + goto cleanup; + virCHProcessUpdateInfo(vm); virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason); return 0; @@ -189,10 +461,12 @@ int virCHProcessStart(virCHDriver *driver, return ret; } -int virCHProcessStop(virCHDriver *driver G_GNUC_UNUSED, - virDomainObj *vm, - virDomainShutoffReason reason) +int +virCHProcessStop(virCHDriver *driver G_GNUC_UNUSED, + virDomainObj *vm, virDomainShutoffReason reason) { + int ret; + int retries = 0; virCHDomainObjPrivate *priv = vm->privateData; VIR_DEBUG("Stopping VM name=%s pid=%d reason=%d", @@ -203,6 +477,16 @@ int virCHProcessStop(virCHDriver *driver G_GNUC_UNUSED, priv->monitor = NULL; } + retry: + if ((ret = virRemoveCgroup(vm, priv->cgroup, priv->machineName)) < 0) { + if (ret == -EBUSY && (retries++ < 5)) { + g_usleep(200*1000); + goto retry; + } + VIR_WARN("Failed to remove cgroup for %s", + vm->def->name); + } + vm->pid = -1; vm->def->id = -1; diff --git a/src/ch/ch_process.h b/src/ch/ch_process.h index abc4915979..800e3f4e23 100644 --- a/src/ch/ch_process.h +++ b/src/ch/ch_process.h @@ -29,3 +29,6 @@ int virCHProcessStart(virCHDriver *driver, int virCHProcessStop(virCHDriver *driver, virDomainObj *vm, virDomainShutoffReason reason); + +int virCHProcessSetupVcpu(virDomainObj *vm, + unsigned int vcpuid); -- 2.27.0

From: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 30 +++++++++ src/ch/ch_domain.h | 1 + src/ch/ch_driver.c | 145 ++++++++++++++++++++++++++++++++++++++++++++ src/ch/ch_monitor.c | 2 +- 4 files changed, 177 insertions(+), 1 deletion(-) diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index 42cb4e7dcf..524aa330e1 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -20,6 +20,7 @@ #include <config.h> +#include "datatypes.h" #include "ch_domain.h" #include "domain_driver.h" #include "viralloc.h" @@ -420,3 +421,32 @@ virCHDomainGetMachineName(virDomainObj *vm) return ret; } + +/** + * virCHDomainObjFromDomain: + * @domain: Domain pointer that has to be looked up + * + * This function looks up @domain and returns the appropriate virDomainObjPtr + * that has to be released by calling virDomainObjEndAPI(). + * + * Returns the domain object with incremented reference counter which is locked + * on success, NULL otherwise. + */ +virDomainObj * +virCHDomainObjFromDomain(virDomainPtr domain) +{ + virDomainObj *vm; + virCHDriver *driver = domain->conn->privateData; + char uuidstr[VIR_UUID_STRING_BUFLEN]; + + vm = virDomainObjListFindByUUID(driver->domains, domain->uuid); + if (!vm) { + virUUIDFormat(domain->uuid, uuidstr); + virReportError(VIR_ERR_NO_DOMAIN, + _("no domain with matching uuid '%s' (%s)"), + uuidstr, domain->name); + return NULL; + } + + return vm; +} diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 7cffe2b4fb..b4a245df54 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -95,3 +95,4 @@ pid_t virCHDomainGetVcpuPid(virDomainObj *vm, unsigned int vcpuid); bool virCHDomainHasVcpuPids(virDomainObj *vm); char *virCHDomainGetMachineName(virDomainObj *vm); +virDomainObj *virCHDomainObjFromDomain(virDomain *domain); diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 97fbd76959..0cc08b6c20 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -25,6 +25,7 @@ #include "ch_driver.h" #include "ch_monitor.h" #include "ch_process.h" +#include "domain_cgroup.h" #include "datatypes.h" #include "driver.h" #include "viraccessapicheck.h" @@ -1123,6 +1124,148 @@ chDomainGetVcpus(virDomainPtr dom, return ret; } +static int +chDomainPinVcpuLive(virDomainObj *vm, + virDomainDef *def, + int vcpu, + virCHDriver *driver, + virCHDriverConfig *cfg, + virBitmap *cpumap) +{ + g_autoptr(virBitmap) tmpmap = NULL; + g_autoptr(virCgroup) cgroup_vcpu = NULL; + virDomainVcpuDef *vcpuinfo; + virCHDomainObjPrivate *priv = vm->privateData; + + g_autofree char *str = NULL; + + if (!virCHDomainHasVcpuPids(vm)) { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("cpu affinity is not supported")); + return -1; + } + + if (!(vcpuinfo = virDomainDefGetVcpu(def, vcpu))) { + virReportError(VIR_ERR_INVALID_ARG, + _("vcpu %d is out of range of live cpu count %d"), + vcpu, virDomainDefGetVcpusMax(def)); + return -1; + } + + if (!(tmpmap = virBitmapNewCopy(cpumap))) + return -1; + + if (!(str = virBitmapFormat(cpumap))) + return -1; + + if (vcpuinfo->online) { + /* Configure the corresponding cpuset cgroup before set affinity. */ + if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, vcpu, + false, &cgroup_vcpu) < 0) + return -1; + if (virSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0) + return -1; + } + + if (virProcessSetAffinity(virCHDomainGetVcpuPid(vm, vcpu), cpumap, false) < 0) + return -1; + } + + virBitmapFree(vcpuinfo->cpumask); + vcpuinfo->cpumask = tmpmap; + tmpmap = NULL; + + if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) + return -1; + + return 0; +} + + +static int +chDomainPinVcpuFlags(virDomainPtr dom, + unsigned int vcpu, + unsigned char *cpumap, + int maplen, + unsigned int flags) +{ + virCHDriver *driver = dom->conn->privateData; + virDomainObj *vm; + virDomainDef *def; + virDomainDef *persistentDef; + int ret = -1; + virBitmap *pcpumap = NULL; + virDomainVcpuDef *vcpuinfo = NULL; + g_autoptr(virCHDriverConfig) cfg = NULL; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + cfg = virCHDriverGetConfig(driver); + + if (!(vm = virCHDomainObjFromDomain(dom))) + goto cleanup; + + if (virDomainPinVcpuFlagsEnsureACL(dom->conn, vm->def, flags) < 0) + goto cleanup; + + if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0) + goto cleanup; + + if (virDomainObjGetDefs(vm, flags, &def, &persistentDef) < 0) + goto endjob; + + if (persistentDef && + !(vcpuinfo = virDomainDefGetVcpu(persistentDef, vcpu))) { + virReportError(VIR_ERR_INVALID_ARG, + _("vcpu %d is out of range of persistent cpu count %d"), + vcpu, virDomainDefGetVcpus(persistentDef)); + goto endjob; + } + + if (!(pcpumap = virBitmapNewData(cpumap, maplen))) + goto endjob; + + if (virBitmapIsAllClear(pcpumap)) { + virReportError(VIR_ERR_INVALID_ARG, "%s", + _("Empty cpu list for pinning")); + goto endjob; + } + + if (def && + chDomainPinVcpuLive(vm, def, vcpu, driver, cfg, pcpumap) < 0) + goto endjob; + + if (persistentDef) { + virBitmapFree(vcpuinfo->cpumask); + vcpuinfo->cpumask = pcpumap; + pcpumap = NULL; + + goto endjob; + } + + ret = 0; + + endjob: + virCHDomainObjEndJob(vm); + + cleanup: + virDomainObjEndAPI(&vm); + virBitmapFree(pcpumap); + return ret; +} + +static int +chDomainPinVcpu(virDomainPtr dom, + unsigned int vcpu, + unsigned char *cpumap, + int maplen) +{ + return chDomainPinVcpuFlags(dom, vcpu, cpumap, maplen, + VIR_DOMAIN_AFFECT_LIVE); +} + /* Function Tables */ static virHypervisorDriver chHypervisorDriver = { .name = "CH", @@ -1163,6 +1306,8 @@ static virHypervisorDriver chHypervisorDriver = { .domainGetVcpusFlags = chDomainGetVcpusFlags, /* 8.0.0 */ .domainGetMaxVcpus = chDomainGetMaxVcpus, /* 8.0.0 */ .domainGetVcpuPinInfo = chDomainGetVcpuPinInfo, /* 8.0.0 */ + .domainPinVcpu = chDomainPinVcpu, /* 8.0.0 */ + .domainPinVcpuFlags = chDomainPinVcpuFlags, /* 8.0.0 */ .nodeGetCPUMap = chNodeGetCPUMap, /* 8.0.0 */ }; diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c index f4e0f88817..4de86f3c03 100644 --- a/src/ch/ch_monitor.c +++ b/src/ch/ch_monitor.c @@ -41,7 +41,7 @@ VIR_LOG_INIT("ch.ch_monitor"); static virClass *virCHMonitorClass; static void virCHMonitorDispose(void *obj); -static void virCHMonitorThreadInfoFree(virCHMonitor * mon); +static void virCHMonitorThreadInfoFree(virCHMonitor *mon); static int virCHMonitorOnceInit(void) { -- 2.27.0

From: Vineeth Pillai <viremana@linux.microsoft.com> Enable support of VIR_DRV_FEATURE_TYPED_PARAM_STRING to enable numatune Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_driver.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 0cc08b6c20..03db6b2bc8 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -944,6 +944,36 @@ static int chStateInitialize(bool privileged, return ret; } +/* Which features are supported by this driver? */ +static int +chConnectSupportsFeature(virConnectPtr conn, int feature) +{ + if (virConnectSupportsFeatureEnsureACL(conn) < 0) + return -1; + + switch ((virDrvFeature) feature) { + case VIR_DRV_FEATURE_TYPED_PARAM_STRING: + return 1; + case VIR_DRV_FEATURE_MIGRATION_V2: + case VIR_DRV_FEATURE_MIGRATION_V3: + case VIR_DRV_FEATURE_MIGRATION_P2P: + case VIR_DRV_FEATURE_MIGRATE_CHANGE_PROTECTION: + case VIR_DRV_FEATURE_FD_PASSING: + case VIR_DRV_FEATURE_XML_MIGRATABLE: + case VIR_DRV_FEATURE_MIGRATION_OFFLINE: + case VIR_DRV_FEATURE_MIGRATION_PARAMS: + case VIR_DRV_FEATURE_MIGRATION_DIRECT: + case VIR_DRV_FEATURE_MIGRATION_V1: + case VIR_DRV_FEATURE_PROGRAM_KEEPALIVE: + case VIR_DRV_FEATURE_REMOTE: + case VIR_DRV_FEATURE_REMOTE_CLOSE_CALLBACK: + case VIR_DRV_FEATURE_REMOTE_EVENT_CALLBACK: + case VIR_DRV_FEATURE_NETWORK_UPDATE_HAS_CORRECT_ORDER: + default: + return 0; + } +} + static int chDomainGetVcpusFlags(virDomainPtr dom, unsigned int flags) { @@ -1279,6 +1309,7 @@ static virHypervisorDriver chHypervisorDriver = { .connectListAllDomains = chConnectListAllDomains, /* 7.5.0 */ .connectListDomains = chConnectListDomains, /* 7.5.0 */ .connectGetCapabilities = chConnectGetCapabilities, /* 7.5.0 */ + .connectSupportsFeature = chConnectSupportsFeature, /* 8.0.0 */ .domainCreateXML = chDomainCreateXML, /* 7.5.0 */ .domainCreate = chDomainCreate, /* 7.5.0 */ .domainCreateWithFlags = chDomainCreateWithFlags, /* 7.5.0 */ -- 2.27.0

From: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_driver.c | 273 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 273 insertions(+) diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 03db6b2bc8..d7008ef011 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -43,6 +43,7 @@ #include "viruri.h" #include "virutil.h" #include "viruuid.h" +#include "virnuma.h" #define VIR_FROM_THIS VIR_FROM_CH @@ -1296,6 +1297,276 @@ chDomainPinVcpu(virDomainPtr dom, VIR_DOMAIN_AFFECT_LIVE); } +#define CH_NB_NUMA_PARAM 2 + +static int +chDomainGetNumaParameters(virDomainPtr dom, + virTypedParameterPtr params, + int *nparams, + unsigned int flags) +{ + size_t i; + virDomainObj *vm = NULL; + virDomainNumatuneMemMode tmpmode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; + virCHDomainObjPrivate *priv; + g_autofree char *nodeset = NULL; + int ret = -1; + virDomainDef *def = NULL; + bool live = false; + virBitmap *autoNodeset = NULL; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG | + VIR_TYPED_PARAM_STRING_OKAY, -1); + + if (!(vm = virCHDomainObjFromDomain(dom))) + return -1; + priv = vm->privateData; + + if (virDomainGetNumaParametersEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (!(def = virDomainObjGetOneDefState(vm, flags, &live))) + goto cleanup; + + if (live) + autoNodeset = priv->autoNodeset; + + if ((*nparams) == 0) { + *nparams = CH_NB_NUMA_PARAM; + ret = 0; + goto cleanup; + } + + for (i = 0; i < CH_NB_NUMA_PARAM && i < *nparams; i++) { + virMemoryParameterPtr param = ¶ms[i]; + + switch (i) { + case 0: /* fill numa mode here */ + ignore_value(virDomainNumatuneGetMode(def->numa, -1, &tmpmode)); + + if (virTypedParameterAssign(param, VIR_DOMAIN_NUMA_MODE, + VIR_TYPED_PARAM_INT, tmpmode) < 0) + goto cleanup; + + break; + + case 1: /* fill numa nodeset here */ + nodeset = virDomainNumatuneFormatNodeset(def->numa, autoNodeset, -1); + + if (!nodeset || + virTypedParameterAssign(param, VIR_DOMAIN_NUMA_NODESET, + VIR_TYPED_PARAM_STRING, nodeset) < 0) + goto cleanup; + + nodeset = NULL; + break; + + /* coverity[dead_error_begin] */ + default: + break; + /* should not hit here */ + } + } + + if (*nparams > CH_NB_NUMA_PARAM) + *nparams = CH_NB_NUMA_PARAM; + ret = 0; + + cleanup: + virDomainObjEndAPI(&vm); + return ret; +} + +static int +chDomainSetNumaParamsLive(virDomainObj *vm, + virBitmap *nodeset) +{ + g_autoptr(virCgroup) cgroup_temp = NULL; + virCHDomainObjPrivate *priv = vm->privateData; + g_autofree char *nodeset_str = NULL; + virDomainNumatuneMemMode mode; + size_t i = 0; + + + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mode) == 0 && + mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("change of nodeset for running domain requires strict numa mode")); + return -1; + } + + if (!virNumaNodesetIsAvailable(nodeset)) + return -1; + + /* Ensure the cpuset string is formatted before passing to cgroup */ + if (!(nodeset_str = virBitmapFormat(nodeset))) + return -1; + + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0) + return -1; + + + for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) { + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, i, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0) + return -1; + + return 0; + } + + for (i = 0; i < vm->def->niothreadids; i++) { + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD, + vm->def->iothreadids[i]->iothread_id, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0) + return -1; + } + + /* set nodeset for root cgroup */ + if (virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0) + return -1; + + return 0; +} + +static int +chDomainSetNumaParameters(virDomainPtr dom, + virTypedParameterPtr params, + int nparams, + unsigned int flags) +{ + virCHDriver *driver = dom->conn->privateData; + size_t i; + virDomainDef *def; + virDomainDef *persistentDef; + virDomainObj *vm = NULL; + int ret = -1; + g_autoptr(virCHDriverConfig) cfg = NULL; + virCHDomainObjPrivate *priv; + virBitmap *nodeset = NULL; + virDomainNumatuneMemMode config_mode; + int mode = -1; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + if (virTypedParamsValidate(params, nparams, + VIR_DOMAIN_NUMA_MODE, + VIR_TYPED_PARAM_INT, + VIR_DOMAIN_NUMA_NODESET, + VIR_TYPED_PARAM_STRING, + NULL) < 0) + return -1; + + if (!(vm = virCHDomainObjFromDomain(dom))) + return -1; + + priv = vm->privateData; + cfg = virCHDriverGetConfig(driver); + + if (virDomainSetNumaParametersEnsureACL(dom->conn, vm->def, flags) < 0) + goto cleanup; + + for (i = 0; i < nparams; i++) { + virTypedParameterPtr param = ¶ms[i]; + + if (STREQ(param->field, VIR_DOMAIN_NUMA_MODE)) { + mode = param->value.i; + + if (mode < 0 || mode >= VIR_DOMAIN_NUMATUNE_MEM_LAST) { + virReportError(VIR_ERR_INVALID_ARG, + _("unsupported numatune mode: '%d'"), mode); + goto cleanup; + } + + } else if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) { + if (virBitmapParse(param->value.s, &nodeset, + VIR_DOMAIN_CPUMASK_LEN) < 0) + goto cleanup; + + if (virBitmapIsAllClear(nodeset)) { + virReportError(VIR_ERR_OPERATION_INVALID, + _("Invalid nodeset of 'numatune': %s"), + param->value.s); + goto cleanup; + } + } + } + + if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0) + goto cleanup; + + if (virDomainObjGetDefs(vm, flags, &def, &persistentDef) < 0) + goto endjob; + + if (def) { + if (!driver->privileged) { + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", + _("NUMA tuning is not available in session mode")); + goto endjob; + } + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("cgroup cpuset controller is not mounted")); + goto endjob; + } + + if (mode != -1 && + virDomainNumatuneGetMode(def->numa, -1, &config_mode) == 0 && + config_mode != mode) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("can't change numatune mode for running domain")); + goto endjob; + } + + if (nodeset && + chDomainSetNumaParamsLive(vm, nodeset) < 0) + goto endjob; + + if (virDomainNumatuneSet(def->numa, + def->placement_mode == + VIR_DOMAIN_CPU_PLACEMENT_MODE_STATIC, + -1, mode, nodeset) < 0) + goto endjob; + + if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) + goto endjob; + } + + /* + if (persistentDef) { + if (virDomainNumatuneSet(persistentDef->numa, + persistentDef->placement_mode == + VIR_DOMAIN_CPU_PLACEMENT_MODE_STATIC, + -1, mode, nodeset) < 0) + goto endjob; + + if (virDomainDefSave(persistentDef, driver->xmlopt, cfg->configDir) < 0) + goto endjob; + } + */ + + ret = 0; + + endjob: + virCHDomainObjEndJob(vm); + + cleanup: + virBitmapFree(nodeset); + virDomainObjEndAPI(&vm); + return ret; +} + /* Function Tables */ static virHypervisorDriver chHypervisorDriver = { .name = "CH", @@ -1340,6 +1611,8 @@ static virHypervisorDriver chHypervisorDriver = { .domainPinVcpu = chDomainPinVcpu, /* 8.0.0 */ .domainPinVcpuFlags = chDomainPinVcpuFlags, /* 8.0.0 */ .nodeGetCPUMap = chNodeGetCPUMap, /* 8.0.0 */ + .domainSetNumaParameters = chDomainSetNumaParameters, /* 8.0.0 */ + .domainGetNumaParameters = chDomainGetNumaParameters, /* 8.0.0 */ }; static virConnectDriver chConnectDriver = { -- 2.27.0

using virCHProcessSetupPid Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_monitor.c | 60 +++++++++++++++++++++++++++++++++++ src/ch/ch_monitor.h | 2 ++ src/ch/ch_process.c | 77 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 138 insertions(+), 1 deletion(-) diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c index 4de86f3c03..1709c73181 100644 --- a/src/ch/ch_monitor.c +++ b/src/ch/ch_monitor.c @@ -921,3 +921,63 @@ virCHMonitorGetInfo(virCHMonitor *mon, virJSONValue **info) { return virCHMonitorGet(mon, URL_VM_INFO, info); } + +/** + * virCHMonitorGetIOThreads: + * @mon: Pointer to the monitor + * @iothreads: Location to return array of IOThreadInfo data + * + * Retrieve the list of iothreads defined/running for the machine + * + * Returns count of IOThreadInfo structures on success + * -1 on error. + */ +int virCHMonitorGetIOThreads(virCHMonitor *mon, + virDomainIOThreadInfo ***iothreads) +{ + size_t nthreads = 0, niothreads = 0; + int thd_index; + virDomainIOThreadInfo **iothreadinfolist = NULL, *iothreadinfo = NULL; + + *iothreads = NULL; + nthreads = virCHMonitorRefreshThreadInfo(mon); + + iothreadinfolist = g_new0(virDomainIOThreadInfo*, nthreads); + + for (thd_index = 0; thd_index < nthreads; thd_index++) { + virBitmap *map = NULL; + if (mon->threads[thd_index].type == virCHThreadTypeIO) { + iothreadinfo = g_new0(virDomainIOThreadInfo, 1); + + iothreadinfo->iothread_id = mon->threads[thd_index].ioInfo.tid; + + if (!(map = virProcessGetAffinity(iothreadinfo->iothread_id))) + goto cleanup; + + if (virBitmapToData(map, &(iothreadinfo->cpumap), + &(iothreadinfo->cpumaplen)) < 0) { + virBitmapFree(map); + goto cleanup; + } + virBitmapFree(map); + //Append to iothreadinfolist + iothreadinfolist[niothreads] = iothreadinfo; + niothreads++; + } + } + VIR_DELETE_ELEMENT_INPLACE(iothreadinfolist, + niothreads, nthreads); + *iothreads = iothreadinfolist; + VIR_DEBUG("niothreads = %ld", niothreads); + return niothreads; + + cleanup: + if (iothreadinfolist) { + for (thd_index = 0; thd_index < niothreads; thd_index++) + VIR_FREE(iothreadinfolist[thd_index]); + VIR_FREE(iothreadinfolist); + } + if (iothreadinfo) + VIR_FREE(iothreadinfo); + return -1; +} diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h index f8c3fa75e8..98edb0faf9 100644 --- a/src/ch/ch_monitor.h +++ b/src/ch/ch_monitor.h @@ -118,3 +118,5 @@ int virCHMonitorGetCPUInfo(virCHMonitor *mon, size_t maxvcpus); size_t virCHMonitorGetThreadInfo(virCHMonitor *mon, bool refresh, virCHMonitorThreadInfo **threads); +int virCHMonitorGetIOThreads(virCHMonitor *mon, + virDomainIOThreadInfo ***iothreads); diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index 56c8d4216d..1af89e0d9e 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -41,7 +41,6 @@ VIR_LOG_INIT("ch.ch_process"); #define START_VM_POSTFIX ": starting up vm\n" - static virCHMonitor * virCHProcessConnectMonitor(virCHDriver *driver, virDomainObj *vm) @@ -310,6 +309,74 @@ virCHProcessSetupPid(virDomainObj *vm, return ret; } +static int +virCHProcessSetupIOThread(virDomainObj *vm, + virDomainIOThreadInfo *iothread) +{ + virCHDomainObjPrivate *priv = vm->privateData; + return virCHProcessSetupPid(vm, iothread->iothread_id, + VIR_CGROUP_THREAD_IOTHREAD, + iothread->iothread_id, + priv->autoCpuset, // This should be updated when CLH supports accepting + // iothread settings from input domain definition + vm->def->cputune.iothread_period, + vm->def->cputune.iothread_quota, + NULL); // CLH doesn't allow choosing a scheduler for iothreads. +} + +static int +virCHProcessSetupIOThreads(virDomainObj *vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + virDomainIOThreadInfo **iothreads = NULL; + size_t i; + size_t niothreads; + + niothreads = virCHMonitorGetIOThreads(priv->monitor, &iothreads); + for (i = 0; i < niothreads; i++) { + VIR_DEBUG("IOThread index = %ld , tid = %d", i, iothreads[i]->iothread_id); + if (virCHProcessSetupIOThread(vm, iothreads[i]) < 0) + return -1; + } + return 0; +} + + +static int +virCHProcessSetupEmulatorThread(virDomainObj *vm, + virCHMonitorEmuThreadInfo emuthread) +{ + return virCHProcessSetupPid(vm, emuthread.tid, VIR_CGROUP_THREAD_EMULATOR, + 0, vm->def->cputune.emulatorpin, + vm->def->cputune.emulator_period, + vm->def->cputune.emulator_quota, + vm->def->cputune.emulatorsched); +} + +static int +virCHProcessSetupEmulatorThreads(virDomainObj *vm) +{ + int thd_index = 0; + virCHDomainObjPrivate *priv = vm->privateData; + /* + * Cloud-hypervisor start 4 Emulator threads by default: + * vmm + * cloud-hypervisor + * http-server + * signal_handler + */ + for (thd_index = 0; thd_index < priv->monitor->nthreads; thd_index++) { + if (priv->monitor->threads[thd_index].type == virCHThreadTypeEmulator) { + VIR_DEBUG("Setup tid = %d (%s) Emulator thread", priv->monitor->threads[thd_index].emuInfo.tid, + priv->monitor->threads[thd_index].emuInfo.thrName); + + if (virCHProcessSetupEmulatorThread(vm, priv->monitor->threads[thd_index].emuInfo) < 0) + return -1; + } + } + return 0; +} + /** * virCHProcessSetupVcpu: * @vm: domain object @@ -441,6 +508,14 @@ virCHProcessStart(virCHDriver *driver, virCHDomainRefreshThreadInfo(vm); + VIR_DEBUG("Setting emulator tuning/settings"); + if (virCHProcessSetupEmulatorThreads(vm) < 0) + goto cleanup; + + VIR_DEBUG("Setting iothread tuning/settings"); + if (virCHProcessSetupIOThreads(vm) < 0) + goto cleanup; + VIR_DEBUG("Setting global CPU cgroup (if required)"); if (virSetupGlobalCpuCgroup(vm, priv->cgroup, priv->autoNodeset) < 0) goto cleanup; -- 2.27.0

Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_driver.c | 154 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index d7008ef011..1f40f6561a 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -1297,6 +1297,158 @@ chDomainPinVcpu(virDomainPtr dom, VIR_DOMAIN_AFFECT_LIVE); } + + +static int +chDomainGetEmulatorPinInfo(virDomainPtr dom, + unsigned char *cpumaps, + int maplen, + unsigned int flags) +{ + virDomainObj *vm = NULL; + virDomainDef *def; + virCHDomainObjPrivate *priv; + bool live; + int ret = -1; + virBitmap *cpumask = NULL; + g_autoptr(virBitmap) bitmap = NULL; + virBitmap *autoCpuset = NULL; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainGetEmulatorPinInfoEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (!(def = virDomainObjGetOneDefState(vm, flags, &live))) + goto cleanup; + + if (live) { + priv = vm->privateData; + autoCpuset = priv->autoCpuset; + } + if (def->cputune.emulatorpin) { + cpumask = def->cputune.emulatorpin; + } else if (def->cpumask) { + cpumask = def->cpumask; + } else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO && + autoCpuset) { + cpumask = autoCpuset; + } else { + if (!(bitmap = virHostCPUGetAvailableCPUsBitmap())) + goto cleanup; + cpumask = bitmap; + } + + virBitmapToDataBuf(cpumask, cpumaps, maplen); + + ret = 1; + + cleanup: + virDomainObjEndAPI(&vm); + return ret; +} + +static int +chDomainPinEmulator(virDomainPtr dom, + unsigned char *cpumap, + int maplen, + unsigned int flags) +{ + virCHDriver *driver = dom->conn->privateData; + virDomainObj *vm; + virCgroup *cgroup_emulator = NULL; + virDomainDef *def; + virDomainDef *persistentDef; + int ret = -1; + virCHDomainObjPrivate *priv; + virBitmap *pcpumap = NULL; + g_autoptr(virCHDriverConfig) cfg = NULL; + g_autofree char *str = NULL; + virTypedParameterPtr eventParams = NULL; + int eventNparams = 0; + int eventMaxparams = 0; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + cfg = virCHDriverGetConfig(driver); + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainPinEmulatorEnsureACL(dom->conn, vm->def, flags) < 0) + goto cleanup; + + if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0) + goto cleanup; + + if (virDomainObjGetDefs(vm, flags, &def, &persistentDef) < 0) + goto endjob; + + priv = vm->privateData; + + if (!(pcpumap = virBitmapNewData(cpumap, maplen))) + goto endjob; + + if (virBitmapIsAllClear(pcpumap)) { + virReportError(VIR_ERR_INVALID_ARG, "%s", + _("Empty cpu list for pinning")); + goto endjob; + } + + if (def) { + if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_EMULATOR, + 0, false, &cgroup_emulator) < 0) + goto endjob; + + if (virSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("failed to set cpuset.cpus in cgroup" + " for emulator threads")); + goto endjob; + } + } + + if (virProcessSetAffinity(vm->pid, pcpumap, false) < 0) + goto endjob; + + virBitmapFree(def->cputune.emulatorpin); + def->cputune.emulatorpin = NULL; + + if (!(def->cputune.emulatorpin = virBitmapNewCopy(pcpumap))) + goto endjob; + + if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) + goto endjob; + + str = virBitmapFormat(pcpumap); + if (virTypedParamsAddString(&eventParams, &eventNparams, + &eventMaxparams, + VIR_DOMAIN_TUNABLE_CPU_EMULATORPIN, + str) < 0) + goto endjob; + + } + + + ret = 0; + + endjob: + virCHDomainObjEndJob(vm); + + cleanup: + if (cgroup_emulator) + virCgroupFree(cgroup_emulator); + virBitmapFree(pcpumap); + virDomainObjEndAPI(&vm); + return ret; +} + #define CH_NB_NUMA_PARAM 2 static int @@ -1610,6 +1762,8 @@ static virHypervisorDriver chHypervisorDriver = { .domainGetVcpuPinInfo = chDomainGetVcpuPinInfo, /* 8.0.0 */ .domainPinVcpu = chDomainPinVcpu, /* 8.0.0 */ .domainPinVcpuFlags = chDomainPinVcpuFlags, /* 8.0.0 */ + .domainPinEmulator = chDomainPinEmulator, /* 8.0.0 */ + .domainGetEmulatorPinInfo = chDomainGetEmulatorPinInfo, /* 8.0.0 */ .nodeGetCPUMap = chNodeGetCPUMap, /* 8.0.0 */ .domainSetNumaParameters = chDomainSetNumaParameters, /* 8.0.0 */ .domainGetNumaParameters = chDomainGetNumaParameters, /* 8.0.0 */ -- 2.27.0

Folks, I'd like to get this patch set in next release. Bubbling up this thread as it was sent out before the holidays. Regards, Praveen K Paladugu On 12/10/2021 2:34 PM, Praveen K Paladugu wrote:
This patchset adds support for cgroup management of ch threads. This version correctly manages cgroups for vcpu and emulator threads created by ch. cgroup management for iothreads is not yet supported.
Along with cgroup management, this patchset also enables support for pinning vcpu and emulator threads to selected host cpus.
v3: * addrressed all the formatting comments in v2 patch set * dropped indentation patches are they do not adhere to libvirt coding style * fixed build issue in qemu driver that was introduced in v2
Praveen K Paladugu (5): util: Helper functions to get process info ch_driver,ch_domain: vcpu info getter callbacks qemu,hypervisor: refactor some cgroup mgmt methods ch_process: Setup emulator and iothread settings ch_driver: emulator threadinfo & pinning callbacks
Vineeth Pillai (8): ch_domain: add virCHDomainGetMonitor helper method ch_domain: add methods to manage private vcpu data ch_driver: domainGetVcpuPinInfo and nodeGetCPUMap ch_monitor: Get nicindexes in prep for cgroup mgmt ch: methods for cgroup mgmt in ch driver ch_driver,ch_domain: vcpupin callback in ch driver ch_driver: enable typed param string for numatune ch_driver: add numatune callbacks for CH driver
src/ch/ch_conf.c | 2 + src/ch/ch_conf.h | 6 +- src/ch/ch_domain.c | 172 ++++++- src/ch/ch_domain.h | 32 +- src/ch/ch_driver.c | 789 +++++++++++++++++++++++++++++++++ src/ch/ch_monitor.c | 341 +++++++++++--- src/ch/ch_monitor.h | 60 ++- src/ch/ch_process.c | 385 +++++++++++++++- src/ch/ch_process.h | 3 + src/ch/meson.build | 1 + src/hypervisor/domain_cgroup.c | 426 +++++++++++++++++- src/hypervisor/domain_cgroup.h | 52 +++ src/libvirt_private.syms | 15 + src/qemu/qemu_cgroup.c | 410 +---------------- src/qemu/qemu_cgroup.h | 11 - src/qemu/qemu_driver.c | 130 +----- src/qemu/qemu_hotplug.c | 7 +- src/qemu/qemu_process.c | 20 +- src/util/virprocess.c | 108 +++++ src/util/virprocess.h | 5 + 20 files changed, 2357 insertions(+), 618 deletions(-)
participants (2)
-
Michal Prívozník
-
Praveen K Paladugu