[libvirt PATCH 00/13] cgroup and thread management in ch driver.

This patchset adds support for cgroup management of ch threads. This version correctly manages cgroups for vcpu and emulator threads created by ch. cgroup management for iothreads is not yet supported. Along with cgroup management, this patchset also enables support for pinning vcpu and emulator threads to selected host cpus.
Praveen K Paladugu (2):
  ch_process: Setup emulator and iothread settings
  ch_driver: emulator threadinfo & pinning callbacks
Vineeth Pillai (11):
  util: Helper functions to get process info
  ch: Explicitly link to virt_util_lib
  ch_domain: add virCHDomainGetMonitor helper method
  ch_domain: add methods to manage private vcpu data
  ch_driver,ch_domain: vcpu info getter callbacks
  ch_driver: domainGetVcpuPinInfo and nodeGetCPUMap
  ch_monitor: Get nicindexes in prep for cgroup mgmt
  ch_cgroup: methods for cgroup mgmt in ch driver
  ch_driver,ch_domain: vcpupin callback in ch driver
  ch_driver: enable typed param string for numatune
  ch_driver: add numatune callbacks for CH driver
 po/POTFILES.in | 1 +
 src/ch/ch_cgroup.c | 457 ++++++++++++++++++++++++
 src/ch/ch_cgroup.h | 45 +++
 src/ch/ch_conf.c | 2 +
 src/ch/ch_conf.h | 9 +-
 src/ch/ch_domain.c | 170 ++++++++-
 src/ch/ch_domain.h | 32 +-
 src/ch/ch_driver.c | 810 +++++++++++++++++++++++++++++++++++++++++-
 src/ch/ch_monitor.c | 254 ++++++++++++-
 src/ch/ch_monitor.h | 60 +++-
 src/ch/ch_process.c | 368 ++++++++++++++++++-
 src/ch/ch_process.h | 3 +
 src/ch/meson.build | 6 +
 src/util/virprocess.c | 136 +++++++
 src/util/virprocess.h | 5 +
 15 files changed, 2329 insertions(+), 29 deletions(-)
 create mode 100644 src/ch/ch_cgroup.c
 create mode 100644 src/ch/ch_cgroup.h
-- 
2.27.0

These helper methods are used to capture vcpu information in ch driver. Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/util/virprocess.c | 136 ++++++++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 5 ++ 2 files changed, 141 insertions(+) diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 6de3f36f52..0164d70df6 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1721,3 +1721,139 @@ virProcessSetScheduler(pid_t pid G_GNUC_UNUSED, } #endif /* !WITH_SCHED_SETSCHEDULER */ + +/* +TODO: This method was cloned from qemuGetProcessInfo in src/qemu/qemu_driver.c. +Need to refactor qemu driver to use this shared function. +*/ +int +virProcessGetStatInfo(unsigned long long *cpuTime, int *lastCpu, long *vm_rss, + pid_t pid, pid_t tid) +{ + g_autofree char *proc = NULL; + FILE *pidinfo; + unsigned long long usertime = 0, systime = 0; + long rss = 0; + int cpu = 0; + + /* In general, we cannot assume pid_t fits in int; but /proc parsing + * is specific to Linux where int works fine. */ + if (tid) + proc = g_strdup_printf("/proc/%d/task/%d/stat", (int)pid, tid); + else + proc = g_strdup_printf("/proc/%d/stat", (int)pid); + if (!proc) + return -1; + + pidinfo = fopen(proc, "r"); + + /* See 'man proc' for information about what all these fields are. We're + * only interested in a very few of them */ + if (!pidinfo || + fscanf(pidinfo, + /* pid -> stime */ + "%*d (%*[^)]) %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %llu %llu" + /* cutime -> endcode */ + "%*d %*d %*d %*d %*d %*d %*u %*u %ld %*u %*u %*u" + /* startstack -> processor */ + "%*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*d %d", + &usertime, &systime, &rss, &cpu) != 4) { + VIR_WARN("cannot parse process status data"); + } + + /* We got jiffies + * We want nanoseconds + * _SC_CLK_TCK is jiffies per second + * So calculate thus.... 
+ */ + if (cpuTime) + *cpuTime = 1000ull * 1000ull * 1000ull * (usertime + systime) + / (unsigned long long)sysconf(_SC_CLK_TCK); + if (lastCpu) + *lastCpu = cpu; + + if (vm_rss) + *vm_rss = rss * virGetSystemPageSizeKB(); + + + VIR_DEBUG("Got status for %d/%d user=%llu sys=%llu cpu=%d rss=%ld", + (int)pid, tid, usertime, systime, cpu, rss); + + VIR_FORCE_FCLOSE(pidinfo); + + return 0; +} +/* +TODO: This method was cloned from qemuGetSchedInfo in src/qemu/qemu_driver.c. +Need to refactor qemu driver to use this shared function. +*/ +int virProcessGetSchedInfo(unsigned long long *cpuWait, pid_t pid, pid_t tid) +{ + g_autofree char *proc = NULL; + g_autofree char *data = NULL; + char **lines = NULL; + size_t i; + int ret = -1; + double val; + + *cpuWait = 0; + + /* In general, we cannot assume pid_t fits in int; but /proc parsing + * is specific to Linux where int works fine. */ + if (tid) + proc = g_strdup_printf("/proc/%d/task/%d/sched", (int)pid, (int)tid); + else + proc = g_strdup_printf("/proc/%d/sched", (int)pid); + if (!proc) + goto cleanup; + ret = -1; + + /* The file is not guaranteed to exist (needs CONFIG_SCHED_DEBUG) */ + if (access(proc, R_OK) < 0) { + ret = 0; + goto cleanup; + } + + if (virFileReadAll(proc, (1<<16), &data) < 0) + goto cleanup; + + lines = g_strsplit(data, "\n", 0); + if (!lines) + goto cleanup; + + for (i = 0; lines[i] != NULL; i++) { + const char *line = lines[i]; + + /* Needs CONFIG_SCHEDSTATS. 
The second check + * is the old name the kernel used in past */ + if (STRPREFIX(line, "se.statistics.wait_sum") || + STRPREFIX(line, "se.wait_sum")) { + line = strchr(line, ':'); + if (!line) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Missing separator in sched info '%s'"), + lines[i]); + goto cleanup; + } + line++; + while (*line == ' ') + line++; + + if (virStrToDouble(line, NULL, &val) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to parse sched info value '%s'"), + line); + goto cleanup; + } + + *cpuWait = (unsigned long long)(val * 1000000); + break; + } + } + + ret = 0; + + cleanup: + g_strfreev(lines); + return ret; +} diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 9910331a0c..3a7c4c2d64 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -127,3 +127,8 @@ typedef enum { } virProcessNamespaceFlags; int virProcessNamespaceAvailable(unsigned int ns); + +int virProcessGetStatInfo(unsigned long long *cpuTime, int *lastCpu, + long *vm_rss, pid_t pid, pid_t tid); +int virProcessGetSchedInfo(unsigned long long *cpuWait, pid_t pid, + pid_t tid); -- 2.27.0

On 10/21/21 16:31, Vineeth Pillai wrote:
These helper methods are used to capture vcpu information in ch driver.
Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/util/virprocess.c | 136 ++++++++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 5 ++ 2 files changed, 141 insertions(+)
diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 6de3f36f52..0164d70df6 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1721,3 +1721,139 @@ virProcessSetScheduler(pid_t pid G_GNUC_UNUSED, }
#endif /* !WITH_SCHED_SETSCHEDULER */ + +/* +TODO: This method was cloned from qemuGetProcessInfo in src/qemu/qemu_driver.c. +Need to refactor qemu driver to use this shared function. +*/
Instead of cloning and leaving a TODO, just go ahead and make the change in qemu_driver.c as well. You don't need to change every instance of qemuGetProcessInfo in QEMU files to use this new helper. Just change qemuGetProcessInfo to use virProcessGetStatInfo:
static int
qemuGetProcessInfo(unsigned long long *cpuTime, int *lastCpu, long *vm_rss,
                   pid_t pid, int tid)
{
    return virProcessGetStatInfo(cpuTime, lastCpu, vm_rss, pid, tid);
}
Same thing down below with qemuGetSchedInfo and virProcessGetSchedInfo.
Thanks,
Daniel
+int +virProcessGetStatInfo(unsigned long long *cpuTime, int *lastCpu, long *vm_rss, + pid_t pid, pid_t tid) +{ + g_autofree char *proc = NULL; + FILE *pidinfo; + unsigned long long usertime = 0, systime = 0; + long rss = 0; + int cpu = 0; + + /* In general, we cannot assume pid_t fits in int; but /proc parsing + * is specific to Linux where int works fine. */ + if (tid) + proc = g_strdup_printf("/proc/%d/task/%d/stat", (int)pid, tid); + else + proc = g_strdup_printf("/proc/%d/stat", (int)pid); + if (!proc) + return -1; + + pidinfo = fopen(proc, "r"); + + /* See 'man proc' for information about what all these fields are. We're + * only interested in a very few of them */ + if (!pidinfo || + fscanf(pidinfo, + /* pid -> stime */ + "%*d (%*[^)]) %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %llu %llu" + /* cutime -> endcode */ + "%*d %*d %*d %*d %*d %*d %*u %*u %ld %*u %*u %*u" + /* startstack -> processor */ + "%*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*d %d", + &usertime, &systime, &rss, &cpu) != 4) { + VIR_WARN("cannot parse process status data"); + } + + /* We got jiffies + * We want nanoseconds + * _SC_CLK_TCK is jiffies per second + * So calculate thus.... + */ + if (cpuTime) + *cpuTime = 1000ull * 1000ull * 1000ull * (usertime + systime) + / (unsigned long long)sysconf(_SC_CLK_TCK); + if (lastCpu) + *lastCpu = cpu; + + if (vm_rss) + *vm_rss = rss * virGetSystemPageSizeKB(); + + + VIR_DEBUG("Got status for %d/%d user=%llu sys=%llu cpu=%d rss=%ld", + (int)pid, tid, usertime, systime, cpu, rss); + + VIR_FORCE_FCLOSE(pidinfo); + + return 0; +} +/* +TODO: This method was cloned from qemuGetSchedInfo in src/qemu/qemu_driver.c. +Need to refactor qemu driver to use this shared function. 
+*/ +int virProcessGetSchedInfo(unsigned long long *cpuWait, pid_t pid, pid_t tid) +{ + g_autofree char *proc = NULL; + g_autofree char *data = NULL; + char **lines = NULL; + size_t i; + int ret = -1; + double val; + + *cpuWait = 0; + + /* In general, we cannot assume pid_t fits in int; but /proc parsing + * is specific to Linux where int works fine. */ + if (tid) + proc = g_strdup_printf("/proc/%d/task/%d/sched", (int)pid, (int)tid); + else + proc = g_strdup_printf("/proc/%d/sched", (int)pid); + if (!proc) + goto cleanup; + ret = -1; + + /* The file is not guaranteed to exist (needs CONFIG_SCHED_DEBUG) */ + if (access(proc, R_OK) < 0) { + ret = 0; + goto cleanup; + } + + if (virFileReadAll(proc, (1<<16), &data) < 0) + goto cleanup; + + lines = g_strsplit(data, "\n", 0); + if (!lines) + goto cleanup; + + for (i = 0; lines[i] != NULL; i++) { + const char *line = lines[i]; + + /* Needs CONFIG_SCHEDSTATS. The second check + * is the old name the kernel used in past */ + if (STRPREFIX(line, "se.statistics.wait_sum") || + STRPREFIX(line, "se.wait_sum")) { + line = strchr(line, ':'); + if (!line) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Missing separator in sched info '%s'"), + lines[i]); + goto cleanup; + } + line++; + while (*line == ' ') + line++; + + if (virStrToDouble(line, NULL, &val) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to parse sched info value '%s'"), + line); + goto cleanup; + } + + *cpuWait = (unsigned long long)(val * 1000000); + break; + } + } + + ret = 0; + + cleanup: + g_strfreev(lines); + return ret; +} diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 9910331a0c..3a7c4c2d64 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -127,3 +127,8 @@ typedef enum { } virProcessNamespaceFlags;
int virProcessNamespaceAvailable(unsigned int ns); + +int virProcessGetStatInfo(unsigned long long *cpuTime, int *lastCpu, + long *vm_rss, pid_t pid, pid_t tid); +int virProcessGetSchedInfo(unsigned long long *cpuWait, pid_t pid, + pid_t tid);

link to virt_util_lib while building ch driver. Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> --- src/ch/meson.build | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ch/meson.build b/src/ch/meson.build index e34974d56c..5c6cab2a9f 100644 --- a/src/ch/meson.build +++ b/src/ch/meson.build @@ -30,6 +30,9 @@ if conf.has('WITH_CH') include_directories: [ conf_inc_dir, ], + link_with: [ + virt_util_lib, + ] ) virt_modules += { -- 2.27.0

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 6 ++++++ src/ch/ch_domain.h | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index 9d32d8669a..ae79b47253 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -292,3 +292,9 @@ virDomainDefParserConfig virCHDriverDomainDefParserConfig = { .domainPostParseCallback = virCHDomainDefPostParse, .deviceValidateCallback = chValidateDomainDeviceDef, }; + +virCHMonitor * +virCHDomainGetMonitor(virDomainObj *vm) +{ + return CH_DOMAIN_PRIVATE(vm)->monitor; +} diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 61b34b0467..04d19398b4 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -57,6 +57,11 @@ struct _virCHDomainObjPrivate { virChrdevs *chrdevs; }; +#define CH_DOMAIN_PRIVATE(vm) \ + ((virCHDomainObjPrivate *) (vm)->privateData) + +virCHMonitor *virCHDomainGetMonitor(virDomainObj *vm); + extern virDomainXMLPrivateDataCallbacks virCHDriverPrivateDataCallbacks; extern virDomainDefParserConfig virCHDriverDomainDefParserConfig; -- 2.27.0

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 50 +++++++++++++++++++++++++++++++++++++++++----- src/ch/ch_domain.h | 11 ++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index ae79b47253..fedde4581b 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -166,11 +166,6 @@ virCHDomainObjPrivateFree(void *data) g_free(priv); } -virDomainXMLPrivateDataCallbacks virCHDriverPrivateDataCallbacks = { - .alloc = virCHDomainObjPrivateAlloc, - .free = virCHDomainObjPrivateFree, -}; - static int virCHDomainDefPostParseBasic(virDomainDef *def, void *opaque G_GNUC_UNUSED) @@ -187,6 +182,45 @@ virCHDomainDefPostParseBasic(virDomainDef *def, return 0; } +static virClass *virCHDomainVcpuPrivateClass; +static void virCHDomainVcpuPrivateDispose(void *obj); + +static int +virCHDomainVcpuPrivateOnceInit(void) +{ + if (!VIR_CLASS_NEW(virCHDomainVcpuPrivate, virClassForObject())) + return -1; + + return 0; +} + +VIR_ONCE_GLOBAL_INIT(virCHDomainVcpuPrivate); + +static virObject * +virCHDomainVcpuPrivateNew(void) +{ + virCHDomainVcpuPrivate *priv; + + if (virCHDomainVcpuPrivateInitialize() < 0) + return NULL; + + if (!(priv = virObjectNew(virCHDomainVcpuPrivateClass))) + return NULL; + + return (virObject *) priv; +} + + +static void +virCHDomainVcpuPrivateDispose(void *obj) +{ + virCHDomainVcpuPrivate *priv = obj; + + priv->tid = 0; + + return; +} + static int virCHDomainDefPostParse(virDomainDef *def, unsigned int parseFlags G_GNUC_UNUSED, @@ -205,6 +239,12 @@ virCHDomainDefPostParse(virDomainDef *def, return 0; } +virDomainXMLPrivateDataCallbacks virCHDriverPrivateDataCallbacks = { + .alloc = virCHDomainObjPrivateAlloc, + .free = virCHDomainObjPrivateFree, + .vcpuNew = virCHDomainVcpuPrivateNew, +}; + static int chValidateDomainDeviceDef(const virDomainDeviceDef *dev, const virDomainDef *def G_GNUC_UNUSED, diff --git 
a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 04d19398b4..75b9933130 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -62,6 +62,17 @@ struct _virCHDomainObjPrivate { virCHMonitor *virCHDomainGetMonitor(virDomainObj *vm); +typedef struct _virCHDomainVcpuPrivate virCHDomainVcpuPrivate; +struct _virCHDomainVcpuPrivate { + virObject parent; + + pid_t tid; /* vcpu thread id */ + virTristateBool halted; +}; + +#define CH_DOMAIN_VCPU_PRIVATE(vcpu) \ + ((virCHDomainVcpuPrivate *) (vcpu)->privateData) + extern virDomainXMLPrivateDataCallbacks virCHDriverPrivateDataCallbacks; extern virDomainDefParserConfig virCHDriverDomainDefParserConfig; -- 2.27.0

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 25 ++++++++ src/ch/ch_domain.h | 4 ++ src/ch/ch_driver.c | 138 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index fedde4581b..c0b0b1005a 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -338,3 +338,28 @@ virCHDomainGetMonitor(virDomainObj *vm) { return CH_DOMAIN_PRIVATE(vm)->monitor; } + +pid_t +virCHDomainGetVcpuPid(virDomainObj *vm, + unsigned int vcpuid) +{ + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, vcpuid); + return CH_DOMAIN_VCPU_PRIVATE(vcpu)->tid; +} + +bool +virCHDomainHasVcpuPids(virDomainObj *vm) +{ + size_t i; + size_t maxvcpus = virDomainDefGetVcpusMax(vm->def); + virDomainVcpuDef *vcpu; + + for (i = 0; i < maxvcpus; i++) { + vcpu = virDomainDefGetVcpu(vm->def, i); + + if (CH_DOMAIN_VCPU_PRIVATE(vcpu)->tid > 0) + return true; + } + + return false; +} diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 75b9933130..d9c9d34a19 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -82,3 +82,7 @@ virCHDomainObjBeginJob(virDomainObj *obj, enum virCHDomainJob job) void virCHDomainObjEndJob(virDomainObj *obj); + +int virCHDomainRefreshVcpuInfo(virDomainObj *vm); +pid_t virCHDomainGetVcpuPid(virDomainObj *vm, unsigned int vcpuid); +bool virCHDomainHasVcpuPids(virDomainObj *vm); diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 1824d2fd16..8ea5ce393d 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -952,6 +952,141 @@ static int chStateInitialize(bool privileged, return ret; } +static int +chDomainGetVcpusFlags(virDomainPtr dom, unsigned int flags) +{ + virDomainObj *vm; + virDomainDef *def; + int ret = -1; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG | + VIR_DOMAIN_VCPU_MAXIMUM | + VIR_DOMAIN_VCPU_GUEST, -1); + + if (!(vm = chDomObjFromDomain(dom))) 
+ return -1; + + if (virDomainGetVcpusFlagsEnsureACL(dom->conn, vm->def, flags) < 0) + goto cleanup; + + if (!(def = virDomainObjGetOneDef(vm, flags))) + goto cleanup; + + if (flags & VIR_DOMAIN_VCPU_MAXIMUM) + ret = virDomainDefGetVcpusMax(def); + else + ret = virDomainDefGetVcpus(def); + + + cleanup: + virDomainObjEndAPI(&vm); + return ret; +} + +static int +chDomainGetMaxVcpus(virDomainPtr dom) +{ + return chDomainGetVcpusFlags(dom, (VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_VCPU_MAXIMUM)); +} + +static int +chDomainHelperGetVcpus(virDomainObj *vm, + virVcpuInfoPtr info, + unsigned long long *cpuwait, + int maxinfo, + unsigned char *cpumaps, + int maplen) +{ + size_t ncpuinfo = 0; + size_t i; + + if (maxinfo == 0) + return 0; + + if (!virCHDomainHasVcpuPids(vm)) { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("cpu affinity is not supported")); + return -1; + } + + if (info) + memset(info, 0, sizeof(*info) * maxinfo); + + if (cpumaps) + memset(cpumaps, 0, sizeof(*cpumaps) * maxinfo); + + for (i = 0; i < virDomainDefGetVcpusMax(vm->def) && ncpuinfo < maxinfo; i++) { + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); + pid_t vcpupid = virCHDomainGetVcpuPid(vm, i); + virVcpuInfoPtr vcpuinfo = info + ncpuinfo; + + if (!vcpu->online) + continue; + + if (info) { + vcpuinfo->number = i; + vcpuinfo->state = VIR_VCPU_RUNNING; + if (virProcessGetStatInfo(&vcpuinfo->cpuTime, + &vcpuinfo->cpu, NULL, + vm->pid, vcpupid) < 0) { + virReportSystemError(errno, "%s", + _("cannot get vCPU placement & pCPU time")); + return -1; + } + } + + if (cpumaps) { + unsigned char *cpumap = VIR_GET_CPUMAP(cpumaps, maplen, ncpuinfo); + virBitmap *map = NULL; + + if (!(map = virProcessGetAffinity(vcpupid))) + return -1; + + virBitmapToDataBuf(map, cpumap, maplen); + virBitmapFree(map); + } + + if (cpuwait) { + if (virProcessGetSchedInfo(&(cpuwait[ncpuinfo]), vm->pid, vcpupid) < 0) + return -1; + } + + ncpuinfo++; + } + + return ncpuinfo; +} + +static int +chDomainGetVcpus(virDomainPtr 
dom, + virVcpuInfoPtr info, + int maxinfo, + unsigned char *cpumaps, + int maplen) +{ + virDomainObj *vm; + int ret = -1; + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainGetVcpusEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (virDomainObjCheckActive(vm) < 0) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot retrieve vcpu information for inactive domain")); + goto cleanup; + } + + ret = chDomainHelperGetVcpus(vm, info, NULL, maxinfo, cpumaps, maplen); + + cleanup: + virDomainObjEndAPI(&vm); + return ret; +} + /* Function Tables */ static virHypervisorDriver chHypervisorDriver = { .name = "CH", @@ -988,6 +1123,9 @@ static virHypervisorDriver chHypervisorDriver = { .domainIsActive = chDomainIsActive, /* 7.5.0 */ .domainOpenConsole = chDomainOpenConsole, /* 7.8.0 */ .nodeGetInfo = chNodeGetInfo, /* 7.5.0 */ + .domainGetVcpus = chDomainGetVcpus, /* 7.9.0 */ + .domainGetVcpusFlags = chDomainGetVcpusFlags, /* 7.9.0 */ + .domainGetMaxVcpus = chDomainGetMaxVcpus, /* 7.9.0 */ }; static virConnectDriver chConnectDriver = { -- 2.27.0

On 10/21/21 16:31, Vineeth Pillai wrote:
Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 25 ++++++++ src/ch/ch_domain.h | 4 ++ src/ch/ch_driver.c | 138 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+)
diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index fedde4581b..c0b0b1005a 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -338,3 +338,28 @@ virCHDomainGetMonitor(virDomainObj *vm) { return CH_DOMAIN_PRIVATE(vm)->monitor; } + +pid_t +virCHDomainGetVcpuPid(virDomainObj *vm, + unsigned int vcpuid) +{ + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, vcpuid); + return CH_DOMAIN_VCPU_PRIVATE(vcpu)->tid; +} + +bool +virCHDomainHasVcpuPids(virDomainObj *vm) +{ + size_t i; + size_t maxvcpus = virDomainDefGetVcpusMax(vm->def); + virDomainVcpuDef *vcpu; + + for (i = 0; i < maxvcpus; i++) { + vcpu = virDomainDefGetVcpu(vm->def, i); + + if (CH_DOMAIN_VCPU_PRIVATE(vcpu)->tid > 0) + return true; + } + + return false; +} diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 75b9933130..d9c9d34a19 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -82,3 +82,7 @@ virCHDomainObjBeginJob(virDomainObj *obj, enum virCHDomainJob job)
void virCHDomainObjEndJob(virDomainObj *obj); + +int virCHDomainRefreshVcpuInfo(virDomainObj *vm); +pid_t virCHDomainGetVcpuPid(virDomainObj *vm, unsigned int vcpuid); +bool virCHDomainHasVcpuPids(virDomainObj *vm); diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 1824d2fd16..8ea5ce393d 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -952,6 +952,141 @@ static int chStateInitialize(bool privileged, return ret; }
+static int +chDomainGetVcpusFlags(virDomainPtr dom, unsigned int flags) +{ + virDomainObj *vm; + virDomainDef *def; + int ret = -1; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG | + VIR_DOMAIN_VCPU_MAXIMUM | + VIR_DOMAIN_VCPU_GUEST, -1); + + if (!(vm = chDomObjFromDomain(dom))) + return -1; + + if (virDomainGetVcpusFlagsEnsureACL(dom->conn, vm->def, flags) < 0) + goto cleanup; + + if (!(def = virDomainObjGetOneDef(vm, flags))) + goto cleanup; + + if (flags & VIR_DOMAIN_VCPU_MAXIMUM) + ret = virDomainDefGetVcpusMax(def); + else + ret = virDomainDefGetVcpus(def); + + + cleanup:
Label indentation in this case is one space only, without changing the indentation level of the code that follows it. See docs/coding-style.rst for an example. chStateInitialize() in ch_driver.c has another example as well:
-------
 cleanup:
    if (ret != VIR_DRV_STATE_INIT_COMPLETE)
        chStateCleanup();
    return ret;
}
-------
+ virDomainObjEndAPI(&vm); + return ret; +} + +static int +chDomainGetMaxVcpus(virDomainPtr dom) +{ + return chDomainGetVcpusFlags(dom, (VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_VCPU_MAXIMUM)); +} + +static int +chDomainHelperGetVcpus(virDomainObj *vm, + virVcpuInfoPtr info, + unsigned long long *cpuwait, + int maxinfo, + unsigned char *cpumaps, + int maplen) +{ + size_t ncpuinfo = 0; + size_t i; + + if (maxinfo == 0) + return 0; + + if (!virCHDomainHasVcpuPids(vm)) { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("cpu affinity is not supported")); + return -1; + } + + if (info) + memset(info, 0, sizeof(*info) * maxinfo); + + if (cpumaps) + memset(cpumaps, 0, sizeof(*cpumaps) * maxinfo); + + for (i = 0; i < virDomainDefGetVcpusMax(vm->def) && ncpuinfo < maxinfo; i++) { + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); + pid_t vcpupid = virCHDomainGetVcpuPid(vm, i); + virVcpuInfoPtr vcpuinfo = info + ncpuinfo; + + if (!vcpu->online) + continue; + + if (info) { + vcpuinfo->number = i; + vcpuinfo->state = VIR_VCPU_RUNNING; + if (virProcessGetStatInfo(&vcpuinfo->cpuTime, + &vcpuinfo->cpu, NULL, + vm->pid, vcpupid) < 0) { + virReportSystemError(errno, "%s", + _("cannot get vCPU placement & pCPU time")); + return -1; + } + } + + if (cpumaps) { + unsigned char *cpumap = VIR_GET_CPUMAP(cpumaps, maplen, ncpuinfo); + virBitmap *map = NULL; + + if (!(map = virProcessGetAffinity(vcpupid))) + return -1; + + virBitmapToDataBuf(map, cpumap, maplen); + virBitmapFree(map); + } + + if (cpuwait) { + if (virProcessGetSchedInfo(&(cpuwait[ncpuinfo]), vm->pid, vcpupid) < 0) + return -1; + } + + ncpuinfo++; + } + + return ncpuinfo; +} + +static int +chDomainGetVcpus(virDomainPtr dom, + virVcpuInfoPtr info, + int maxinfo, + unsigned char *cpumaps, + int maplen) +{ + virDomainObj *vm; + int ret = -1; + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainGetVcpusEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (virDomainObjCheckActive(vm) < 0) { + 
virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("cannot retrieve vcpu information for inactive domain")); + goto cleanup; + } + + ret = chDomainHelperGetVcpus(vm, info, NULL, maxinfo, cpumaps, maplen); + + cleanup:
Strange label indentation here as well. Thanks, Daniel
+ virDomainObjEndAPI(&vm); + return ret; +} + /* Function Tables */ static virHypervisorDriver chHypervisorDriver = { .name = "CH", @@ -988,6 +1123,9 @@ static virHypervisorDriver chHypervisorDriver = { .domainIsActive = chDomainIsActive, /* 7.5.0 */ .domainOpenConsole = chDomainOpenConsole, /* 7.8.0 */ .nodeGetInfo = chNodeGetInfo, /* 7.5.0 */ + .domainGetVcpus = chDomainGetVcpus, /* 7.9.0 */ + .domainGetVcpusFlags = chDomainGetVcpusFlags, /* 7.9.0 */ + .domainGetMaxVcpus = chDomainGetMaxVcpus, /* 7.9.0 */ };
static virConnectDriver chConnectDriver = {

Add domainGetVcpuPinInfo and nodeGetCPUMap callbacks to ch driver Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.h | 12 +++++++++-- src/ch/ch_driver.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index d9c9d34a19..e35777a9ec 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -23,6 +23,7 @@ #include "ch_conf.h" #include "ch_monitor.h" #include "virchrdev.h" +#include "vircgroup.h" /* Give up waiting for mutex after 30 seconds */ #define CH_JOB_WAIT_TIME (1000ull * 30) @@ -52,9 +53,16 @@ typedef struct _virCHDomainObjPrivate virCHDomainObjPrivate; struct _virCHDomainObjPrivate { struct virCHDomainJobObj job; - virCHMonitor *monitor; + virChrdevs *chrdevs; + + virCgroup *cgroup; - virChrdevs *chrdevs; + virCHDriver *driver; + virCHMonitor *monitor; + char *machineName; + virBitmap *autoNodeset; + virBitmap *autoCpuset; + virChrdevs *devs; }; #define CH_DOMAIN_PRIVATE(vm) \ diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 8ea5ce393d..62ca6c1994 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -990,6 +990,58 @@ chDomainGetMaxVcpus(virDomainPtr dom) return chDomainGetVcpusFlags(dom, (VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_VCPU_MAXIMUM)); } +static int +chDomainGetVcpuPinInfo(virDomain *dom, + int ncpumaps, + unsigned char *cpumaps, + int maplen, + unsigned int flags) +{ + virDomainObj *vm = NULL; + virDomainDef *def; + bool live; + int ret = -1; + g_autoptr(virBitmap) hostcpus = NULL; + virBitmap *autoCpuset = NULL; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainGetVcpuPinInfoEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (!(def = virDomainObjGetOneDefState(vm, flags, &live))) + goto cleanup; + + if (!(hostcpus = 
virHostCPUGetAvailableCPUsBitmap())) + goto cleanup; + + if (live) + autoCpuset = CH_DOMAIN_PRIVATE(vm)->autoCpuset; + + ret = virDomainDefGetVcpuPinInfoHelper(def, maplen, ncpumaps, cpumaps, + hostcpus, autoCpuset); + cleanup: + virDomainObjEndAPI(&vm); + return ret; +} + +static int +chNodeGetCPUMap(virConnectPtr conn, + unsigned char **cpumap, + unsigned int *online, + unsigned int flags) +{ + if (virNodeGetCPUMapEnsureACL(conn) < 0) + return -1; + + return virHostCPUGetMap(cpumap, online, flags); +} + + static int chDomainHelperGetVcpus(virDomainObj *vm, virVcpuInfoPtr info, @@ -1126,6 +1178,8 @@ static virHypervisorDriver chHypervisorDriver = { .domainGetVcpus = chDomainGetVcpus, /* 7.9.0 */ .domainGetVcpusFlags = chDomainGetVcpusFlags, /* 7.9.0 */ .domainGetMaxVcpus = chDomainGetMaxVcpus, /* 7.9.0 */ + .domainGetVcpuPinInfo = chDomainGetVcpuPinInfo, /* 7.9.0 */ + .nodeGetCPUMap = chNodeGetCPUMap, /* 7.9.0 */ }; static virConnectDriver chConnectDriver = { -- 2.27.0

On 10/21/21 16:31, Vineeth Pillai wrote:
Add domainGetVcpuPinInfo and nodeGetCPUMap callbacks to ch driver
Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.h | 12 +++++++++-- src/ch/ch_driver.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-)
diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index d9c9d34a19..e35777a9ec 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -23,6 +23,7 @@ #include "ch_conf.h" #include "ch_monitor.h" #include "virchrdev.h" +#include "vircgroup.h"
/* Give up waiting for mutex after 30 seconds */ #define CH_JOB_WAIT_TIME (1000ull * 30) @@ -52,9 +53,16 @@ typedef struct _virCHDomainObjPrivate virCHDomainObjPrivate; struct _virCHDomainObjPrivate { struct virCHDomainJobObj job;
- virCHMonitor *monitor; + virChrdevs *chrdevs; + + virCgroup *cgroup;
- virChrdevs *chrdevs; + virCHDriver *driver; + virCHMonitor *monitor; + char *machineName; + virBitmap *autoNodeset; + virBitmap *autoCpuset; + virChrdevs *devs; };
#define CH_DOMAIN_PRIVATE(vm) \ diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 8ea5ce393d..62ca6c1994 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -990,6 +990,58 @@ chDomainGetMaxVcpus(virDomainPtr dom) return chDomainGetVcpusFlags(dom, (VIR_DOMAIN_AFFECT_LIVE | VIR_DOMAIN_VCPU_MAXIMUM)); }
+static int +chDomainGetVcpuPinInfo(virDomain *dom, + int ncpumaps, + unsigned char *cpumaps, + int maplen, + unsigned int flags) +{ + virDomainObj *vm = NULL; + virDomainDef *def; + bool live; + int ret = -1; + g_autoptr(virBitmap) hostcpus = NULL; + virBitmap *autoCpuset = NULL; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainGetVcpuPinInfoEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (!(def = virDomainObjGetOneDefState(vm, flags, &live))) + goto cleanup; + + if (!(hostcpus = virHostCPUGetAvailableCPUsBitmap())) + goto cleanup; + + if (live) + autoCpuset = CH_DOMAIN_PRIVATE(vm)->autoCpuset; + + ret = virDomainDefGetVcpuPinInfoHelper(def, maplen, ncpumaps, cpumaps, + hostcpus, autoCpuset); + cleanup:
Wrong label indentation. LGTM otherwise. Daniel
+ virDomainObjEndAPI(&vm); + return ret; +} + +static int +chNodeGetCPUMap(virConnectPtr conn, + unsigned char **cpumap, + unsigned int *online, + unsigned int flags) +{ + if (virNodeGetCPUMapEnsureACL(conn) < 0) + return -1; + + return virHostCPUGetMap(cpumap, online, flags); +} + + static int chDomainHelperGetVcpus(virDomainObj *vm, virVcpuInfoPtr info, @@ -1126,6 +1178,8 @@ static virHypervisorDriver chHypervisorDriver = { .domainGetVcpus = chDomainGetVcpus, /* 7.9.0 */ .domainGetVcpusFlags = chDomainGetVcpusFlags, /* 7.9.0 */ .domainGetMaxVcpus = chDomainGetMaxVcpus, /* 7.9.0 */ + .domainGetVcpuPinInfo = chDomainGetVcpuPinInfo, /* 7.9.0 */ + .nodeGetCPUMap = chNodeGetCPUMap, /* 7.9.0 */ };
static virConnectDriver chConnectDriver = {

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_conf.h | 5 +++ src/ch/ch_domain.c | 26 +++++++++++++- src/ch/ch_domain.h | 4 +-- src/ch/ch_driver.c | 4 ++- src/ch/ch_monitor.c | 85 +++++++++++++++++++++++++++++++++++++++++---- src/ch/ch_monitor.h | 14 +++++++- src/ch/ch_process.c | 6 +++- src/ch/meson.build | 1 + 8 files changed, 132 insertions(+), 13 deletions(-) diff --git a/src/ch/ch_conf.h b/src/ch/ch_conf.h index 37c36d9a09..49f286f97a 100644 --- a/src/ch/ch_conf.h +++ b/src/ch/ch_conf.h @@ -44,6 +44,11 @@ struct _virCHDriver { virMutex lock; + bool privileged; + + /* Embedded Mode: Not yet supported */ + char *embeddedRoot; + /* Require lock to get a reference on the object, * lockless access thereafter */ virCaps *caps; diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index c0b0b1005a..e1030800aa 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -21,10 +21,12 @@ #include <config.h> #include "ch_domain.h" +#include "domain_driver.h" #include "viralloc.h" #include "virchrdev.h" #include "virlog.h" #include "virtime.h" +#include "virsystemd.h" #define VIR_FROM_THIS VIR_FROM_CH @@ -136,7 +138,7 @@ virCHDomainObjEndJob(virDomainObj *obj) } static void * -virCHDomainObjPrivateAlloc(void *opaque G_GNUC_UNUSED) +virCHDomainObjPrivateAlloc(void *opaque) { virCHDomainObjPrivate *priv; @@ -152,6 +154,7 @@ virCHDomainObjPrivateAlloc(void *opaque G_GNUC_UNUSED) g_free(priv); return NULL; } + priv->driver = opaque; return priv; } @@ -363,3 +366,24 @@ virCHDomainHasVcpuPids(virDomainObj *vm) return false; } + +char *virCHDomainGetMachineName(virDomainObj *vm) +{ + virCHDomainObjPrivate *priv = CH_DOMAIN_PRIVATE(vm); + virCHDriver *driver = priv->driver; + char *ret = NULL; + + if (vm->pid > 0) { + ret = virSystemdGetMachineNameByPID(vm->pid); + if (!ret) + virResetLastError(); + } + + if (!ret) + ret = virDomainDriverGenerateMachineName("ch", + driver->embeddedRoot, + 
vm->def->id, vm->def->name, + driver->privileged); + + return ret; +} diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index e35777a9ec..3ac3421015 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -54,9 +54,7 @@ struct _virCHDomainObjPrivate { struct virCHDomainJobObj job; virChrdevs *chrdevs; - virCgroup *cgroup; - virCHDriver *driver; virCHMonitor *monitor; char *machineName; @@ -94,3 +92,5 @@ virCHDomainObjEndJob(virDomainObj *obj); int virCHDomainRefreshVcpuInfo(virDomainObj *vm); pid_t virCHDomainGetVcpuPid(virDomainObj *vm, unsigned int vcpuid); bool virCHDomainHasVcpuPids(virDomainObj *vm); + +char *virCHDomainGetMachineName(virDomainObj *vm); diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 62ca6c1994..ca854da123 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -944,7 +944,9 @@ static int chStateInitialize(bool privileged, goto cleanup; } - ret = VIR_DRV_STATE_INIT_COMPLETE; + ch_driver->privileged = privileged; + + return VIR_DRV_STATE_INIT_COMPLETE; cleanup: if (ret != VIR_DRV_STATE_INIT_COMPLETE) diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c index 691d1ce64b..c0ae031200 100644 --- a/src/ch/ch_monitor.c +++ b/src/ch/ch_monitor.c @@ -228,7 +228,8 @@ virCHMonitorBuildDisksJson(virJSONValue *content, virDomainDef *vmdef) } static int -virCHMonitorBuildNetJson(virJSONValue *nets, virDomainNetDef *netdef) +virCHMonitorBuildNetJson(virJSONValue *nets, virDomainNetDef *netdef, + size_t *nnicindexes, int **nicindexes) { virDomainNetType netType = virDomainNetGetActualType(netdef); char macaddr[VIR_MAC_STRING_BUFLEN]; @@ -263,6 +264,18 @@ virCHMonitorBuildNetJson(virJSONValue *nets, virDomainNetDef *netdef) virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("ethernet type supports a single guest ip")); } + + /* network and bridge use a tap device, and direct uses a + * macvtap device + */ + if (nicindexes && nnicindexes && netdef->ifname) { + int nicindex = 0; + if (virNetDevGetIndex(netdef->ifname, &nicindex) < 0) + 
return -1; + else + VIR_APPEND_ELEMENT(*nicindexes, *nnicindexes, nicindex); + } + break; case VIR_DOMAIN_NET_TYPE_VHOSTUSER: if ((virDomainChrType)netdef->data.vhostuser->type != VIR_DOMAIN_CHR_TYPE_UNIX) { @@ -331,7 +344,8 @@ virCHMonitorBuildNetJson(virJSONValue *nets, virDomainNetDef *netdef) } static int -virCHMonitorBuildNetsJson(virJSONValue *content, virDomainDef *vmdef) +virCHMonitorBuildNetsJson(virJSONValue *content, virDomainDef *vmdef, + size_t *nnicindexes, int **nicindexes) { g_autoptr(virJSONValue) nets = NULL; size_t i; @@ -340,7 +354,8 @@ virCHMonitorBuildNetsJson(virJSONValue *content, virDomainDef *vmdef) nets = virJSONValueNewArray(); for (i = 0; i < vmdef->nnets; i++) { - if (virCHMonitorBuildNetJson(nets, vmdef->nets[i]) < 0) + if (virCHMonitorBuildNetJson(nets, vmdef->nets[i], + nnicindexes, nicindexes) < 0) return -1; } if (virJSONValueObjectAppend(content, "net", &nets) < 0) @@ -351,7 +366,57 @@ virCHMonitorBuildNetsJson(virJSONValue *content, virDomainDef *vmdef) } static int -virCHMonitorBuildVMJson(virDomainDef *vmdef, char **jsonstr) +virCHMonitorBuildDeviceJson(virJSONValue *devices, virDomainHostdevDef *hostdevdef) +{ + g_autoptr(virJSONValue) device = NULL; + + + if (hostdevdef->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && + hostdevdef->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + g_autofree char *name = NULL; + g_autofree char *path = NULL; + virDomainHostdevSubsysPCI *pcisrc = &hostdevdef->source.subsys.u.pci; + device = virJSONValueNewObject(); + name = g_strdup_printf(VIR_PCI_DEVICE_ADDRESS_FMT, pcisrc->addr.domain, + pcisrc->addr.bus, pcisrc->addr.slot, + pcisrc->addr.function); + path = g_strdup_printf("/sys/bus/pci/devices/%s/", name); + if (!virFileExists(path)) { + virReportError(VIR_ERR_DEVICE_MISSING, + _("host pci device %s not found"), path); + return -1; + } + if (virJSONValueObjectAppendString(device, "path", path) < 0) + return -1; + if (virJSONValueArrayAppend(devices, &device) < 0) + return -1; + } + 
+ return 0; +} + +static int +virCHMonitorBuildDevicesJson(virJSONValue *content, virDomainDef *vmdef) +{ + g_autoptr(virJSONValue) devices = NULL; + size_t i; + + if (vmdef->nhostdevs > 0) { + devices = virJSONValueNewArray(); + for (i = 0; i < vmdef->nhostdevs; i++) { + if (virCHMonitorBuildDeviceJson(devices, vmdef->hostdevs[i]) < 0) + return -1; + } + if (virJSONValueObjectAppend(content, "devices", &devices) < 0) + return -1; + } + + return 0; +} + +static int +virCHMonitorBuildVMJson(virDomainDef *vmdef, char **jsonstr, + size_t *nnicindexes, int **nicindexes) { g_autoptr(virJSONValue) content = virJSONValueNewObject(); @@ -376,7 +441,12 @@ virCHMonitorBuildVMJson(virDomainDef *vmdef, char **jsonstr) if (virCHMonitorBuildDisksJson(content, vmdef) < 0) return -1; - if (virCHMonitorBuildNetsJson(content, vmdef) < 0) + + if (virCHMonitorBuildNetsJson(content, vmdef, + nnicindexes, nicindexes) < 0) + return -1; + + if (virCHMonitorBuildDevicesJson(content, vmdef) < 0) return -1; if (!(*jsonstr = virJSONValueToString(content, false))) @@ -673,7 +743,7 @@ virCHMonitorShutdownVMM(virCHMonitor *mon) } int -virCHMonitorCreateVM(virCHMonitor *mon) +virCHMonitorCreateVM(virCHMonitor *mon, size_t *nnicindexes, int **nicindexes) { g_autofree char *url = NULL; int responseCode = 0; @@ -685,7 +755,8 @@ virCHMonitorCreateVM(virCHMonitor *mon) headers = curl_slist_append(headers, "Accept: application/json"); headers = curl_slist_append(headers, "Content-Type: application/json"); - if (virCHMonitorBuildVMJson(mon->vm->def, &payload) != 0) + if (virCHMonitorBuildVMJson(mon->vm->def, &payload, + nnicindexes, nicindexes) != 0) return -1; virObjectLock(mon); diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h index 0f684ca583..8ca9e17a9a 100644 --- a/src/ch/ch_monitor.h +++ b/src/ch/ch_monitor.h @@ -55,10 +55,22 @@ virCHMonitor *virCHMonitorNew(virDomainObj *vm, const char *socketdir); void virCHMonitorClose(virCHMonitor *mon); G_DEFINE_AUTOPTR_CLEANUP_FUNC(virCHMonitor, 
virCHMonitorClose); -int virCHMonitorCreateVM(virCHMonitor *mon); + +int virCHMonitorCreateVM(virCHMonitor *mon, + size_t *nnicindexes, int **nicindexes); int virCHMonitorBootVM(virCHMonitor *mon); int virCHMonitorShutdownVM(virCHMonitor *mon); int virCHMonitorRebootVM(virCHMonitor *mon); int virCHMonitorSuspendVM(virCHMonitor *mon); int virCHMonitorResumeVM(virCHMonitor *mon); int virCHMonitorGetInfo(virCHMonitor *mon, virJSONValue **info); + +typedef struct _virCHMonitorCPUInfo virCHMonitorCPUInfo; +struct _virCHMonitorCPUInfo { + pid_t tid; + bool online; +}; +void virCHMonitorCPUInfoFree(virCHMonitorCPUInfo *cpus); +int virCHMonitorGetCPUInfo(virCHMonitor *mon, + virCHMonitorCPUInfo **vcpus, + size_t maxvcpus); diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index 1a01ca9384..3b7f6fcddf 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -149,6 +149,8 @@ int virCHProcessStart(virCHDriver *driver, { int ret = -1; virCHDomainObjPrivate *priv = vm->privateData; + g_autofree int *nicindexes = NULL; + size_t nnicindexes = 0; if (!priv->monitor) { /* And we can get the first monitor connection now too */ @@ -158,7 +160,8 @@ int virCHProcessStart(virCHDriver *driver, goto cleanup; } - if (virCHMonitorCreateVM(priv->monitor) < 0) { + if (virCHMonitorCreateVM(priv->monitor, + &nnicindexes, &nicindexes) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("failed to create guest VM")); goto cleanup; @@ -171,6 +174,7 @@ int virCHProcessStart(virCHDriver *driver, goto cleanup; } + priv->machineName = virCHDomainGetMachineName(vm); vm->pid = priv->monitor->pid; vm->def->id = vm->pid; diff --git a/src/ch/meson.build b/src/ch/meson.build index 5c6cab2a9f..2b2bdda26c 100644 --- a/src/ch/meson.build +++ b/src/ch/meson.build @@ -29,6 +29,7 @@ if conf.has('WITH_CH') ], include_directories: [ conf_inc_dir, + hypervisor_inc_dir, ], link_with: [ virt_util_lib, -- 2.27.0

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- po/POTFILES.in | 1 + src/ch/ch_cgroup.c | 457 ++++++++++++++++++++++++++++++++++++++++++++ src/ch/ch_cgroup.h | 45 +++++ src/ch/ch_conf.c | 2 + src/ch/ch_conf.h | 4 +- src/ch/ch_domain.c | 33 ++++ src/ch/ch_domain.h | 3 +- src/ch/ch_monitor.c | 125 ++++++++++-- src/ch/ch_monitor.h | 54 +++++- src/ch/ch_process.c | 288 +++++++++++++++++++++++++++- src/ch/ch_process.h | 3 + src/ch/meson.build | 2 + 12 files changed, 991 insertions(+), 26 deletions(-) create mode 100644 src/ch/ch_cgroup.c create mode 100644 src/ch/ch_cgroup.h diff --git a/po/POTFILES.in b/po/POTFILES.in index b554cf08ca..3a8db501bc 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -19,6 +19,7 @@ @SRCDIR@src/bhyve/bhyve_parse_command.c @SRCDIR@src/bhyve/bhyve_process.c @SRCDIR@src/ch/ch_conf.c +@SRCDIR@src/ch/ch_cgroup.c @SRCDIR@src/ch/ch_domain.c @SRCDIR@src/ch/ch_driver.c @SRCDIR@src/ch/ch_monitor.c diff --git a/src/ch/ch_cgroup.c b/src/ch/ch_cgroup.c new file mode 100644 index 0000000000..6be2184cf1 --- /dev/null +++ b/src/ch/ch_cgroup.c @@ -0,0 +1,457 @@ +/* + * ch_cgroup.c: CH cgroup management + * + * Copyright Microsoft Corp. 2020-2021 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#include "ch_cgroup.h" +#include "ch_domain.h" +#include "ch_process.h" +#include "vircgroup.h" +#include "virlog.h" +#include "viralloc.h" +#include "virerror.h" +#include "domain_audit.h" +#include "domain_cgroup.h" +#include "virscsi.h" +#include "virstring.h" +#include "virfile.h" +#include "virtypedparam.h" +#include "virnuma.h" +#include "virdevmapper.h" +#include "virutil.h" + +#define VIR_FROM_THIS VIR_FROM_CH + +VIR_LOG_INIT("ch.ch_cgroup"); + +static int +chSetupBlkioCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_BLKIO)) { + if (vm->def->blkio.weight || vm->def->blkio.ndevices) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Block I/O tuning is not available on this host")); + return -1; + } else { + return 0; + } + } + + return virDomainCgroupSetupBlkio(priv->cgroup, vm->def->blkio); +} + + +static int +chSetupMemoryCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) { + if (virMemoryLimitIsSet(vm->def->mem.hard_limit) || + virMemoryLimitIsSet(vm->def->mem.soft_limit) || + virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Memory cgroup is not available on this host")); + return -1; + } else { + return 0; + } + } + + return virDomainCgroupSetupMemtune(priv->cgroup, vm->def->mem); +} + +static int +chSetupCpusetCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0) + return -1; + + return 0; +} + + +static int +chSetupCpuCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + if 
(vm->def->cputune.sharesSpecified) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("CPU tuning is not available on this host")); + return -1; + } else { + return 0; + } + } + + if (vm->def->cputune.sharesSpecified) { + + if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) < 0) + return -1; + + } + + return 0; +} + + +static int +chInitCgroup(virDomainObj * vm, size_t nnicindexes, int *nicindexes) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + g_autoptr(virCHDriverConfig) cfg = virCHDriverGetConfig(priv->driver); + + if (!priv->driver->privileged) + return 0; + + if (!virCgroupAvailable()) + return 0; + + virCgroupFree(priv->cgroup); + + if (!vm->def->resource) { + virDomainResourceDef *res; + + res = g_new0(virDomainResourceDef, 1); + + res->partition = g_strdup("/machine"); + + vm->def->resource = res; + } + + if (vm->def->resource->partition[0] != '/') { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource partition '%s' must start with '/'"), + vm->def->resource->partition); + return -1; + } + + if (virCgroupNewMachine(priv->machineName, "ch", vm->def->uuid, NULL, vm->pid, false, nnicindexes, nicindexes, vm->def->resource->partition, cfg->cgroupControllers, 0, /* maxThreadsPerProc */ + &priv->cgroup) < 0) { + if (virCgroupNewIgnoreError()) + return 0; + + return -1; + } + + return 0; +} + +static void +chRestoreCgroupState(virDomainObj * vm) +{ + g_autofree char *mem_mask = NULL; + g_autofree char *nodeset = NULL; + virCHDomainObjPrivate *priv = vm->privateData; + size_t i = 0; + + g_autoptr(virBitmap) all_nodes = NULL; + virCgroup *cgroup_temp = NULL; + + if (!virNumaIsAvailable() || + !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return; + + if (!(all_nodes = virNumaGetHostMemoryNodeset())) + goto error; + + if (!(mem_mask = virBitmapFormat(all_nodes))) + goto error; + + if ((virCgroupHasEmptyTasks(priv->cgroup, + VIR_CGROUP_CONTROLLER_CPUSET)) <= 0) + goto error; + + if 
(virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) + goto error; + + for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) { + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, i, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + g_free(nodeset); + virCgroupFree(cgroup_temp); + } + + for (i = 0; i < vm->def->niothreadids; i++) { + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD, + vm->def->iothreadids[i]->iothread_id, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + g_free(nodeset); + virCgroupFree(cgroup_temp); + } + + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + cleanup: + virCgroupFree(cgroup_temp); + return; + + error: + virResetLastError(); + VIR_DEBUG("Couldn't restore cgroups to meaningful state"); + goto cleanup; +} + +int +chConnectCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + g_autoptr(virCHDriverConfig) cfg = virCHDriverGetConfig(priv->driver); + + if (!priv->driver->privileged) + return 0; + + if (!virCgroupAvailable()) + return 0; + + virCgroupFree(priv->cgroup); + + if (virCgroupNewDetectMachine(vm->def->name, + "ch", + vm->pid, + cfg->cgroupControllers, + priv->machineName, &priv->cgroup) < 0) + return -1; + + chRestoreCgroupState(vm); + return 0; +} + +int +chSetupCgroup(virDomainObj * vm, size_t nnicindexes, int *nicindexes) 
+{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (!vm->pid) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot setup cgroups until process is started")); + return -1; + } + + if (chInitCgroup(vm, nnicindexes, nicindexes) < 0) + return -1; + + if (!priv->cgroup) + return 0; + + if (chSetupBlkioCgroup(vm) < 0) + return -1; + + if (chSetupMemoryCgroup(vm) < 0) + return -1; + + if (chSetupCpuCgroup(vm) < 0) + return -1; + + if (chSetupCpusetCgroup(vm) < 0) + return -1; + + return 0; +} + +int +chSetupCgroupVcpuBW(virCgroup * cgroup, + unsigned long long period, long long quota) +{ + return virCgroupSetupCpuPeriodQuota(cgroup, period, quota); +} + + +int +chSetupCgroupCpusetCpus(virCgroup * cgroup, virBitmap * cpumask) +{ + return virCgroupSetupCpusetCpus(cgroup, cpumask); +} + +int +chSetupGlobalCpuCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + unsigned long long period = vm->def->cputune.global_period; + long long quota = vm->def->cputune.global_quota; + g_autofree char *mem_mask = NULL; + virDomainNumatuneMemMode mem_mode; + + if ((period || quota) && + !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cgroup cpu is required for scheduler tuning")); + return -1; + } + + /* + * If CPU cgroup controller is not initialized here, then we need + * neither period nor quota settings. And if CPUSET controller is + * not initialized either, then there's nothing to do anyway. 
+ */ + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) && + !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + virDomainNumatuneMaybeFormatNodeset(vm->def->numa, + priv->autoNodeset, + &mem_mask, -1) < 0) + return -1; + + if (period || quota) { + if (chSetupCgroupVcpuBW(priv->cgroup, period, quota) < 0) + return -1; + } + + return 0; +} + + +int +chRemoveCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (priv->cgroup == NULL) + return 0; /* Not supported, so claim success */ + + if (virCgroupTerminateMachine(priv->machineName) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + + return virCgroupRemove(priv->cgroup); +} + + +static void +chCgroupEmulatorAllNodesDataFree(chCgroupEmulatorAllNodesData * data) +{ + if (!data) + return; + + virCgroupFree(data->emulatorCgroup); + g_free(data->emulatorMemMask); + g_free(data); +} + + +/** + * chCgroupEmulatorAllNodesAllow: + * @cgroup: domain cgroup pointer + * @retData: filled with structure used to roll back the operation + * + * Allows all NUMA nodes for the cloud hypervisor thread temporarily. This is + * necessary when hotplugging cpus since it requires memory allocated in the + * DMA region. Afterwards the operation can be reverted by + * chCgroupEmulatorAllNodesRestore. 
+ * + * Returns 0 on success -1 on error + */ +int +chCgroupEmulatorAllNodesAllow(virCgroup * cgroup, + chCgroupEmulatorAllNodesData ** retData) +{ + chCgroupEmulatorAllNodesData *data = NULL; + g_autofree char *all_nodes_str = NULL; + + g_autoptr(virBitmap) all_nodes = NULL; + int ret = -1; + + if (!virNumaIsAvailable() || + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + if (!(all_nodes = virNumaGetHostMemoryNodeset())) + goto cleanup; + + if (!(all_nodes_str = virBitmapFormat(all_nodes))) + goto cleanup; + + data = g_new0(chCgroupEmulatorAllNodesData, 1); + + if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, + false, &data->emulatorCgroup) < 0) + goto cleanup; + + if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 + || virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0) + goto cleanup; + + *retData = g_steal_pointer(&data); + ret = 0; + + cleanup: + chCgroupEmulatorAllNodesDataFree(data); + + return ret; +} + + +/** + * chCgroupEmulatorAllNodesRestore: + * @data: data structure created by chCgroupEmulatorAllNodesAllow + * + * Rolls back the setting done by chCgroupEmulatorAllNodesAllow and frees the + * associated data. + */ +void +chCgroupEmulatorAllNodesRestore(chCgroupEmulatorAllNodesData * data) +{ + virError *err; + + if (!data) + return; + + virErrorPreserveLast(&err); + virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask); + virErrorRestore(&err); + + chCgroupEmulatorAllNodesDataFree(data); +} diff --git a/src/ch/ch_cgroup.h b/src/ch/ch_cgroup.h new file mode 100644 index 0000000000..0152b5477c --- /dev/null +++ b/src/ch/ch_cgroup.h @@ -0,0 +1,45 @@ +/* + * ch_cgroup.h: CH cgroup management + * + * Copyright Microsoft Corp. 
2020-2021 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "virusb.h" +#include "vircgroup.h" +#include "domain_conf.h" +#include "ch_conf.h" + +int chConnectCgroup(virDomainObj * vm); +int chSetupCgroup(virDomainObj * vm, size_t nnicindexes, int *nicindexes); +int chSetupCgroupVcpuBW(virCgroup * cgroup, + unsigned long long period, long long quota); +int chSetupCgroupCpusetCpus(virCgroup * cgroup, virBitmap * cpumask); +int chSetupGlobalCpuCgroup(virDomainObj * vm); +int chRemoveCgroup(virDomainObj * vm); + +typedef struct _chCgroupEmulatorAllNodesData chCgroupEmulatorAllNodesData; + +struct _chCgroupEmulatorAllNodesData { + virCgroup *emulatorCgroup; + char *emulatorMemMask; +}; + +int chCgroupEmulatorAllNodesAllow(virCgroup * cgroup, + chCgroupEmulatorAllNodesData ** data); +void chCgroupEmulatorAllNodesRestore(chCgroupEmulatorAllNodesData * data); diff --git a/src/ch/ch_conf.c b/src/ch/ch_conf.c index ed0fffe5d6..7f70452296 100644 --- a/src/ch/ch_conf.c +++ b/src/ch/ch_conf.c @@ -141,6 +141,8 @@ virCHDriverConfigNew(bool privileged) if (!(cfg = virObjectNew(virCHDriverConfigClass))) return NULL; + cfg->cgroupControllers = -1; /* Auto detect */ + if (privileged) { if (virGetUserID(CH_USER, &cfg->user) < 0) return NULL; diff --git a/src/ch/ch_conf.h b/src/ch/ch_conf.h index 49f286f97a..19deb8e568 100644 
--- a/src/ch/ch_conf.h +++ b/src/ch/ch_conf.h @@ -35,11 +35,13 @@ struct _virCHDriverConfig { char *stateDir; char *logDir; - + int cgroupControllers; uid_t user; gid_t group; }; +G_DEFINE_AUTOPTR_CLEANUP_FUNC(virCHDriverConfig, virObjectUnref); + struct _virCHDriver { virMutex lock; diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index e1030800aa..d0aaeed1f4 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -326,6 +326,39 @@ chValidateDomainDeviceDef(const virDomainDeviceDef *dev, _("Serial can only be enabled for a PTY")); return -1; } + return 0; +} +int +virCHDomainRefreshThreadInfo(virDomainObj *vm) +{ + size_t maxvcpus = virDomainDefGetVcpusMax(vm->def); + virCHMonitorThreadInfo *info = NULL; + size_t nthreads, ncpus = 0; + size_t i; + + nthreads = virCHMonitorGetThreadInfo(virCHDomainGetMonitor(vm), + true, &info); + + for (i = 0; i < nthreads; i++) { + virCHDomainVcpuPrivate *vcpupriv; + virDomainVcpuDef *vcpu; + virCHMonitorCPUInfo *vcpuInfo; + + if (info[i].type != virCHThreadTypeVcpu) + continue; + + // TODO: hotplug support + vcpuInfo = &info[i].vcpuInfo; + vcpu = virDomainDefGetVcpu(vm->def, vcpuInfo->cpuid); + vcpupriv = CH_DOMAIN_VCPU_PRIVATE(vcpu); + vcpupriv->tid = vcpuInfo->tid; + ncpus++; + } + + // TODO: Remove the warning when hotplug is implemented. 
+ if (ncpus != maxvcpus) + VIR_WARN("Mismatch in the number of cpus, expected: %ld, actual: %ld", + maxvcpus, ncpus); return 0; } diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 3ac3421015..2ce3e2cef3 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -89,7 +89,8 @@ virCHDomainObjBeginJob(virDomainObj *obj, enum virCHDomainJob job) void virCHDomainObjEndJob(virDomainObj *obj); -int virCHDomainRefreshVcpuInfo(virDomainObj *vm); +int virCHDomainRefreshThreadInfo(virDomainObj *vm); + pid_t virCHDomainGetVcpuPid(virDomainObj *vm, unsigned int vcpuid); bool virCHDomainHasVcpuPids(virDomainObj *vm); diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c index c0ae031200..095779cb3f 100644 --- a/src/ch/ch_monitor.c +++ b/src/ch/ch_monitor.c @@ -41,6 +41,7 @@ VIR_LOG_INIT("ch.ch_monitor"); static virClass *virCHMonitorClass; static void virCHMonitorDispose(void *obj); +static void virCHMonitorThreadInfoFree(virCHMonitor *mon); static int virCHMonitorOnceInit(void) { @@ -571,6 +572,7 @@ static void virCHMonitorDispose(void *opaque) virCHMonitor *mon = opaque; VIR_DEBUG("mon=%p", mon); + virCHMonitorThreadInfoFree(mon); virObjectUnref(mon->vm); } @@ -736,6 +738,114 @@ virCHMonitorGet(virCHMonitor *mon, const char *endpoint, virJSONValue **response return ret; } +/** + * virCHMonitorGetInfo: + * @mon: Pointer to the monitor + * @info: Get VM info + * + * Retrieve the VM info and store in @info + * + * Returns 0 on success. 
+ */ +int +virCHMonitorGetInfo(virCHMonitor *mon, virJSONValue **info) +{ + return virCHMonitorGet(mon, URL_VM_INFO, info); +} + +static void +virCHMonitorThreadInfoFree(virCHMonitor *mon) +{ + mon->nthreads = 0; + if (mon->threads) + VIR_FREE(mon->threads); +} + +static size_t +virCHMonitorRefreshThreadInfo(virCHMonitor *mon) +{ + virCHMonitorThreadInfo *info = NULL; + g_autofree pid_t *tids = NULL; + virDomainObj *vm = mon->vm; + size_t ntids = 0; + size_t i; + + + virCHMonitorThreadInfoFree(mon); + if (virProcessGetPids(vm->pid, &ntids, &tids) < 0) { + mon->threads = NULL; + return 0; + } + + info = g_new0(virCHMonitorThreadInfo, ntids); + for (i = 0; i < ntids; i++) { + g_autofree char *proc = NULL; + g_autofree char *data = NULL; + + proc = g_strdup_printf("/proc/%d/task/%d/comm", + (int)vm->pid, (int)tids[i]); + + if (virFileReadAll(proc, (1<<16), &data) < 0) { + continue; + } + + VIR_DEBUG("VM PID: %d, TID %d, COMM: %s", + (int)vm->pid, (int)tids[i], data); + if (STRPREFIX(data, "vcpu")) { + int cpuid; + char *tmp; + if (virStrToLong_i(data + 4, &tmp, 0, &cpuid) < 0) { + VIR_WARN("Index is not specified correctly"); + continue; + } + info[i].type = virCHThreadTypeVcpu; + info[i].vcpuInfo.tid = tids[i]; + info[i].vcpuInfo.online = true; + info[i].vcpuInfo.cpuid = cpuid; + VIR_DEBUG("vcpu%d -> tid: %d", cpuid, tids[i]); + } else if (STRPREFIX(data, "_disk") || STRPREFIX(data, "_net") || + STRPREFIX(data, "_rng")) { + /* Prefixes used by cloud-hypervisor for IO Threads are captured at + https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/vmm/src/devic... 
*/ + info[i].type = virCHThreadTypeIO; + info[i].ioInfo.tid = tids[i]; + virStrcpy(info[i].ioInfo.thrName, data, VIRCH_THREAD_NAME_LEN - 1); + }else { + info[i].type = virCHThreadTypeEmulator; + info[i].emuInfo.tid = tids[i]; + virStrcpy(info[i].emuInfo.thrName, data, VIRCH_THREAD_NAME_LEN - 1); + } + mon->nthreads++; + + } + mon->threads = info; + + return mon->nthreads; +} + +/** + * virCHMonitorGetThreadInfo: + * @mon: Pointer to the monitor + * @refresh: Refresh thread info or not + * + * Retrieve thread info and store it in @threads + * + * Returns count of threads on success. + */ +size_t +virCHMonitorGetThreadInfo(virCHMonitor *mon, bool refresh, + virCHMonitorThreadInfo **threads) +{ + int nthreads = 0; + + if (refresh) + nthreads = virCHMonitorRefreshThreadInfo(mon); + + *threads = mon->threads; + + return nthreads; +} + int virCHMonitorShutdownVMM(virCHMonitor *mon) { @@ -810,18 +920,3 @@ virCHMonitorResumeVM(virCHMonitor *mon) { return virCHMonitorPutNoContent(mon, URL_VM_RESUME); } - -/** - * virCHMonitorGetInfo: - * @mon: Pointer to the monitor - * @info: Get VM info - * - * Retrieve the VM info and store in @info - * - * Returns 0 on success.
- */ -int -virCHMonitorGetInfo(virCHMonitor *mon, virJSONValue **info) -{ - return virCHMonitorGet(mon, URL_VM_INFO, info); -} diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h index 8ca9e17a9a..f8c3fa75e8 100644 --- a/src/ch/ch_monitor.h +++ b/src/ch/ch_monitor.h @@ -37,6 +37,50 @@ #define URL_VM_RESUME "vm.resume" #define URL_VM_INFO "vm.info" +#define VIRCH_THREAD_NAME_LEN 16 + +typedef enum { + virCHThreadTypeEmulator, + virCHThreadTypeVcpu, + virCHThreadTypeIO, + virCHThreadTypeMax +} virCHThreadType; + +typedef struct _virCHMonitorCPUInfo virCHMonitorCPUInfo; + +struct _virCHMonitorCPUInfo { + int cpuid; + pid_t tid; + + bool online; +}; + +typedef struct _virCHMonitorEmuThreadInfo virCHMonitorEmuThreadInfo; + +struct _virCHMonitorEmuThreadInfo { + char thrName[VIRCH_THREAD_NAME_LEN]; + pid_t tid; +}; + +typedef struct _virCHMonitorIOThreadInfo virCHMonitorIOThreadInfo; + +struct _virCHMonitorIOThreadInfo { + char thrName[VIRCH_THREAD_NAME_LEN]; + pid_t tid; +}; + +typedef struct _virCHMonitorThreadInfo virCHMonitorThreadInfo; + +struct _virCHMonitorThreadInfo { + virCHThreadType type; + + union { + virCHMonitorCPUInfo vcpuInfo; + virCHMonitorEmuThreadInfo emuInfo; + virCHMonitorIOThreadInfo ioInfo; + }; +}; + typedef struct _virCHMonitor virCHMonitor; struct _virCHMonitor { @@ -49,6 +93,9 @@ struct _virCHMonitor { pid_t pid; virDomainObj *vm; + + size_t nthreads; + virCHMonitorThreadInfo *threads; }; virCHMonitor *virCHMonitorNew(virDomainObj *vm, const char *socketdir); @@ -65,12 +112,9 @@ int virCHMonitorSuspendVM(virCHMonitor *mon); int virCHMonitorResumeVM(virCHMonitor *mon); int virCHMonitorGetInfo(virCHMonitor *mon, virJSONValue **info); -typedef struct _virCHMonitorCPUInfo virCHMonitorCPUInfo; -struct _virCHMonitorCPUInfo { - pid_t tid; - bool online; -}; void virCHMonitorCPUInfoFree(virCHMonitorCPUInfo *cpus); int virCHMonitorGetCPUInfo(virCHMonitor *mon, virCHMonitorCPUInfo **vcpus, size_t maxvcpus); +size_t 
virCHMonitorGetThreadInfo(virCHMonitor *mon, bool refresh, + virCHMonitorThreadInfo **threads); diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index 3b7f6fcddf..8dce737adb 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -26,6 +26,8 @@ #include "ch_domain.h" #include "ch_monitor.h" #include "ch_process.h" +#include "ch_cgroup.h" +#include "virnuma.h" #include "viralloc.h" #include "virerror.h" #include "virjson.h" @@ -133,6 +135,257 @@ virCHProcessUpdateInfo(virDomainObj *vm) return 0; } +static int +virCHProcessGetAllCpuAffinity(virBitmap **cpumapRet) +{ + *cpumapRet = NULL; + + if (!virHostCPUHasBitmap()) + return 0; + + if (!(*cpumapRet = virHostCPUGetOnlineBitmap())) + return -1; + + return 0; +} + +#if defined(WITH_SCHED_GETAFFINITY) || defined(WITH_BSD_CPU_AFFINITY) +static int +virCHProcessInitCpuAffinity(virDomainObj *vm) +{ + g_autoptr(virBitmap) cpumapToSet = NULL; + virDomainNumatuneMemMode mem_mode; + virCHDomainObjPrivate *priv = vm->privateData; + + if (!vm->pid) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot setup CPU affinity until process is started")); + return -1; + } + + if (virDomainNumaGetNodeCount(vm->def->numa) <= 1 && + virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + virBitmap *nodeset = NULL; + + if (virDomainNumatuneMaybeGetNodeset(vm->def->numa, + priv->autoNodeset, + &nodeset, + -1) < 0) + return -1; + + if (virNumaNodesetToCPUset(nodeset, &cpumapToSet) < 0) + return -1; + } else if (vm->def->cputune.emulatorpin) { + if (!(cpumapToSet = virBitmapNewCopy(vm->def->cputune.emulatorpin))) + return -1; + } else { + if (virCHProcessGetAllCpuAffinity(&cpumapToSet) < 0) + return -1; + } + + if (cpumapToSet && + virProcessSetAffinity(vm->pid, cpumapToSet, false) < 0) { + return -1; + } + + return 0; +} +#else /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */ +static int +virCHProcessInitCpuAffinity(virDomainObj *vm 
G_GNUC_UNUSED) +{ + return 0; +} +#endif /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */ + +/** + * virCHProcessSetupPid: + * + * This function sets resource properties (affinity, cgroups, + * scheduler) for any PID associated with a domain. It should be used + * to set up emulator PIDs as well as vCPU and I/O thread pids to + * ensure they are all handled the same way. + * + * Returns 0 on success, -1 on error. + */ +static int +virCHProcessSetupPid(virDomainObj *vm, + pid_t pid, + virCgroupThreadName nameval, + int id, + virBitmap *cpumask, + unsigned long long period, + long long quota, + virDomainThreadSchedParam *sched) +{ + virCHDomainObjPrivate *priv = vm->privateData; + virDomainNumatuneMemMode mem_mode; + virCgroup *cgroup = NULL; + virBitmap *use_cpumask = NULL; + virBitmap *affinity_cpumask = NULL; + g_autoptr(virBitmap) hostcpumap = NULL; + g_autofree char *mem_mask = NULL; + int ret = -1; + + if ((period || quota) && + !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cgroup cpu is required for scheduler tuning")); + goto cleanup; + } + + /* Infer which cpumask shall be used. */ + if (cpumask) { + use_cpumask = cpumask; + } else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) { + use_cpumask = priv->autoCpuset; + } else if (vm->def->cpumask) { + use_cpumask = vm->def->cpumask; + } else { + /* we can't assume cloud-hypervisor itself is running on all pCPUs, + * so we need to explicitly set the spawned instance to all pCPUs. */ + if (virCHProcessGetAllCpuAffinity(&hostcpumap) < 0) + goto cleanup; + affinity_cpumask = hostcpumap; + } + + /* + * If CPU cgroup controller is not initialized here, then we need + * neither period nor quota settings. And if CPUSET controller is + * not initialized either, then there's nothing to do anyway. 
+ */ + if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) || + virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + virDomainNumatuneMaybeFormatNodeset(vm->def->numa, + priv->autoNodeset, + &mem_mask, -1) < 0) + goto cleanup; + + if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0) + goto cleanup; + + if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + if (use_cpumask && + chSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0) + goto cleanup; + + if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0) + goto cleanup; + + } + + if ((period || quota) && + chSetupCgroupVcpuBW(cgroup, period, quota) < 0) + goto cleanup; + + /* Move the thread to the sub dir */ + VIR_INFO("Adding pid %d to cgroup", pid); + if (virCgroupAddThread(cgroup, pid) < 0) + goto cleanup; + + } + + if (!affinity_cpumask) + affinity_cpumask = use_cpumask; + + /* Setup legacy affinity. */ + if (affinity_cpumask && virProcessSetAffinity(pid, affinity_cpumask, false) < 0) + goto cleanup; + + /* Set scheduler type and priority, but not for the main thread. */ + if (sched && + nameval != VIR_CGROUP_THREAD_EMULATOR && + virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) + goto cleanup; + + ret = 0; + cleanup: + if (cgroup) { + if (ret < 0) + virCgroupRemove(cgroup); + virCgroupFree(cgroup); + } + + return ret; +} + +/** + * virCHProcessSetupVcpu: + * @vm: domain object + * @vcpuid: id of VCPU to set defaults + * + * This function sets resource properties (cgroups, affinity, scheduler) for a + * vCPU. This function expects that the vCPU is online and the vCPU pids were + * correctly detected at the point when it's called. + * + * Returns 0 on success, -1 on error. 
+ */ +int +virCHProcessSetupVcpu(virDomainObj *vm, + unsigned int vcpuid) +{ + pid_t vcpupid = virCHDomainGetVcpuPid(vm, vcpuid); + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, vcpuid); + + return virCHProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU, + vcpuid, vcpu->cpumask, + vm->def->cputune.period, + vm->def->cputune.quota, + &vcpu->sched); +} + +static int +virCHProcessSetupVcpus(virDomainObj *vm) +{ + virDomainVcpuDef *vcpu; + unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def); + size_t i; + + if ((vm->def->cputune.period || vm->def->cputune.quota) && + !virCgroupHasController(((virCHDomainObjPrivate *) vm->privateData)->cgroup, + VIR_CGROUP_CONTROLLER_CPU)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cgroup cpu is required for scheduler tuning")); + return -1; + } + + if (!virCHDomainHasVcpuPids(vm)) { + /* If any CPU has custom affinity that differs from the + * VM default affinity, we must reject it */ + for (i = 0; i < maxvcpus; i++) { + vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (vcpu->cpumask && + !virBitmapEqual(vm->def->cpumask, vcpu->cpumask)) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("cpu affinity is not supported")); + return -1; + } + } + + return 0; + } + + for (i = 0; i < maxvcpus; i++) { + vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (virCHProcessSetupVcpu(vm, i) < 0) + return -1; + } + + return 0; +} + /** * virCHProcessStart: * @driver: pointer to driver structure @@ -168,18 +421,33 @@ int virCHProcessStart(virCHDriver *driver, } } + vm->pid = priv->monitor->pid; + vm->def->id = vm->pid; + priv->machineName = virCHDomainGetMachineName(vm); + + if (chSetupCgroup(vm, nnicindexes, nicindexes) < 0) + goto cleanup; + + if (virCHProcessInitCpuAffinity(vm) < 0) + goto cleanup; + if (virCHMonitorBootVM(priv->monitor) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("failed to boot guest VM")); goto cleanup; } - 
priv->machineName = virCHDomainGetMachineName(vm); - vm->pid = priv->monitor->pid; - vm->def->id = vm->pid; + virCHDomainRefreshThreadInfo(vm); - virCHProcessUpdateInfo(vm); + VIR_DEBUG("Setting global CPU cgroup (if required)"); + if (chSetupGlobalCpuCgroup(vm) < 0) + goto cleanup; + + VIR_DEBUG("Setting vCPU tuning/settings"); + if (virCHProcessSetupVcpus(vm) < 0) + goto cleanup; + virCHProcessUpdateInfo(vm); virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason); return 0; @@ -195,6 +463,8 @@ int virCHProcessStop(virCHDriver *driver G_GNUC_UNUSED, virDomainObj *vm, virDomainShutoffReason reason) { + int ret; + int retries = 0; virCHDomainObjPrivate *priv = vm->privateData; VIR_DEBUG("Stopping VM name=%s pid=%d reason=%d", @@ -205,6 +475,16 @@ int virCHProcessStop(virCHDriver *driver G_GNUC_UNUSED, priv->monitor = NULL; } + retry: + if ((ret = chRemoveCgroup(vm)) < 0) { + if (ret == -EBUSY && (retries++ < 5)) { + g_usleep(200*1000); + goto retry; + } + VIR_WARN("Failed to remove cgroup for %s", + vm->def->name); + } + vm->pid = -1; vm->def->id = -1; diff --git a/src/ch/ch_process.h b/src/ch/ch_process.h index abc4915979..800e3f4e23 100644 --- a/src/ch/ch_process.h +++ b/src/ch/ch_process.h @@ -29,3 +29,6 @@ int virCHProcessStart(virCHDriver *driver, int virCHProcessStop(virCHDriver *driver, virDomainObj *vm, virDomainShutoffReason reason); + +int virCHProcessSetupVcpu(virDomainObj *vm, + unsigned int vcpuid); diff --git a/src/ch/meson.build b/src/ch/meson.build index 2b2bdda26c..0b20de56fd 100644 --- a/src/ch/meson.build +++ b/src/ch/meson.build @@ -1,6 +1,8 @@ ch_driver_sources = [ 'ch_conf.c', 'ch_conf.h', + 'ch_cgroup.c', + 'ch_cgroup.h', 'ch_domain.c', 'ch_domain.h', 'ch_driver.c', -- 2.27.0

On 10/21/21 16:31, Vineeth Pillai wrote:
Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- po/POTFILES.in | 1 + src/ch/ch_cgroup.c | 457 ++++++++++++++++++++++++++++++++++++++++++++ src/ch/ch_cgroup.h | 45 +++++ src/ch/ch_conf.c | 2 + src/ch/ch_conf.h | 4 +- src/ch/ch_domain.c | 33 ++++ src/ch/ch_domain.h | 3 +- src/ch/ch_monitor.c | 125 ++++++++++-- src/ch/ch_monitor.h | 54 +++++- src/ch/ch_process.c | 288 +++++++++++++++++++++++++++- src/ch/ch_process.h | 3 + src/ch/meson.build | 2 + 12 files changed, 991 insertions(+), 26 deletions(-) create mode 100644 src/ch/ch_cgroup.c create mode 100644 src/ch/ch_cgroup.h
diff --git a/po/POTFILES.in b/po/POTFILES.in index b554cf08ca..3a8db501bc 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -19,6 +19,7 @@ @SRCDIR@src/bhyve/bhyve_parse_command.c @SRCDIR@src/bhyve/bhyve_process.c @SRCDIR@src/ch/ch_conf.c +@SRCDIR@src/ch/ch_cgroup.c @SRCDIR@src/ch/ch_domain.c @SRCDIR@src/ch/ch_driver.c @SRCDIR@src/ch/ch_monitor.c diff --git a/src/ch/ch_cgroup.c b/src/ch/ch_cgroup.c new file mode 100644 index 0000000000..6be2184cf1 --- /dev/null +++ b/src/ch/ch_cgroup.c @@ -0,0 +1,457 @@ +/* + * ch_cgroup.c: CH cgroup management + * + * Copyright Microsoft Corp. 2020-2021 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#include "ch_cgroup.h" +#include "ch_domain.h" +#include "ch_process.h" +#include "vircgroup.h" +#include "virlog.h" +#include "viralloc.h" +#include "virerror.h" +#include "domain_audit.h" +#include "domain_cgroup.h" +#include "virscsi.h" +#include "virstring.h" +#include "virfile.h" +#include "virtypedparam.h" +#include "virnuma.h" +#include "virdevmapper.h" +#include "virutil.h" + +#define VIR_FROM_THIS VIR_FROM_CH + +VIR_LOG_INIT("ch.ch_cgroup"); + +static int +chSetupBlkioCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_BLKIO)) { + if (vm->def->blkio.weight || vm->def->blkio.ndevices) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Block I/O tuning is not available on this host")); + return -1; + } else { + return 0; + } + } + + return virDomainCgroupSetupBlkio(priv->cgroup, vm->def->blkio); +} + + +static int +chSetupMemoryCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) { + if (virMemoryLimitIsSet(vm->def->mem.hard_limit) || + virMemoryLimitIsSet(vm->def->mem.soft_limit) || + virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Memory cgroup is not available on this host")); + return -1; + } else { + return 0; + } + } + + return virDomainCgroupSetupMemtune(priv->cgroup, vm->def->mem); +} + +static int +chSetupCpusetCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0) + return -1; + + return 0; +} + + +static int +chSetupCpuCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + if 
(vm->def->cputune.sharesSpecified) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("CPU tuning is not available on this host")); + return -1; + } else { + return 0; + } + } + + if (vm->def->cputune.sharesSpecified) { + + if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) < 0) + return -1; + + } + + return 0; +} + + +static int +chInitCgroup(virDomainObj * vm, size_t nnicindexes, int *nicindexes) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + g_autoptr(virCHDriverConfig) cfg = virCHDriverGetConfig(priv->driver); + + if (!priv->driver->privileged) + return 0; + + if (!virCgroupAvailable()) + return 0; + + virCgroupFree(priv->cgroup); + + if (!vm->def->resource) { + virDomainResourceDef *res; + + res = g_new0(virDomainResourceDef, 1); + + res->partition = g_strdup("/machine"); + + vm->def->resource = res; + } + + if (vm->def->resource->partition[0] != '/') { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource partition '%s' must start with '/'"), + vm->def->resource->partition); + return -1; + } + + if (virCgroupNewMachine(priv->machineName, "ch", vm->def->uuid, NULL, vm->pid, false, nnicindexes, nicindexes, vm->def->resource->partition, cfg->cgroupControllers, 0, /* maxThreadsPerProc */
Please wrap this very long line so that each resulting line is at most 100 characters.
+ &priv->cgroup) < 0) { + if (virCgroupNewIgnoreError()) + return 0; + + return -1; + } + + return 0; +} + +static void +chRestoreCgroupState(virDomainObj * vm) +{ + g_autofree char *mem_mask = NULL; + g_autofree char *nodeset = NULL; + virCHDomainObjPrivate *priv = vm->privateData; + size_t i = 0; + + g_autoptr(virBitmap) all_nodes = NULL; + virCgroup *cgroup_temp = NULL; + + if (!virNumaIsAvailable() || + !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return; + + if (!(all_nodes = virNumaGetHostMemoryNodeset())) + goto error; + + if (!(mem_mask = virBitmapFormat(all_nodes))) + goto error; + + if ((virCgroupHasEmptyTasks(priv->cgroup, + VIR_CGROUP_CONTROLLER_CPUSET)) <= 0) + goto error; + + if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0) + goto error; + + for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) { + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, i, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + g_free(nodeset); + virCgroupFree(cgroup_temp); + } + + for (i = 0; i < vm->def->niothreadids; i++) { + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD, + vm->def->iothreadids[i]->iothread_id, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + + g_free(nodeset); + virCgroupFree(cgroup_temp); + } + + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 || + virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0) + goto cleanup; + 
+ cleanup: + virCgroupFree(cgroup_temp); + return;
In the declaration of cgroup_temp above, you can use g_autoptr(virCgroup) cgroup_temp = NULL; and then you won't need the 'cleanup' label.
+ + error:
Wrong label indentation.
+ virResetLastError(); + VIR_DEBUG("Couldn't restore cgroups to meaningful state"); + goto cleanup; +} + +int +chConnectCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + g_autoptr(virCHDriverConfig) cfg = virCHDriverGetConfig(priv->driver); + + if (!priv->driver->privileged) + return 0; + + if (!virCgroupAvailable()) + return 0; + + virCgroupFree(priv->cgroup); + + if (virCgroupNewDetectMachine(vm->def->name, + "ch", + vm->pid, + cfg->cgroupControllers, + priv->machineName, &priv->cgroup) < 0) + return -1; + + chRestoreCgroupState(vm); + return 0; +} + +int +chSetupCgroup(virDomainObj * vm, size_t nnicindexes, int *nicindexes) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (!vm->pid) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot setup cgroups until process is started")); + return -1; + } + + if (chInitCgroup(vm, nnicindexes, nicindexes) < 0) + return -1; + + if (!priv->cgroup) + return 0; + + if (chSetupBlkioCgroup(vm) < 0) + return -1; + + if (chSetupMemoryCgroup(vm) < 0) + return -1; + + if (chSetupCpuCgroup(vm) < 0) + return -1; + + if (chSetupCpusetCgroup(vm) < 0) + return -1; + + return 0; +} + +int +chSetupCgroupVcpuBW(virCgroup * cgroup, + unsigned long long period, long long quota) +{ + return virCgroupSetupCpuPeriodQuota(cgroup, period, quota); +} + + +int +chSetupCgroupCpusetCpus(virCgroup * cgroup, virBitmap * cpumask) +{ + return virCgroupSetupCpusetCpus(cgroup, cpumask); +} + +int +chSetupGlobalCpuCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + unsigned long long period = vm->def->cputune.global_period; + long long quota = vm->def->cputune.global_quota; + g_autofree char *mem_mask = NULL; + virDomainNumatuneMemMode mem_mode; + + if ((period || quota) && + !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cgroup cpu is required for scheduler tuning")); + return -1; + } + + /* + * If CPU 
cgroup controller is not initialized here, then we need + * neither period nor quota settings. And if CPUSET controller is + * not initialized either, then there's nothing to do anyway. + */ + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) && + !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + virDomainNumatuneMaybeFormatNodeset(vm->def->numa, + priv->autoNodeset, + &mem_mask, -1) < 0) + return -1; + + if (period || quota) { + if (chSetupCgroupVcpuBW(priv->cgroup, period, quota) < 0) + return -1; + } + + return 0; +} + + +int +chRemoveCgroup(virDomainObj * vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + + if (priv->cgroup == NULL) + return 0; /* Not supported, so claim success */ + + if (virCgroupTerminateMachine(priv->machineName) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + + return virCgroupRemove(priv->cgroup); +} + + +static void +chCgroupEmulatorAllNodesDataFree(chCgroupEmulatorAllNodesData * data) +{ + if (!data) + return; + + virCgroupFree(data->emulatorCgroup); + g_free(data->emulatorMemMask); + g_free(data); +} + + +/** + * chCgroupEmulatorAllNodesAllow: + * @cgroup: domain cgroup pointer + * @retData: filled with structure used to roll back the operation + * + * Allows all NUMA nodes for the cloud hypervisor thread temporarily. This is + * necessary when hotplugging cpus since it requires memory allocated in the + * DMA region. Afterwards the operation can be reverted by + * chCgroupEmulatorAllNodesRestore. + * + * Returns 0 on success -1 on error + */ +int +chCgroupEmulatorAllNodesAllow(virCgroup * cgroup, + chCgroupEmulatorAllNodesData ** retData) +{ + chCgroupEmulatorAllNodesData *data = NULL; + g_autofree char *all_nodes_str = NULL; +
Extra blank line in the middle of the variable declarations.
+ g_autoptr(virBitmap) all_nodes = NULL; + int ret = -1; + + if (!virNumaIsAvailable() || + !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) + return 0; + + if (!(all_nodes = virNumaGetHostMemoryNodeset())) + goto cleanup; + + if (!(all_nodes_str = virBitmapFormat(all_nodes))) + goto cleanup; + + data = g_new0(chCgroupEmulatorAllNodesData, 1); + + if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, + false, &data->emulatorCgroup) < 0) + goto cleanup; + + if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 + || virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0) + goto cleanup; + + *retData = g_steal_pointer(&data); + ret = 0; + + cleanup:
Wrong label indentation.
+ chCgroupEmulatorAllNodesDataFree(data); + + return ret; +} + + +/** + * chCgroupEmulatorAllNodesRestore: + * @data: data structure created by chCgroupEmulatorAllNodesAllow + * + * Rolls back the setting done by chCgroupEmulatorAllNodesAllow and frees the + * associated data. + */ +void +chCgroupEmulatorAllNodesRestore(chCgroupEmulatorAllNodesData * data) +{ + virError *err; + + if (!data) + return; + + virErrorPreserveLast(&err); + virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask); + virErrorRestore(&err); + + chCgroupEmulatorAllNodesDataFree(data); +} diff --git a/src/ch/ch_cgroup.h b/src/ch/ch_cgroup.h new file mode 100644 index 0000000000..0152b5477c --- /dev/null +++ b/src/ch/ch_cgroup.h @@ -0,0 +1,45 @@ +/* + * ch_cgroup.h: CH cgroup management + * + * Copyright Microsoft Corp. 2020-2021 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. 
+ */ + +#pragma once + +#include "virusb.h" +#include "vircgroup.h" +#include "domain_conf.h" +#include "ch_conf.h" + +int chConnectCgroup(virDomainObj * vm); +int chSetupCgroup(virDomainObj * vm, size_t nnicindexes, int *nicindexes); +int chSetupCgroupVcpuBW(virCgroup * cgroup, + unsigned long long period, long long quota); +int chSetupCgroupCpusetCpus(virCgroup * cgroup, virBitmap * cpumask); +int chSetupGlobalCpuCgroup(virDomainObj * vm); +int chRemoveCgroup(virDomainObj * vm); + +typedef struct _chCgroupEmulatorAllNodesData chCgroupEmulatorAllNodesData; + +struct _chCgroupEmulatorAllNodesData { + virCgroup *emulatorCgroup; + char *emulatorMemMask; +}; + +int chCgroupEmulatorAllNodesAllow(virCgroup * cgroup, + chCgroupEmulatorAllNodesData ** data); +void chCgroupEmulatorAllNodesRestore(chCgroupEmulatorAllNodesData * data); diff --git a/src/ch/ch_conf.c b/src/ch/ch_conf.c index ed0fffe5d6..7f70452296 100644 --- a/src/ch/ch_conf.c +++ b/src/ch/ch_conf.c @@ -141,6 +141,8 @@ virCHDriverConfigNew(bool privileged) if (!(cfg = virObjectNew(virCHDriverConfigClass))) return NULL;
+ cfg->cgroupControllers = -1; /* Auto detect */ + if (privileged) { if (virGetUserID(CH_USER, &cfg->user) < 0) return NULL; diff --git a/src/ch/ch_conf.h b/src/ch/ch_conf.h index 49f286f97a..19deb8e568 100644 --- a/src/ch/ch_conf.h +++ b/src/ch/ch_conf.h @@ -35,11 +35,13 @@ struct _virCHDriverConfig {
char *stateDir; char *logDir; - + int cgroupControllers; uid_t user; gid_t group; };
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(virCHDriverConfig, virObjectUnref); + struct _virCHDriver { virMutex lock; diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index e1030800aa..d0aaeed1f4 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -326,6 +326,39 @@ chValidateDomainDeviceDef(const virDomainDeviceDef *dev, _("Serial can only be enabled for a PTY")); return -1; } + return 0; +} +int +virCHDomainRefreshThreadInfo(virDomainObj *vm) +{ + size_t maxvcpus = virDomainDefGetVcpusMax(vm->def); + virCHMonitorThreadInfo *info = NULL; + size_t nthreads, ncpus = 0; + size_t i; + + nthreads = virCHMonitorGetThreadInfo(virCHDomainGetMonitor(vm), + true, &info); + + for (i = 0; i < nthreads; i++) { + virCHDomainVcpuPrivate *vcpupriv; + virDomainVcpuDef *vcpu; + virCHMonitorCPUInfo *vcpuInfo; + + if (info[i].type != virCHThreadTypeVcpu) + continue; + + // TODO: hotplug support + vcpuInfo = &info[i].vcpuInfo; + vcpu = virDomainDefGetVcpu(vm->def, vcpuInfo->cpuid); + vcpupriv = CH_DOMAIN_VCPU_PRIVATE(vcpu); + vcpupriv->tid = vcpuInfo->tid; + ncpus++; + } + + // TODO: Remove the warning when hotplug is implemented. + if (ncpus != maxvcpus) + VIR_WARN("Mismatch in the number of cpus, expected: %ld, actual: %ld", + maxvcpus, ncpus);
return 0; } diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 3ac3421015..2ce3e2cef3 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -89,7 +89,8 @@ virCHDomainObjBeginJob(virDomainObj *obj, enum virCHDomainJob job) void virCHDomainObjEndJob(virDomainObj *obj);
-int virCHDomainRefreshVcpuInfo(virDomainObj *vm); +int virCHDomainRefreshThreadInfo(virDomainObj *vm); + pid_t virCHDomainGetVcpuPid(virDomainObj *vm, unsigned int vcpuid); bool virCHDomainHasVcpuPids(virDomainObj *vm);
diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c index c0ae031200..095779cb3f 100644 --- a/src/ch/ch_monitor.c +++ b/src/ch/ch_monitor.c @@ -41,6 +41,7 @@ VIR_LOG_INIT("ch.ch_monitor");
static virClass *virCHMonitorClass; static void virCHMonitorDispose(void *obj); +static void virCHMonitorThreadInfoFree(virCHMonitor *mon);
static int virCHMonitorOnceInit(void) { @@ -571,6 +572,7 @@ static void virCHMonitorDispose(void *opaque) virCHMonitor *mon = opaque;
VIR_DEBUG("mon=%p", mon); + virCHMonitorThreadInfoFree(mon); virObjectUnref(mon->vm); }
@@ -736,6 +738,114 @@ virCHMonitorGet(virCHMonitor *mon, const char *endpoint, virJSONValue **response return ret; }
+/** + * virCHMonitorGetInfo: + * @mon: Pointer to the monitor + * @info: Get VM info + * + * Retrieve the VM info and store in @info + * + * Returns 0 on success. + */ +int +virCHMonitorGetInfo(virCHMonitor *mon, virJSONValue **info) +{ + return virCHMonitorGet(mon, URL_VM_INFO, info); +} + +static void +virCHMonitorThreadInfoFree(virCHMonitor *mon) +{ + mon->nthreads = 0; + if (mon->threads) + VIR_FREE(mon->threads); +} + +static size_t +virCHMonitorRefreshThreadInfo(virCHMonitor *mon) +{ + virCHMonitorThreadInfo *info = NULL; + g_autofree pid_t *tids = NULL; + virDomainObj *vm = mon->vm; + size_t ntids = 0; + size_t i; + + + virCHMonitorThreadInfoFree(mon); + if (virProcessGetPids(vm->pid, &ntids, &tids) < 0) { + mon->threads = NULL; + return 0; + } + + info = g_new0(virCHMonitorThreadInfo, ntids); + for (i = 0; i < ntids; i++) { + g_autofree char *proc = NULL; + g_autofree char *data = NULL; + + proc = g_strdup_printf("/proc/%d/task/%d/comm", + (int)vm->pid, (int)tids[i]); + + if (virFileReadAll(proc, (1<<16), &data) < 0) { + continue; + } + + VIR_DEBUG("VM PID: %d, TID %d, COMM: %s", + (int)vm->pid, (int)tids[i], data); + if (STRPREFIX(data, "vcpu")) { + int cpuid; + char *tmp; + if (virStrToLong_i(data + 4, &tmp, 0, &cpuid) < 0) { + VIR_WARN("Index is not specified correctly"); + continue; + } + info[i].type = virCHThreadTypeVcpu; + info[i].vcpuInfo.tid = tids[i]; + info[i].vcpuInfo.online = true; + info[i].vcpuInfo.cpuid = cpuid; + VIR_DEBUG("vcpu%d -> tid: %d", cpuid, tids[i]); + } else if (STRPREFIX(data, "_disk") || STRPREFIX(data, "_net") || + STRPREFIX(data, "_rng")) { + /* Prefixes used by cloud-hypervisor for IO Threads are captured at + https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/vmm/src/devic... 
*/ + info[i].type = virCHThreadTypeIO; + info[i].ioInfo.tid = tids[i]; + virStrcpy(info[i].ioInfo.thrName, data, VIRCH_THREAD_NAME_LEN - 1); + }else { + info[i].type = virCHThreadTypeEmulator; + info[i].emuInfo.tid = tids[i]; + virStrcpy(info[i].emuInfo.thrName, data, VIRCH_THREAD_NAME_LEN - 1); + } + mon->nthreads++; + + } + mon->threads = info; + + return mon->nthreads; +} + +/** + * virCHMonitorGetThreadInfo: + * @mon: Pointer to the monitor + * @refresh: Refresh thread info or not + * + * Retrive thread info and store to @threads + * + * Returns count of threads on success. + */ +size_t +virCHMonitorGetThreadInfo(virCHMonitor *mon, bool refresh, + virCHMonitorThreadInfo **threads) +{ + int nthreads = 0; + + if (refresh) + nthreads = virCHMonitorRefreshThreadInfo(mon); + + *threads = mon->threads; + + return nthreads; +} + int virCHMonitorShutdownVMM(virCHMonitor *mon) { @@ -810,18 +920,3 @@ virCHMonitorResumeVM(virCHMonitor *mon) { return virCHMonitorPutNoContent(mon, URL_VM_RESUME); } - -/** - * virCHMonitorGetInfo: - * @mon: Pointer to the monitor - * @info: Get VM info - * - * Retrieve the VM info and store in @info - * - * Returns 0 on success. - */ -int -virCHMonitorGetInfo(virCHMonitor *mon, virJSONValue **info) -{ - return virCHMonitorGet(mon, URL_VM_INFO, info); -} diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h index 8ca9e17a9a..f8c3fa75e8 100644 --- a/src/ch/ch_monitor.h +++ b/src/ch/ch_monitor.h @@ -37,6 +37,50 @@ #define URL_VM_RESUME "vm.resume" #define URL_VM_INFO "vm.info"
+#define VIRCH_THREAD_NAME_LEN 16 + +typedef enum { + virCHThreadTypeEmulator, + virCHThreadTypeVcpu, + virCHThreadTypeIO, + virCHThreadTypeMax +} virCHThreadType; + +typedef struct _virCHMonitorCPUInfo virCHMonitorCPUInfo; + +struct _virCHMonitorCPUInfo { + int cpuid; + pid_t tid; + + bool online; +}; + +typedef struct _virCHMonitorEmuThreadInfo virCHMonitorEmuThreadInfo; + +struct _virCHMonitorEmuThreadInfo { + char thrName[VIRCH_THREAD_NAME_LEN]; + pid_t tid; +}; + +typedef struct _virCHMonitorIOThreadInfo virCHMonitorIOThreadInfo; + +struct _virCHMonitorIOThreadInfo { + char thrName[VIRCH_THREAD_NAME_LEN]; + pid_t tid; +}; + +typedef struct _virCHMonitorThreadInfo virCHMonitorThreadInfo; + +struct _virCHMonitorThreadInfo { + virCHThreadType type; + + union { + virCHMonitorCPUInfo vcpuInfo; + virCHMonitorEmuThreadInfo emuInfo; + virCHMonitorIOThreadInfo ioInfo; + }; +}; + typedef struct _virCHMonitor virCHMonitor;
struct _virCHMonitor { @@ -49,6 +93,9 @@ struct _virCHMonitor { pid_t pid;
virDomainObj *vm; + + size_t nthreads; + virCHMonitorThreadInfo *threads; };
virCHMonitor *virCHMonitorNew(virDomainObj *vm, const char *socketdir); @@ -65,12 +112,9 @@ int virCHMonitorSuspendVM(virCHMonitor *mon); int virCHMonitorResumeVM(virCHMonitor *mon); int virCHMonitorGetInfo(virCHMonitor *mon, virJSONValue **info);
-typedef struct _virCHMonitorCPUInfo virCHMonitorCPUInfo; -struct _virCHMonitorCPUInfo { - pid_t tid; - bool online; -}; void virCHMonitorCPUInfoFree(virCHMonitorCPUInfo *cpus); int virCHMonitorGetCPUInfo(virCHMonitor *mon, virCHMonitorCPUInfo **vcpus, size_t maxvcpus); +size_t virCHMonitorGetThreadInfo(virCHMonitor *mon, bool refresh, + virCHMonitorThreadInfo **threads); diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index 3b7f6fcddf..8dce737adb 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -26,6 +26,8 @@ #include "ch_domain.h" #include "ch_monitor.h" #include "ch_process.h" +#include "ch_cgroup.h" +#include "virnuma.h" #include "viralloc.h" #include "virerror.h" #include "virjson.h" @@ -133,6 +135,257 @@ virCHProcessUpdateInfo(virDomainObj *vm) return 0; }
+static int +virCHProcessGetAllCpuAffinity(virBitmap **cpumapRet) +{ + *cpumapRet = NULL; + + if (!virHostCPUHasBitmap()) + return 0; + + if (!(*cpumapRet = virHostCPUGetOnlineBitmap())) + return -1; + + return 0; +} + +#if defined(WITH_SCHED_GETAFFINITY) || defined(WITH_BSD_CPU_AFFINITY) +static int +virCHProcessInitCpuAffinity(virDomainObj *vm) +{ + g_autoptr(virBitmap) cpumapToSet = NULL; + virDomainNumatuneMemMode mem_mode; + virCHDomainObjPrivate *priv = vm->privateData; + + if (!vm->pid) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot setup CPU affinity until process is started")); + return -1; + } + + if (virDomainNumaGetNodeCount(vm->def->numa) <= 1 && + virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + virBitmap *nodeset = NULL; + + if (virDomainNumatuneMaybeGetNodeset(vm->def->numa, + priv->autoNodeset, + &nodeset, + -1) < 0) + return -1; + + if (virNumaNodesetToCPUset(nodeset, &cpumapToSet) < 0) + return -1; + } else if (vm->def->cputune.emulatorpin) { + if (!(cpumapToSet = virBitmapNewCopy(vm->def->cputune.emulatorpin))) + return -1; + } else { + if (virCHProcessGetAllCpuAffinity(&cpumapToSet) < 0) + return -1; + } + + if (cpumapToSet && + virProcessSetAffinity(vm->pid, cpumapToSet, false) < 0) { + return -1; + } + + return 0; +} +#else /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */ +static int +virCHProcessInitCpuAffinity(virDomainObj *vm G_GNUC_UNUSED) +{ + return 0; +} +#endif /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */ + +/** + * virCHProcessSetupPid: + * + * This function sets resource properties (affinity, cgroups, + * scheduler) for any PID associated with a domain. It should be used + * to set up emulator PIDs as well as vCPU and I/O thread pids to + * ensure they are all handled the same way. + * + * Returns 0 on success, -1 on error. 
+ */ +static int +virCHProcessSetupPid(virDomainObj *vm, + pid_t pid, + virCgroupThreadName nameval, + int id, + virBitmap *cpumask, + unsigned long long period, + long long quota, + virDomainThreadSchedParam *sched) +{ + virCHDomainObjPrivate *priv = vm->privateData; + virDomainNumatuneMemMode mem_mode; + virCgroup *cgroup = NULL; + virBitmap *use_cpumask = NULL; + virBitmap *affinity_cpumask = NULL; + g_autoptr(virBitmap) hostcpumap = NULL; + g_autofree char *mem_mask = NULL; + int ret = -1; + + if ((period || quota) && + !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cgroup cpu is required for scheduler tuning")); + goto cleanup; + } + + /* Infer which cpumask shall be used. */ + if (cpumask) { + use_cpumask = cpumask; + } else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) { + use_cpumask = priv->autoCpuset; + } else if (vm->def->cpumask) { + use_cpumask = vm->def->cpumask; + } else { + /* we can't assume cloud-hypervisor itself is running on all pCPUs, + * so we need to explicitly set the spawned instance to all pCPUs. */ + if (virCHProcessGetAllCpuAffinity(&hostcpumap) < 0) + goto cleanup; + affinity_cpumask = hostcpumap; + } + + /* + * If CPU cgroup controller is not initialized here, then we need + * neither period nor quota settings. And if CPUSET controller is + * not initialized either, then there's nothing to do anyway. 
+ */ + if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) || + virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 && + mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT && + virDomainNumatuneMaybeFormatNodeset(vm->def->numa, + priv->autoNodeset, + &mem_mask, -1) < 0) + goto cleanup; + + if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0) + goto cleanup; + + if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + if (use_cpumask && + chSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0) + goto cleanup; + + if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0) + goto cleanup; + + } + + if ((period || quota) && + chSetupCgroupVcpuBW(cgroup, period, quota) < 0) + goto cleanup; + + /* Move the thread to the sub dir */ + VIR_INFO("Adding pid %d to cgroup", pid); + if (virCgroupAddThread(cgroup, pid) < 0) + goto cleanup; + + } + + if (!affinity_cpumask) + affinity_cpumask = use_cpumask; + + /* Setup legacy affinity. */ + if (affinity_cpumask && virProcessSetAffinity(pid, affinity_cpumask, false) < 0) + goto cleanup; + + /* Set scheduler type and priority, but not for the main thread. */ + if (sched && + nameval != VIR_CGROUP_THREAD_EMULATOR && + virProcessSetScheduler(pid, sched->policy, sched->priority) < 0) + goto cleanup; + + ret = 0; + cleanup: + if (cgroup) { + if (ret < 0) + virCgroupRemove(cgroup); + virCgroupFree(cgroup); + } + + return ret; +} + +/** + * virCHProcessSetupVcpu: + * @vm: domain object + * @vcpuid: id of VCPU to set defaults + * + * This function sets resource properties (cgroups, affinity, scheduler) for a + * vCPU. This function expects that the vCPU is online and the vCPU pids were + * correctly detected at the point when it's called. + * + * Returns 0 on success, -1 on error. 
+ */ +int +virCHProcessSetupVcpu(virDomainObj *vm, + unsigned int vcpuid) +{ + pid_t vcpupid = virCHDomainGetVcpuPid(vm, vcpuid); + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, vcpuid); + + return virCHProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU, + vcpuid, vcpu->cpumask, + vm->def->cputune.period, + vm->def->cputune.quota, + &vcpu->sched); +} + +static int +virCHProcessSetupVcpus(virDomainObj *vm) +{ + virDomainVcpuDef *vcpu; + unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def); + size_t i; + + if ((vm->def->cputune.period || vm->def->cputune.quota) && + !virCgroupHasController(((virCHDomainObjPrivate *) vm->privateData)->cgroup, + VIR_CGROUP_CONTROLLER_CPU)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cgroup cpu is required for scheduler tuning")); + return -1; + } + + if (!virCHDomainHasVcpuPids(vm)) { + /* If any CPU has custom affinity that differs from the + * VM default affinity, we must reject it */ + for (i = 0; i < maxvcpus; i++) { + vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (vcpu->cpumask && + !virBitmapEqual(vm->def->cpumask, vcpu->cpumask)) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("cpu affinity is not supported")); + return -1; + } + } + + return 0; + } + + for (i = 0; i < maxvcpus; i++) { + vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (virCHProcessSetupVcpu(vm, i) < 0) + return -1; + } + + return 0; +} + /** * virCHProcessStart: * @driver: pointer to driver structure @@ -168,18 +421,33 @@ int virCHProcessStart(virCHDriver *driver, } }
+ vm->pid = priv->monitor->pid; + vm->def->id = vm->pid; + priv->machineName = virCHDomainGetMachineName(vm); + + if (chSetupCgroup(vm, nnicindexes, nicindexes) < 0) + goto cleanup; + + if (virCHProcessInitCpuAffinity(vm) < 0) + goto cleanup; + if (virCHMonitorBootVM(priv->monitor) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("failed to boot guest VM")); goto cleanup; }
- priv->machineName = virCHDomainGetMachineName(vm); - vm->pid = priv->monitor->pid; - vm->def->id = vm->pid; + virCHDomainRefreshThreadInfo(vm);
- virCHProcessUpdateInfo(vm); + VIR_DEBUG("Setting global CPU cgroup (if required)"); + if (chSetupGlobalCpuCgroup(vm) < 0) + goto cleanup; + + VIR_DEBUG("Setting vCPU tuning/settings"); + if (virCHProcessSetupVcpus(vm) < 0) + goto cleanup;
+ virCHProcessUpdateInfo(vm); virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
return 0; @@ -195,6 +463,8 @@ int virCHProcessStop(virCHDriver *driver G_GNUC_UNUSED, virDomainObj *vm, virDomainShutoffReason reason) { + int ret; + int retries = 0; virCHDomainObjPrivate *priv = vm->privateData;
VIR_DEBUG("Stopping VM name=%s pid=%d reason=%d", @@ -205,6 +475,16 @@ int virCHProcessStop(virCHDriver *driver G_GNUC_UNUSED, priv->monitor = NULL; }
+ retry:
Wrong label indentation. Daniel
+ if ((ret = chRemoveCgroup(vm)) < 0) { + if (ret == -EBUSY && (retries++ < 5)) { + g_usleep(200*1000); + goto retry; + } + VIR_WARN("Failed to remove cgroup for %s", + vm->def->name); + } + vm->pid = -1; vm->def->id = -1;
diff --git a/src/ch/ch_process.h b/src/ch/ch_process.h index abc4915979..800e3f4e23 100644 --- a/src/ch/ch_process.h +++ b/src/ch/ch_process.h @@ -29,3 +29,6 @@ int virCHProcessStart(virCHDriver *driver, int virCHProcessStop(virCHDriver *driver, virDomainObj *vm, virDomainShutoffReason reason); + +int virCHProcessSetupVcpu(virDomainObj *vm, + unsigned int vcpuid); diff --git a/src/ch/meson.build b/src/ch/meson.build index 2b2bdda26c..0b20de56fd 100644 --- a/src/ch/meson.build +++ b/src/ch/meson.build @@ -1,6 +1,8 @@ ch_driver_sources = [ 'ch_conf.c', 'ch_conf.h', + 'ch_cgroup.c', + 'ch_cgroup.h', 'ch_domain.c', 'ch_domain.h', 'ch_driver.c',

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_domain.c | 30 +++++++++ src/ch/ch_domain.h | 1 + src/ch/ch_driver.c | 151 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 182 insertions(+) diff --git a/src/ch/ch_domain.c b/src/ch/ch_domain.c index d0aaeed1f4..9ad00583aa 100644 --- a/src/ch/ch_domain.c +++ b/src/ch/ch_domain.c @@ -20,6 +20,7 @@ #include <config.h> +#include "datatypes.h" #include "ch_domain.h" #include "domain_driver.h" #include "viralloc.h" @@ -420,3 +421,32 @@ char *virCHDomainGetMachineName(virDomainObj *vm) return ret; } + +/** + * virCHDomainObjFromDomain: + * @domain: Domain pointer that has to be looked up + * + * This function looks up @domain and returns the appropriate virDomainObjPtr + * that has to be released by calling virDomainObjEndAPI(). + * + * Returns the domain object with incremented reference counter which is locked + * on success, NULL otherwise. 
+ */ +virDomainObj * +virCHDomainObjFromDomain(virDomainPtr domain) +{ + virDomainObj *vm; + virCHDriver *driver = domain->conn->privateData; + char uuidstr[VIR_UUID_STRING_BUFLEN]; + + vm = virDomainObjListFindByUUID(driver->domains, domain->uuid); + if (!vm) { + virUUIDFormat(domain->uuid, uuidstr); + virReportError(VIR_ERR_NO_DOMAIN, + _("no domain with matching uuid '%s' (%s)"), + uuidstr, domain->name); + return NULL; + } + + return vm; +} diff --git a/src/ch/ch_domain.h b/src/ch/ch_domain.h index 2ce3e2cef3..db1451405b 100644 --- a/src/ch/ch_domain.h +++ b/src/ch/ch_domain.h @@ -95,3 +95,4 @@ pid_t virCHDomainGetVcpuPid(virDomainObj *vm, unsigned int vcpuid); bool virCHDomainHasVcpuPids(virDomainObj *vm); char *virCHDomainGetMachineName(virDomainObj *vm); +virDomainObj *virCHDomainObjFromDomain(virDomain *domain); diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index ca854da123..7f3cf6dbef 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -25,6 +25,7 @@ #include "ch_driver.h" #include "ch_monitor.h" #include "ch_process.h" +#include "ch_cgroup.h" #include "datatypes.h" #include "driver.h" #include "viraccessapicheck.h" @@ -1141,6 +1142,154 @@ chDomainGetVcpus(virDomainPtr dom, return ret; } +static int +chDomainPinVcpuLive(virDomainObj *vm, + virDomainDef *def, + int vcpu, + virCHDriver *driver, + virCHDriverConfig *cfg, + virBitmap *cpumap) +{ + virBitmap *tmpmap = NULL; + virDomainVcpuDef *vcpuinfo; + virCHDomainObjPrivate *priv = vm->privateData; + virCgroup *cgroup_vcpu = NULL; + g_autofree char *str = NULL; + int ret = -1; + + if (!virCHDomainHasVcpuPids(vm)) { + virReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("cpu affinity is not supported")); + goto cleanup; + } + + if (!(vcpuinfo = virDomainDefGetVcpu(def, vcpu))) { + virReportError(VIR_ERR_INVALID_ARG, + _("vcpu %d is out of range of live cpu count %d"), + vcpu, virDomainDefGetVcpusMax(def)); + goto cleanup; + } + + if (!(tmpmap = virBitmapNewCopy(cpumap))) + goto cleanup; + + if 
(!(str = virBitmapFormat(cpumap))) + goto cleanup; + + if (vcpuinfo->online) { + /* Configure the corresponding cpuset cgroup before set affinity. */ + if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, vcpu, + false, &cgroup_vcpu) < 0) + goto cleanup; + if (chSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0) + goto cleanup; + } + + if (virProcessSetAffinity(virCHDomainGetVcpuPid(vm, vcpu), cpumap, false) < 0) + goto cleanup; + } + + virBitmapFree(vcpuinfo->cpumask); + vcpuinfo->cpumask = tmpmap; + tmpmap = NULL; + + if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) + goto cleanup; + + ret = 0; + + cleanup: + virBitmapFree(tmpmap); + virCgroupFree(cgroup_vcpu); + return ret; +} + + +static int +chDomainPinVcpuFlags(virDomainPtr dom, + unsigned int vcpu, + unsigned char *cpumap, + int maplen, + unsigned int flags) +{ + virCHDriver *driver = dom->conn->privateData; + virDomainObj *vm; + virDomainDef *def; + virDomainDef *persistentDef; + int ret = -1; + virBitmap *pcpumap = NULL; + virDomainVcpuDef *vcpuinfo = NULL; + g_autoptr(virCHDriverConfig) cfg = NULL; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + cfg = virCHDriverGetConfig(driver); + + if (!(vm = virCHDomainObjFromDomain(dom))) + goto cleanup; + + if (virDomainPinVcpuFlagsEnsureACL(dom->conn, vm->def, flags) < 0) + goto cleanup; + + if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0) + goto cleanup; + + if (virDomainObjGetDefs(vm, flags, &def, &persistentDef) < 0) + goto endjob; + + if (persistentDef && + !(vcpuinfo = virDomainDefGetVcpu(persistentDef, vcpu))) { + virReportError(VIR_ERR_INVALID_ARG, + _("vcpu %d is out of range of persistent cpu count %d"), + vcpu, virDomainDefGetVcpus(persistentDef)); + goto endjob; + } + + if (!(pcpumap = virBitmapNewData(cpumap, maplen))) + goto endjob; + + if (virBitmapIsAllClear(pcpumap)) { + virReportError(VIR_ERR_INVALID_ARG, "%s", + _("Empty 
cpu list for pinning")); + goto endjob; + } + + if (def && + chDomainPinVcpuLive(vm, def, vcpu, driver, cfg, pcpumap) < 0) + goto endjob; + + if (persistentDef) { + virBitmapFree(vcpuinfo->cpumask); + vcpuinfo->cpumask = pcpumap; + pcpumap = NULL; + + // ret = virDomainDefSave(persistentDef, driver->xmlopt, cfg->configDir); + goto endjob; + } + + ret = 0; + + endjob: + virCHDomainObjEndJob(vm); + + cleanup: + virDomainObjEndAPI(&vm); + virBitmapFree(pcpumap); + return ret; +} + +static int +chDomainPinVcpu(virDomainPtr dom, + unsigned int vcpu, + unsigned char *cpumap, + int maplen) +{ + return chDomainPinVcpuFlags(dom, vcpu, cpumap, maplen, + VIR_DOMAIN_AFFECT_LIVE); +} + /* Function Tables */ static virHypervisorDriver chHypervisorDriver = { .name = "CH", @@ -1181,6 +1330,8 @@ static virHypervisorDriver chHypervisorDriver = { .domainGetVcpusFlags = chDomainGetVcpusFlags, /* 7.9.0 */ .domainGetMaxVcpus = chDomainGetMaxVcpus, /* 7.9.0 */ .domainGetVcpuPinInfo = chDomainGetVcpuPinInfo, /* 7.9.0 */ + .domainPinVcpu = chDomainPinVcpu, /* 7.9.0 */ + .domainPinVcpuFlags = chDomainPinVcpuFlags, /* 7.9.0 */ .nodeGetCPUMap = chNodeGetCPUMap, /* 7.9.0 */ }; -- 2.27.0

Enable support of VIR_DRV_FEATURE_TYPED_PARAM_STRING to enable numatune Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_driver.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 7f3cf6dbef..9ad23c1710 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -955,6 +955,36 @@ static int chStateInitialize(bool privileged, return ret; } +/* Which features are supported by this driver? */ +static int +chConnectSupportsFeature(virConnectPtr conn, int feature) +{ + if (virConnectSupportsFeatureEnsureACL(conn) < 0) + return -1; + + switch ((virDrvFeature) feature) { + case VIR_DRV_FEATURE_TYPED_PARAM_STRING: + return 1; + case VIR_DRV_FEATURE_MIGRATION_V2: + case VIR_DRV_FEATURE_MIGRATION_V3: + case VIR_DRV_FEATURE_MIGRATION_P2P: + case VIR_DRV_FEATURE_MIGRATE_CHANGE_PROTECTION: + case VIR_DRV_FEATURE_FD_PASSING: + case VIR_DRV_FEATURE_XML_MIGRATABLE: + case VIR_DRV_FEATURE_MIGRATION_OFFLINE: + case VIR_DRV_FEATURE_MIGRATION_PARAMS: + case VIR_DRV_FEATURE_MIGRATION_DIRECT: + case VIR_DRV_FEATURE_MIGRATION_V1: + case VIR_DRV_FEATURE_PROGRAM_KEEPALIVE: + case VIR_DRV_FEATURE_REMOTE: + case VIR_DRV_FEATURE_REMOTE_CLOSE_CALLBACK: + case VIR_DRV_FEATURE_REMOTE_EVENT_CALLBACK: + case VIR_DRV_FEATURE_NETWORK_UPDATE_HAS_CORRECT_ORDER: + default: + return 0; + } +} + static int chDomainGetVcpusFlags(virDomainPtr dom, unsigned int flags) { @@ -1303,6 +1333,7 @@ static virHypervisorDriver chHypervisorDriver = { .connectListAllDomains = chConnectListAllDomains, /* 7.5.0 */ .connectListDomains = chConnectListDomains, /* 7.5.0 */ .connectGetCapabilities = chConnectGetCapabilities, /* 7.5.0 */ + .connectSupportsFeature = chConnectSupportsFeature, /* 7.8.0 */ .domainCreateXML = chDomainCreateXML, /* 7.5.0 */ .domainCreate = chDomainCreate, /* 7.5.0 */ .domainCreateWithFlags = chDomainCreateWithFlags, /* 7.5.0 */ -- 
2.27.0

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> --- src/ch/ch_driver.c | 278 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 9ad23c1710..97dfda85e1 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -43,6 +43,7 @@ #include "viruri.h" #include "virutil.h" #include "viruuid.h" +#include "virnuma.h" #define VIR_FROM_THIS VIR_FROM_CH @@ -1320,6 +1321,281 @@ chDomainPinVcpu(virDomainPtr dom, VIR_DOMAIN_AFFECT_LIVE); } +#define CH_NB_NUMA_PARAM 2 + +static int +chDomainGetNumaParameters(virDomainPtr dom, + virTypedParameterPtr params, + int *nparams, + unsigned int flags) +{ + size_t i; + virDomainObj *vm = NULL; + virDomainNumatuneMemMode tmpmode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; + virCHDomainObjPrivate *priv; + g_autofree char *nodeset = NULL; + int ret = -1; + virDomainDef *def = NULL; + bool live = false; + virBitmap *autoNodeset = NULL; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG | + VIR_TYPED_PARAM_STRING_OKAY, -1); + + if (!(vm = virCHDomainObjFromDomain(dom))) + return -1; + priv = vm->privateData; + + if (virDomainGetNumaParametersEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (!(def = virDomainObjGetOneDefState(vm, flags, &live))) + goto cleanup; + + if (live) + autoNodeset = priv->autoNodeset; + + if ((*nparams) == 0) { + *nparams = CH_NB_NUMA_PARAM; + ret = 0; + goto cleanup; + } + + for (i = 0; i < CH_NB_NUMA_PARAM && i < *nparams; i++) { + virMemoryParameterPtr param = &params[i]; + + switch (i) { + case 0: /* fill numa mode here */ + ignore_value(virDomainNumatuneGetMode(def->numa, -1, &tmpmode)); + + if (virTypedParameterAssign(param, VIR_DOMAIN_NUMA_MODE, + VIR_TYPED_PARAM_INT, tmpmode) < 0) + goto cleanup; + + break; + + case 1: /* fill numa nodeset here */ + nodeset = virDomainNumatuneFormatNodeset(def->numa, autoNodeset, -1); + + if (!nodeset || 
+ virTypedParameterAssign(param, VIR_DOMAIN_NUMA_NODESET, + VIR_TYPED_PARAM_STRING, nodeset) < 0) + goto cleanup; + + nodeset = NULL; + break; + + /* coverity[dead_error_begin] */ + default: + break; + /* should not hit here */ + } + } + + if (*nparams > CH_NB_NUMA_PARAM) + *nparams = CH_NB_NUMA_PARAM; + ret = 0; + + cleanup: + virDomainObjEndAPI(&vm); + return ret; +} + +static int +chDomainSetNumaParamsLive(virDomainObj *vm, + virBitmap *nodeset) +{ + virCgroup *cgroup_temp = NULL; + virCHDomainObjPrivate *priv = vm->privateData; + g_autofree char *nodeset_str = NULL; + virDomainNumatuneMemMode mode; + size_t i = 0; + int ret = -1; + + if (virDomainNumatuneGetMode(vm->def->numa, -1, &mode) == 0 && + mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("change of nodeset for running domain " + "requires strict numa mode")); + goto cleanup; + } + + if (!virNumaNodesetIsAvailable(nodeset)) + goto cleanup; + + /* Ensure the cpuset string is formatted before passing to cgroup */ + if (!(nodeset_str = virBitmapFormat(nodeset))) + goto cleanup; + + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_EMULATOR, 0, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0) + goto cleanup; + virCgroupFree(cgroup_temp); + + for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) { + virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i); + + if (!vcpu->online) + continue; + + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, i, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0) + goto cleanup; + virCgroupFree(cgroup_temp); + } + + for (i = 0; i < vm->def->niothreadids; i++) { + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD, + vm->def->iothreadids[i]->iothread_id, + false, &cgroup_temp) < 0 || + virCgroupSetCpusetMems(cgroup_temp, nodeset_str) < 0) + goto cleanup; + virCgroupFree(cgroup_temp); + } + + /* set nodeset for root cgroup */ + if 
(virCgroupSetCpusetMems(priv->cgroup, nodeset_str) < 0) + goto cleanup; + + ret = 0; + cleanup: + virCgroupFree(cgroup_temp); + + return ret; +} + +static int +chDomainSetNumaParameters(virDomainPtr dom, + virTypedParameterPtr params, + int nparams, + unsigned int flags) +{ + virCHDriver *driver = dom->conn->privateData; + size_t i; + virDomainDef *def; + virDomainDef *persistentDef; + virDomainObj *vm = NULL; + int ret = -1; + g_autoptr(virCHDriverConfig) cfg = NULL; + virCHDomainObjPrivate *priv; + virBitmap *nodeset = NULL; + virDomainNumatuneMemMode config_mode; + int mode = -1; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + if (virTypedParamsValidate(params, nparams, + VIR_DOMAIN_NUMA_MODE, + VIR_TYPED_PARAM_INT, + VIR_DOMAIN_NUMA_NODESET, + VIR_TYPED_PARAM_STRING, + NULL) < 0) + return -1; + + if (!(vm = virCHDomainObjFromDomain(dom))) + return -1; + + priv = vm->privateData; + cfg = virCHDriverGetConfig(driver); + + if (virDomainSetNumaParametersEnsureACL(dom->conn, vm->def, flags) < 0) + goto cleanup; + + for (i = 0; i < nparams; i++) { + virTypedParameterPtr param = &params[i]; + + if (STREQ(param->field, VIR_DOMAIN_NUMA_MODE)) { + mode = param->value.i; + + if (mode < 0 || mode >= VIR_DOMAIN_NUMATUNE_MEM_LAST) { + virReportError(VIR_ERR_INVALID_ARG, + _("unsupported numatune mode: '%d'"), mode); + goto cleanup; + } + + } else if (STREQ(param->field, VIR_DOMAIN_NUMA_NODESET)) { + if (virBitmapParse(param->value.s, &nodeset, + VIR_DOMAIN_CPUMASK_LEN) < 0) + goto cleanup; + + if (virBitmapIsAllClear(nodeset)) { + virReportError(VIR_ERR_OPERATION_INVALID, + _("Invalid nodeset of 'numatune': %s"), + param->value.s); + goto cleanup; + } + } + } + + if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0) + goto cleanup; + + if (virDomainObjGetDefs(vm, flags, &def, &persistentDef) < 0) + goto endjob; + + if (def) { + if (!driver->privileged) { + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", + _("NUMA tuning is not available in 
session mode")); + goto endjob; + } + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("cgroup cpuset controller is not mounted")); + goto endjob; + } + + if (mode != -1 && + virDomainNumatuneGetMode(def->numa, -1, &config_mode) == 0 && + config_mode != mode) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("can't change numatune mode for running domain")); + goto endjob; + } + + if (nodeset && + chDomainSetNumaParamsLive(vm, nodeset) < 0) + goto endjob; + + if (virDomainNumatuneSet(def->numa, + def->placement_mode == + VIR_DOMAIN_CPU_PLACEMENT_MODE_STATIC, + -1, mode, nodeset) < 0) + goto endjob; + + if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) + goto endjob; + } + + /* + if (persistentDef) { + if (virDomainNumatuneSet(persistentDef->numa, + persistentDef->placement_mode == + VIR_DOMAIN_CPU_PLACEMENT_MODE_STATIC, + -1, mode, nodeset) < 0) + goto endjob; + + if (virDomainDefSave(persistentDef, driver->xmlopt, cfg->configDir) < 0) + goto endjob; + } + */ + + ret = 0; + + endjob: + virCHDomainObjEndJob(vm); + + cleanup: + virBitmapFree(nodeset); + virDomainObjEndAPI(&vm); + return ret; +} + /* Function Tables */ static virHypervisorDriver chHypervisorDriver = { .name = "CH", @@ -1364,6 +1640,8 @@ static virHypervisorDriver chHypervisorDriver = { .domainPinVcpu = chDomainPinVcpu, /* 7.9.0 */ .domainPinVcpuFlags = chDomainPinVcpuFlags, /* 7.9.0 */ .nodeGetCPUMap = chNodeGetCPUMap, /* 7.9.0 */ + .domainSetNumaParameters = chDomainSetNumaParameters, /* 7.9.0 */ + .domainGetNumaParameters = chDomainGetNumaParameters, /* 7.9.0 */ }; static virConnectDriver chConnectDriver = { -- 2.27.0

From: Praveen K Paladugu <prapal@linux.microsoft.com> using virCHProcessSetupPid Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> --- src/ch/ch_monitor.c | 60 ++++++++++++++++++++++++++++++++++ src/ch/ch_monitor.h | 2 ++ src/ch/ch_process.c | 78 +++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 138 insertions(+), 2 deletions(-) diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c index 095779cb3f..924d510a2f 100644 --- a/src/ch/ch_monitor.c +++ b/src/ch/ch_monitor.c @@ -920,3 +920,63 @@ virCHMonitorResumeVM(virCHMonitor *mon) { return virCHMonitorPutNoContent(mon, URL_VM_RESUME); } + +/** + * virCHMonitorGetIOThreads: + * @mon: Pointer to the monitor + * @iothreads: Location to return array of IOThreadInfo data + * + * Retrieve the list of iothreads defined/running for the machine + * + * Returns count of IOThreadInfo structures on success + * -1 on error. + */ +int virCHMonitorGetIOThreads(virCHMonitor *mon, + virDomainIOThreadInfo ***iothreads) +{ + size_t nthreads = 0, niothreads = 0; + int thd_index; + virDomainIOThreadInfo **iothreadinfolist = NULL, *iothreadinfo = NULL; + + *iothreads = NULL; + nthreads = virCHMonitorRefreshThreadInfo(mon); + + iothreadinfolist = g_new0(virDomainIOThreadInfo*, nthreads); + + for (thd_index = 0; thd_index < nthreads; thd_index++) { + virBitmap *map = NULL; + if (mon->threads[thd_index].type == virCHThreadTypeIO) { + iothreadinfo = g_new0(virDomainIOThreadInfo, 1); + + iothreadinfo->iothread_id = mon->threads[thd_index].ioInfo.tid; + + if (!(map = virProcessGetAffinity(iothreadinfo->iothread_id))) + goto cleanup; + + if (virBitmapToData(map, &(iothreadinfo->cpumap), + &(iothreadinfo->cpumaplen)) < 0) { + virBitmapFree(map); + goto cleanup; + } + virBitmapFree(map); + //Append to iothreadinfolist + iothreadinfolist[niothreads] = iothreadinfo; + niothreads++; + } + } + VIR_DELETE_ELEMENT_INPLACE(iothreadinfolist, + niothreads, nthreads); + 
*iothreads = iothreadinfolist; + VIR_DEBUG("niothreads = %ld", niothreads); + return niothreads; + + cleanup: + if (iothreadinfolist) { + for (thd_index = 0; thd_index < niothreads; thd_index++) + VIR_FREE(iothreadinfolist[thd_index]); + VIR_FREE(iothreadinfolist); + } + if (iothreadinfo) + VIR_FREE(iothreadinfo); + return -1; +} diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h index f8c3fa75e8..98edb0faf9 100644 --- a/src/ch/ch_monitor.h +++ b/src/ch/ch_monitor.h @@ -118,3 +118,5 @@ int virCHMonitorGetCPUInfo(virCHMonitor *mon, size_t maxvcpus); size_t virCHMonitorGetThreadInfo(virCHMonitor *mon, bool refresh, virCHMonitorThreadInfo **threads); +int virCHMonitorGetIOThreads(virCHMonitor *mon, + virDomainIOThreadInfo ***iothreads); diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index 8dce737adb..25c2910f9c 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -41,7 +41,6 @@ VIR_LOG_INIT("ch.ch_process"); #define START_VM_POSTFIX ": starting up vm\n" - static virCHMonitor * virCHProcessConnectMonitor(virCHDriver *driver, virDomainObj *vm) @@ -131,7 +130,6 @@ virCHProcessUpdateInfo(virDomainObj *vm) virCHProcessUpdateConsole(vm, info); virJSONValueFree(info); - return 0; } @@ -313,6 +311,74 @@ virCHProcessSetupPid(virDomainObj *vm, return ret; } +static int +virCHProcessSetupIOThread(virDomainObj *vm, + virDomainIOThreadInfo *iothread) +{ + virCHDomainObjPrivate *priv = vm->privateData; + return virCHProcessSetupPid(vm, iothread->iothread_id, + VIR_CGROUP_THREAD_IOTHREAD, + iothread->iothread_id, + priv->autoCpuset, // This should be updated when CLH supports accepting + // iothread settings from input domain definition + vm->def->cputune.iothread_period, + vm->def->cputune.iothread_quota, + NULL); // CLH doesn't allow choosing a scheduler for iothreads. 
+} + +static int +virCHProcessSetupIOThreads(virDomainObj *vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + virDomainIOThreadInfo **iothreads = NULL; + size_t i; + size_t niothreads; + + niothreads = virCHMonitorGetIOThreads(priv->monitor, &iothreads); + for (i = 0; i < niothreads; i++) { + VIR_DEBUG("IOThread index = %ld , tid = %d", i, iothreads[i]->iothread_id); + if (virCHProcessSetupIOThread(vm, iothreads[i]) < 0) + return -1; + } + return 0; +} + + +static int +virCHProcessSetupEmulatorThread(virDomainObj *vm, + virCHMonitorEmuThreadInfo emuthread) +{ + return virCHProcessSetupPid(vm, emuthread.tid, VIR_CGROUP_THREAD_EMULATOR, + 0, vm->def->cputune.emulatorpin, + vm->def->cputune.emulator_period, + vm->def->cputune.emulator_quota, + vm->def->cputune.emulatorsched); +} + +static int +virCHProcessSetupEmulatorThreads(virDomainObj *vm) +{ + int thd_index = 0; + virCHDomainObjPrivate *priv = vm->privateData; + /* + Cloud-hypervisor start 4 Emulator threads by default: + vmm + cloud-hypervisor + http-server + signal_handler + */ + for (thd_index = 0; thd_index < priv->monitor->nthreads; thd_index++) { + if (priv->monitor->threads[thd_index].type == virCHThreadTypeEmulator) { + VIR_DEBUG("Setup tid = %d (%s) Emulator thread", priv->monitor->threads[thd_index].emuInfo.tid, + priv->monitor->threads[thd_index].emuInfo.thrName); + + if (virCHProcessSetupEmulatorThread(vm, priv->monitor->threads[thd_index].emuInfo) < 0) + return -1; + } + } + return 0; +} + /** * virCHProcessSetupVcpu: * @vm: domain object @@ -439,6 +505,14 @@ int virCHProcessStart(virCHDriver *driver, virCHDomainRefreshThreadInfo(vm); + VIR_DEBUG("Setting emulator tuning/settings"); + if (virCHProcessSetupEmulatorThreads(vm) < 0) + goto cleanup; + + VIR_DEBUG("Setting iothread tuning/settings"); + if (virCHProcessSetupIOThreads(vm) < 0) + goto cleanup; + VIR_DEBUG("Setting global CPU cgroup (if required)"); if (chSetupGlobalCpuCgroup(vm) < 0) goto cleanup; -- 2.27.0

On 10/21/21 16:31, Vineeth Pillai wrote:
From: Praveen K Paladugu <prapal@linux.microsoft.com>
Set up emulator and iothread settings using virCHProcessSetupPid.
Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> --- src/ch/ch_monitor.c | 60 ++++++++++++++++++++++++++++++++++ src/ch/ch_monitor.h | 2 ++ src/ch/ch_process.c | 78 +++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 138 insertions(+), 2 deletions(-)
diff --git a/src/ch/ch_monitor.c b/src/ch/ch_monitor.c index 095779cb3f..924d510a2f 100644 --- a/src/ch/ch_monitor.c +++ b/src/ch/ch_monitor.c @@ -920,3 +920,63 @@ virCHMonitorResumeVM(virCHMonitor *mon) { return virCHMonitorPutNoContent(mon, URL_VM_RESUME); } + +/** + * virCHMonitorGetIOThreads: + * @mon: Pointer to the monitor + * @iothreads: Location to return array of IOThreadInfo data + * + * Retrieve the list of iothreads defined/running for the machine + * + * Returns count of IOThreadInfo structures on success + * -1 on error. + */ +int virCHMonitorGetIOThreads(virCHMonitor *mon, + virDomainIOThreadInfo ***iothreads) +{ + size_t nthreads = 0, niothreads = 0; + int thd_index; + virDomainIOThreadInfo **iothreadinfolist = NULL, *iothreadinfo = NULL; + + *iothreads = NULL; + nthreads = virCHMonitorRefreshThreadInfo(mon); + + iothreadinfolist = g_new0(virDomainIOThreadInfo*, nthreads); + + for (thd_index = 0; thd_index < nthreads; thd_index++) { + virBitmap *map = NULL; + if (mon->threads[thd_index].type == virCHThreadTypeIO) { + iothreadinfo = g_new0(virDomainIOThreadInfo, 1); + + iothreadinfo->iothread_id = mon->threads[thd_index].ioInfo.tid; + + if (!(map = virProcessGetAffinity(iothreadinfo->iothread_id))) + goto cleanup; + + if (virBitmapToData(map, &(iothreadinfo->cpumap), + &(iothreadinfo->cpumaplen)) < 0) { + virBitmapFree(map); + goto cleanup; + } + virBitmapFree(map); + //Append to iothreadinfolist + iothreadinfolist[niothreads] = iothreadinfo; + niothreads++; + } + } + VIR_DELETE_ELEMENT_INPLACE(iothreadinfolist, + niothreads, nthreads); + *iothreads = iothreadinfolist; + VIR_DEBUG("niothreads = %ld", niothreads); + return niothreads; + + cleanup: + if (iothreadinfolist) { + for (thd_index = 0; thd_index < niothreads; thd_index++) + VIR_FREE(iothreadinfolist[thd_index]); + VIR_FREE(iothreadinfolist); + } + if (iothreadinfo) + VIR_FREE(iothreadinfo); + return -1; +} diff --git a/src/ch/ch_monitor.h b/src/ch/ch_monitor.h index f8c3fa75e8..98edb0faf9 
100644 --- a/src/ch/ch_monitor.h +++ b/src/ch/ch_monitor.h @@ -118,3 +118,5 @@ int virCHMonitorGetCPUInfo(virCHMonitor *mon, size_t maxvcpus); size_t virCHMonitorGetThreadInfo(virCHMonitor *mon, bool refresh, virCHMonitorThreadInfo **threads); +int virCHMonitorGetIOThreads(virCHMonitor *mon, + virDomainIOThreadInfo ***iothreads); diff --git a/src/ch/ch_process.c b/src/ch/ch_process.c index 8dce737adb..25c2910f9c 100644 --- a/src/ch/ch_process.c +++ b/src/ch/ch_process.c @@ -41,7 +41,6 @@ VIR_LOG_INIT("ch.ch_process"); #define START_VM_POSTFIX ": starting up vm\n"
- static virCHMonitor * virCHProcessConnectMonitor(virCHDriver *driver, virDomainObj *vm) @@ -131,7 +130,6 @@ virCHProcessUpdateInfo(virDomainObj *vm) virCHProcessUpdateConsole(vm, info);
virJSONValueFree(info); - return 0; }
@@ -313,6 +311,74 @@ virCHProcessSetupPid(virDomainObj *vm, return ret; }
+static int +virCHProcessSetupIOThread(virDomainObj *vm, + virDomainIOThreadInfo *iothread) +{ + virCHDomainObjPrivate *priv = vm->privateData; + return virCHProcessSetupPid(vm, iothread->iothread_id, + VIR_CGROUP_THREAD_IOTHREAD, + iothread->iothread_id, + priv->autoCpuset, // This should be updated when CLH supports accepting + // iothread settings from input domain definition + vm->def->cputune.iothread_period, + vm->def->cputune.iothread_quota, + NULL); // CLH doesn't allow choosing a scheduler for iothreads. +} + +static int +virCHProcessSetupIOThreads(virDomainObj *vm) +{ + virCHDomainObjPrivate *priv = vm->privateData; + virDomainIOThreadInfo **iothreads = NULL; + size_t i; + size_t niothreads; + + niothreads = virCHMonitorGetIOThreads(priv->monitor, &iothreads); + for (i = 0; i < niothreads; i++) { + VIR_DEBUG("IOThread index = %ld , tid = %d", i, iothreads[i]->iothread_id); + if (virCHProcessSetupIOThread(vm, iothreads[i]) < 0) + return -1; + } + return 0; +} + + +static int +virCHProcessSetupEmulatorThread(virDomainObj *vm, + virCHMonitorEmuThreadInfo emuthread) +{ + return virCHProcessSetupPid(vm, emuthread.tid, VIR_CGROUP_THREAD_EMULATOR, + 0, vm->def->cputune.emulatorpin, + vm->def->cputune.emulator_period, + vm->def->cputune.emulator_quota, + vm->def->cputune.emulatorsched); +} + +static int +virCHProcessSetupEmulatorThreads(virDomainObj *vm) +{ + int thd_index = 0; + virCHDomainObjPrivate *priv = vm->privateData; + /* + Cloud-hypervisor start 4 Emulator threads by default: + vmm + cloud-hypervisor + http-server + signal_handler + */
Please use "*" at the start of each new line in the block comment:
+ /* + * Cloud-hypervisor starts 4 Emulator threads by default: + * vmm + * cloud-hypervisor + * http-server + * signal_handler + */
Daniel
+ for (thd_index = 0; thd_index < priv->monitor->nthreads; thd_index++) { + if (priv->monitor->threads[thd_index].type == virCHThreadTypeEmulator) { + VIR_DEBUG("Setup tid = %d (%s) Emulator thread", priv->monitor->threads[thd_index].emuInfo.tid, + priv->monitor->threads[thd_index].emuInfo.thrName); + + if (virCHProcessSetupEmulatorThread(vm, priv->monitor->threads[thd_index].emuInfo) < 0) + return -1; + } + } + return 0; +} + /** * virCHProcessSetupVcpu: * @vm: domain object @@ -439,6 +505,14 @@ int virCHProcessStart(virCHDriver *driver,
virCHDomainRefreshThreadInfo(vm);
+ VIR_DEBUG("Setting emulator tuning/settings"); + if (virCHProcessSetupEmulatorThreads(vm) < 0) + goto cleanup; + + VIR_DEBUG("Setting iothread tuning/settings"); + if (virCHProcessSetupIOThreads(vm) < 0) + goto cleanup; + VIR_DEBUG("Setting global CPU cgroup (if required)"); if (chSetupGlobalCpuCgroup(vm) < 0) goto cleanup;

From: Praveen K Paladugu <prapal@linux.microsoft.com> Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> --- src/ch/ch_driver.c | 154 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 97dfda85e1..786e2292a5 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -1321,6 +1321,158 @@ chDomainPinVcpu(virDomainPtr dom, VIR_DOMAIN_AFFECT_LIVE); } + + +static int +chDomainGetEmulatorPinInfo(virDomainPtr dom, + unsigned char *cpumaps, + int maplen, + unsigned int flags) +{ + virDomainObj *vm = NULL; + virDomainDef *def; + virCHDomainObjPrivate *priv; + bool live; + int ret = -1; + virBitmap *cpumask = NULL; + g_autoptr(virBitmap) bitmap = NULL; + virBitmap *autoCpuset = NULL; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainGetEmulatorPinInfoEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (!(def = virDomainObjGetOneDefState(vm, flags, &live))) + goto cleanup; + + if (live) { + priv = vm->privateData; + autoCpuset = priv->autoCpuset; + } + if (def->cputune.emulatorpin) { + cpumask = def->cputune.emulatorpin; + } else if (def->cpumask) { + cpumask = def->cpumask; + } else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO && + autoCpuset) { + cpumask = autoCpuset; + } else { + if (!(bitmap = virHostCPUGetAvailableCPUsBitmap())) + goto cleanup; + cpumask = bitmap; + } + + virBitmapToDataBuf(cpumask, cpumaps, maplen); + + ret = 1; + + cleanup: + virDomainObjEndAPI(&vm); + return ret; +} + +static int +chDomainPinEmulator(virDomainPtr dom, + unsigned char *cpumap, + int maplen, + unsigned int flags) +{ + virCHDriver *driver = dom->conn->privateData; + virDomainObj *vm; + virCgroup *cgroup_emulator = NULL; + virDomainDef *def; + virDomainDef *persistentDef; + int ret = -1; + virCHDomainObjPrivate *priv; + virBitmap *pcpumap = NULL; + 
g_autoptr(virCHDriverConfig) cfg = NULL; + g_autofree char *str = NULL; + virTypedParameterPtr eventParams = NULL; + int eventNparams = 0; + int eventMaxparams = 0; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + cfg = virCHDriverGetConfig(driver); + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainPinEmulatorEnsureACL(dom->conn, vm->def, flags) < 0) + goto cleanup; + + if (virCHDomainObjBeginJob(vm, CH_JOB_MODIFY) < 0) + goto cleanup; + + if (virDomainObjGetDefs(vm, flags, &def, &persistentDef) < 0) + goto endjob; + + priv = vm->privateData; + + if (!(pcpumap = virBitmapNewData(cpumap, maplen))) + goto endjob; + + if (virBitmapIsAllClear(pcpumap)) { + virReportError(VIR_ERR_INVALID_ARG, "%s", + _("Empty cpu list for pinning")); + goto endjob; + } + + if (def) { + if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_EMULATOR, + 0, false, &cgroup_emulator) < 0) + goto endjob; + + if (chSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("failed to set cpuset.cpus in cgroup" + " for emulator threads")); + goto endjob; + } + } + + if (virProcessSetAffinity(vm->pid, pcpumap, false) < 0) + goto endjob; + + virBitmapFree(def->cputune.emulatorpin); + def->cputune.emulatorpin = NULL; + + if (!(def->cputune.emulatorpin = virBitmapNewCopy(pcpumap))) + goto endjob; + + if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) + goto endjob; + + str = virBitmapFormat(pcpumap); + if (virTypedParamsAddString(&eventParams, &eventNparams, + &eventMaxparams, + VIR_DOMAIN_TUNABLE_CPU_EMULATORPIN, + str) < 0) + goto endjob; + + } + + + ret = 0; + + endjob: + virCHDomainObjEndJob(vm); + + cleanup: + if (cgroup_emulator) + virCgroupFree(cgroup_emulator); + virBitmapFree(pcpumap); + virDomainObjEndAPI(&vm); + return ret; +} + #define CH_NB_NUMA_PARAM 2 static int @@ -1639,6 +1791,8 @@ static 
virHypervisorDriver chHypervisorDriver = { .domainGetVcpuPinInfo = chDomainGetVcpuPinInfo, /* 7.9.0 */ .domainPinVcpu = chDomainPinVcpu, /* 7.9.0 */ .domainPinVcpuFlags = chDomainPinVcpuFlags, /* 7.9.0 */ + .domainPinEmulator = chDomainPinEmulator, /* 7.9.0 */ + .domainGetEmulatorPinInfo = chDomainGetEmulatorPinInfo, /* 7.9.0 */ .nodeGetCPUMap = chNodeGetCPUMap, /* 7.9.0 */ .domainSetNumaParameters = chDomainSetNumaParameters, /* 7.9.0 */ .domainGetNumaParameters = chDomainGetNumaParameters, /* 7.9.0 */ -- 2.27.0

On 10/21/21 16:31, Vineeth Pillai wrote:
From: Praveen K Paladugu <prapal@linux.microsoft.com>
Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com> --- src/ch/ch_driver.c | 154 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+)
diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c index 97dfda85e1..786e2292a5 100644 --- a/src/ch/ch_driver.c +++ b/src/ch/ch_driver.c @@ -1321,6 +1321,158 @@ chDomainPinVcpu(virDomainPtr dom, VIR_DOMAIN_AFFECT_LIVE); }
+ + +static int +chDomainGetEmulatorPinInfo(virDomainPtr dom, + unsigned char *cpumaps, + int maplen, + unsigned int flags) +{ + virDomainObj *vm = NULL; + virDomainDef *def; + virCHDomainObjPrivate *priv; + bool live; + int ret = -1; + virBitmap *cpumask = NULL; + g_autoptr(virBitmap) bitmap = NULL; + virBitmap *autoCpuset = NULL; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainGetEmulatorPinInfoEnsureACL(dom->conn, vm->def) < 0) + goto cleanup; + + if (!(def = virDomainObjGetOneDefState(vm, flags, &live))) + goto cleanup; + + if (live) { + priv = vm->privateData; + autoCpuset = priv->autoCpuset; + } + if (def->cputune.emulatorpin) { + cpumask = def->cputune.emulatorpin; + } else if (def->cpumask) { + cpumask = def->cpumask; + } else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO && + autoCpuset) { + cpumask = autoCpuset; + } else { + if (!(bitmap = virHostCPUGetAvailableCPUsBitmap())) + goto cleanup; + cpumask = bitmap; + } + + virBitmapToDataBuf(cpumask, cpumaps, maplen); + + ret = 1; + + cleanup: + virDomainObjEndAPI(&vm); + return ret; +} + +static int +chDomainPinEmulator(virDomainPtr dom, + unsigned char *cpumap, + int maplen, + unsigned int flags) +{ + virCHDriver *driver = dom->conn->privateData; + virDomainObj *vm; + virCgroup *cgroup_emulator = NULL; + virDomainDef *def; + virDomainDef *persistentDef; + int ret = -1; + virCHDomainObjPrivate *priv; + virBitmap *pcpumap = NULL; + g_autoptr(virCHDriverConfig) cfg = NULL; + g_autofree char *str = NULL; + virTypedParameterPtr eventParams = NULL; + int eventNparams = 0; + int eventMaxparams = 0; + + virCheckFlags(VIR_DOMAIN_AFFECT_LIVE | + VIR_DOMAIN_AFFECT_CONFIG, -1); + + cfg = virCHDriverGetConfig(driver); + + if (!(vm = chDomObjFromDomain(dom))) + goto cleanup; + + if (virDomainPinEmulatorEnsureACL(dom->conn, vm->def, flags) < 0) + goto cleanup; + + if (virCHDomainObjBeginJob(vm, 
CH_JOB_MODIFY) < 0) + goto cleanup; + + if (virDomainObjGetDefs(vm, flags, &def, &persistentDef) < 0) + goto endjob; + + priv = vm->privateData; + + if (!(pcpumap = virBitmapNewData(cpumap, maplen))) + goto endjob; + + if (virBitmapIsAllClear(pcpumap)) { + virReportError(VIR_ERR_INVALID_ARG, "%s", + _("Empty cpu list for pinning")); + goto endjob; + } + + if (def) { + if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) { + if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_EMULATOR, + 0, false, &cgroup_emulator) < 0) + goto endjob; + + if (chSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("failed to set cpuset.cpus in cgroup" + " for emulator threads")); + goto endjob; + } + } + + if (virProcessSetAffinity(vm->pid, pcpumap, false) < 0) + goto endjob; + + virBitmapFree(def->cputune.emulatorpin); + def->cputune.emulatorpin = NULL; + + if (!(def->cputune.emulatorpin = virBitmapNewCopy(pcpumap))) + goto endjob; + + if (virDomainObjSave(vm, driver->xmlopt, cfg->stateDir) < 0) + goto endjob; + + str = virBitmapFormat(pcpumap); + if (virTypedParamsAddString(&eventParams, &eventNparams, + &eventMaxparams, + VIR_DOMAIN_TUNABLE_CPU_EMULATORPIN, + str) < 0) + goto endjob; + + } + + + ret = 0; + + endjob:
Wrong label indentation.
+ virCHDomainObjEndJob(vm); + + cleanup:
Same here. Thanks, Daniel
+ if (cgroup_emulator) + virCgroupFree(cgroup_emulator); + virBitmapFree(pcpumap); + virDomainObjEndAPI(&vm); + return ret; +} + #define CH_NB_NUMA_PARAM 2
static int @@ -1639,6 +1791,8 @@ static virHypervisorDriver chHypervisorDriver = { .domainGetVcpuPinInfo = chDomainGetVcpuPinInfo, /* 7.9.0 */ .domainPinVcpu = chDomainPinVcpu, /* 7.9.0 */ .domainPinVcpuFlags = chDomainPinVcpuFlags, /* 7.9.0 */ + .domainPinEmulator = chDomainPinEmulator, /* 7.9.0 */ + .domainGetEmulatorPinInfo = chDomainGetEmulatorPinInfo, /* 7.9.0 */ .nodeGetCPUMap = chNodeGetCPUMap, /* 7.9.0 */ .domainSetNumaParameters = chDomainSetNumaParameters, /* 7.9.0 */ .domainGetNumaParameters = chDomainGetNumaParameters, /* 7.9.0 */
Participants (2):
- Daniel Henrique Barboza
- Vineeth Pillai