[PATCH 0/4] lxc: Add VCPU features for LXC

This series cover a lots of functionalities to LXC VCPUs. It enables sharing some timer devices between host and LXC guest using `timer` settings. It still has other improvements related to VCPU and LXC such as virtual cpuinfo content based on VCPU settings and some better resource limits. Each patch has the description of the problem and what it is trying to fix. Julio Faracco (4): lxc: Add Real Time Clock device into allowed devices lxc: Add HPET device into allowed devices lxc: Implement virtual /proc/cpuinfo via LXC fuse lxc: Count max VCPUs based on cpuset.cpus in native config. docs/formatdomain.html.in | 4 +- src/lxc/lxc_cgroup.c | 76 ++++++++++++++ src/lxc/lxc_container.c | 38 +++++++ src/lxc/lxc_container.h | 2 + src/lxc/lxc_controller.c | 98 +++++++++++++++++++ src/lxc/lxc_fuse.c | 78 ++++++++++++++- src/lxc/lxc_native.c | 24 ++++- .../lxcconf2xml-cpusettune.xml | 2 +- 8 files changed, 313 insertions(+), 9 deletions(-) -- 2.20.1

This commit share host Real Time Clock device (rtc) into LXC containers to support hardware clock. This should be available setting up a `rtc` timer under clock section. Since this option is not emulated, it should be available only for `localtime` clock. This option should be readonly due to security reasons. Before: root# hwclock --verbose hwclock from util-linux 2.32.1 System Time: 1581877557.598365 Trying to open: /dev/rtc0 Trying to open: /dev/rtc Trying to open: /dev/misc/rtc No usable clock interface found. hwclock: Cannot access the Hardware Clock via any known method. Now: root# hwclock 2020-02-16 18:23:55.374134+00:00 root# hwclock -w hwclock: ioctl(RTC_SET_TIME) to /dev/rtc to set the time failed: Permission denied Signed-off-by: Julio Faracco <jcfaracco@gmail.com> --- docs/formatdomain.html.in | 2 +- src/lxc/lxc_cgroup.c | 36 +++++++++++++++++++ src/lxc/lxc_controller.c | 73 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 1 deletion(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 1a31eda154..b045314917 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -2465,7 +2465,7 @@ being modified, and can be one of "platform" (currently unsupported), "hpet" (libxl, xen, qemu), "kvmclock" (qemu), - "pit" (qemu), "rtc" (qemu), "tsc" (libxl, qemu - + "pit" (qemu), "rtc" (qemu, lxc), "tsc" (libxl, qemu - <span class="since">since 3.2.0</span>), "hypervclock" (qemu - <span class="since">since 1.2.2</span>) or "armvtimer" (qemu - <span class="since">since 6.1.0</span>). diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 7f3701593a..752cb4b047 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -410,6 +410,42 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def, VIR_CGROUP_DEVICE_RWM) < 0) return -1; + VIR_DEBUG("Allowing timers char devices"); + + /* Sync'ed with Host clock */ + if (def->clock.offset == VIR_DOMAIN_CLOCK_OFFSET_LOCALTIME) { + for (i = 0; i < def->clock.ntimers; i++) { + virDomainTimerDefPtr timer = def->clock.timers[i]; + + switch ((virDomainTimerNameType)timer->name) { + case VIR_DOMAIN_TIMER_NAME_PLATFORM: + case VIR_DOMAIN_TIMER_NAME_TSC: + case VIR_DOMAIN_TIMER_NAME_KVMCLOCK: + case VIR_DOMAIN_TIMER_NAME_HYPERVCLOCK: + case VIR_DOMAIN_TIMER_NAME_PIT: + case VIR_DOMAIN_TIMER_NAME_HPET: + case VIR_DOMAIN_TIMER_NAME_ARMVTIMER: + case VIR_DOMAIN_TIMER_NAME_LAST: + break; + case VIR_DOMAIN_TIMER_NAME_RTC: + if (!timer->present) + break; + + if (virFileExists("/dev/rtc")) { + if (virCgroupAllowDevicePath(cgroup, "/dev/rtc", + VIR_CGROUP_DEVICE_READ, + false) < 0) + return -1; + } else { + VIR_DEBUG("Ignoring non-existent device /dev/rtc"); + } + break; + } + } + } else { + VIR_DEBUG("Ignoring non-localtime clock"); + } + VIR_DEBUG("Device whitelist complete"); return 0; diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index c3dec0859c..528f2fabf1 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -1550,6 +1550,76 @@ static int virLXCControllerPopulateDevices(virLXCControllerPtr ctrl) } +static int +virLXCControllerSetupTimers(virLXCControllerPtr ctrl) +{ + int ret = -1; + char *path = NULL; + size_t i; + struct stat sb; + virDomainDefPtr def = ctrl->def; + + /* Not sync'ed with Host clock */ + if (def->clock.offset != VIR_DOMAIN_CLOCK_OFFSET_LOCALTIME) + return 0; + + for (i = 0; i < def->clock.ntimers; i++) { + dev_t dev; + virDomainTimerDefPtr timer = def->clock.timers[i]; + + switch ((virDomainTimerNameType)timer->name) { + case VIR_DOMAIN_TIMER_NAME_PLATFORM: + case VIR_DOMAIN_TIMER_NAME_TSC: + case VIR_DOMAIN_TIMER_NAME_KVMCLOCK: + case VIR_DOMAIN_TIMER_NAME_HYPERVCLOCK: + case VIR_DOMAIN_TIMER_NAME_PIT: + case VIR_DOMAIN_TIMER_NAME_HPET: + case VIR_DOMAIN_TIMER_NAME_ARMVTIMER: + case VIR_DOMAIN_TIMER_NAME_LAST: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unsupported timer type (name) '%s'"), + virDomainTimerNameTypeToString(timer->name)); + return -1; + case VIR_DOMAIN_TIMER_NAME_RTC: + if (!timer->present) + break; + + if (stat("/dev/rtc", &sb) < 0) { + if (errno == EACCES) + goto cleanup; + + virReportSystemError(errno, + _("Path '%s' is not accessible"), + path); + goto cleanup; + } + + path = g_strdup_printf("/%s/%s.dev/%s", LXC_STATE_DIR, + ctrl->def->name, "/rtc"); + + dev = makedev(major(sb.st_rdev), minor(sb.st_rdev)); + if (mknod(path, S_IFCHR, dev) < 0 || + chmod(path, sb.st_mode)) { + virReportSystemError(errno, + _("Failed to make device %s"), + path); + goto cleanup; + } + + if (lxcContainerChown(ctrl->def, path) < 0) + goto cleanup; + break; + } + } + + ret = 0; + cleanup: + VIR_FREE(path); + return ret; + +} + + static int virLXCControllerSetupHostdevSubsysUSB(virDomainDefPtr vmDef, virDomainHostdevDefPtr def, @@ -2352,6 +2422,9 @@ virLXCControllerRun(virLXCControllerPtr ctrl) if (virLXCControllerPopulateDevices(ctrl) < 0) goto cleanup; + if (virLXCControllerSetupTimers(ctrl) < 0) + goto cleanup; + if (virLXCControllerSetupAllDisks(ctrl) < 0) goto cleanup; -- 2.20.1

On 2/16/20 2:11 PM, Julio Faracco wrote:
This commit share host Real Time Clock device (rtc) into LXC containers to support hardware clock. This should be available setting up a `rtc` timer under clock section. Since this option is not emulated, it should be available only for `localtime` clock. This option should be readonly due to security reasons.
Before: root# hwclock --verbose hwclock from util-linux 2.32.1 System Time: 1581877557.598365 Trying to open: /dev/rtc0 Trying to open: /dev/rtc Trying to open: /dev/misc/rtc No usable clock interface found. hwclock: Cannot access the Hardware Clock via any known method.
Now: root# hwclock 2020-02-16 18:23:55.374134+00:00 root# hwclock -w hwclock: ioctl(RTC_SET_TIME) to /dev/rtc to set the time failed: Permission denied
Signed-off-by: Julio Faracco <jcfaracco@gmail.com> --- docs/formatdomain.html.in | 2 +- src/lxc/lxc_cgroup.c | 36 +++++++++++++++++++ src/lxc/lxc_controller.c | 73 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 1 deletion(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 1a31eda154..b045314917 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -2465,7 +2465,7 @@ being modified, and can be one of "platform" (currently unsupported), "hpet" (libxl, xen, qemu), "kvmclock" (qemu), - "pit" (qemu), "rtc" (qemu), "tsc" (libxl, qemu - + "pit" (qemu), "rtc" (qemu, lxc), "tsc" (libxl, qemu - <span class="since">since 3.2.0</span>), "hypervclock" (qemu - <span class="since">since 1.2.2</span>) or "armvtimer" (qemu - <span class="since">since 6.1.0</span>). diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 7f3701593a..752cb4b047 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -410,6 +410,42 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def, VIR_CGROUP_DEVICE_RWM) < 0) return -1;
+ VIR_DEBUG("Allowing timers char devices"); + + /* Sync'ed with Host clock */ + if (def->clock.offset == VIR_DOMAIN_CLOCK_OFFSET_LOCALTIME) { + for (i = 0; i < def->clock.ntimers; i++) { + virDomainTimerDefPtr timer = def->clock.timers[i]; + + switch ((virDomainTimerNameType)timer->name) { + case VIR_DOMAIN_TIMER_NAME_PLATFORM: + case VIR_DOMAIN_TIMER_NAME_TSC: + case VIR_DOMAIN_TIMER_NAME_KVMCLOCK: + case VIR_DOMAIN_TIMER_NAME_HYPERVCLOCK: + case VIR_DOMAIN_TIMER_NAME_PIT: + case VIR_DOMAIN_TIMER_NAME_HPET: + case VIR_DOMAIN_TIMER_NAME_ARMVTIMER: + case VIR_DOMAIN_TIMER_NAME_LAST: + break; + case VIR_DOMAIN_TIMER_NAME_RTC: + if (!timer->present) + break; + + if (virFileExists("/dev/rtc")) { + if (virCgroupAllowDevicePath(cgroup, "/dev/rtc", + VIR_CGROUP_DEVICE_READ, + false) < 0) + return -1; + } else { + VIR_DEBUG("Ignoring non-existent device /dev/rtc"); + } + break; + } + } + } else { + VIR_DEBUG("Ignoring non-localtime clock"); + } + VIR_DEBUG("Device whitelist complete");
return 0; diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index c3dec0859c..528f2fabf1 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -1550,6 +1550,76 @@ static int virLXCControllerPopulateDevices(virLXCControllerPtr ctrl) }
+static int +virLXCControllerSetupTimers(virLXCControllerPtr ctrl) +{ + int ret = -1; + char *path = NULL;
You can use g_autofree with this path variable to avoid the need for a VIR_FREE() call and the 'cleanup' label. The 'ret' variable becomes unneeded after that as well since you can just return -1 on error.
+ size_t i; + struct stat sb; + virDomainDefPtr def = ctrl->def; + + /* Not sync'ed with Host clock */ + if (def->clock.offset != VIR_DOMAIN_CLOCK_OFFSET_LOCALTIME) + return 0; + + for (i = 0; i < def->clock.ntimers; i++) { + dev_t dev; + virDomainTimerDefPtr timer = def->clock.timers[i]; + + switch ((virDomainTimerNameType)timer->name) { + case VIR_DOMAIN_TIMER_NAME_PLATFORM: + case VIR_DOMAIN_TIMER_NAME_TSC: + case VIR_DOMAIN_TIMER_NAME_KVMCLOCK: + case VIR_DOMAIN_TIMER_NAME_HYPERVCLOCK: + case VIR_DOMAIN_TIMER_NAME_PIT: + case VIR_DOMAIN_TIMER_NAME_HPET: + case VIR_DOMAIN_TIMER_NAME_ARMVTIMER: + case VIR_DOMAIN_TIMER_NAME_LAST: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unsupported timer type (name) '%s'"), + virDomainTimerNameTypeToString(timer->name)); + return -1; + case VIR_DOMAIN_TIMER_NAME_RTC: + if (!timer->present) + break; + + if (stat("/dev/rtc", &sb) < 0) { + if (errno == EACCES) + goto cleanup; + + virReportSystemError(errno, + _("Path '%s' is not accessible"), + path); + goto cleanup; + } + + path = g_strdup_printf("/%s/%s.dev/%s", LXC_STATE_DIR, + ctrl->def->name, "/rtc"); + + dev = makedev(major(sb.st_rdev), minor(sb.st_rdev)); + if (mknod(path, S_IFCHR, dev) < 0 || + chmod(path, sb.st_mode)) { + virReportSystemError(errno, + _("Failed to make device %s"), + path); + goto cleanup; + } + + if (lxcContainerChown(ctrl->def, path) < 0) + goto cleanup; + break; + } + } + + ret = 0; + cleanup: + VIR_FREE(path); + return ret; + +} + + static int virLXCControllerSetupHostdevSubsysUSB(virDomainDefPtr vmDef, virDomainHostdevDefPtr def, @@ -2352,6 +2422,9 @@ virLXCControllerRun(virLXCControllerPtr ctrl) if (virLXCControllerPopulateDevices(ctrl) < 0) goto cleanup;
+ if (virLXCControllerSetupTimers(ctrl) < 0) + goto cleanup; + if (virLXCControllerSetupAllDisks(ctrl) < 0) goto cleanup;

This commit is related to RTC timer device too. HPET is being shared from host device through `localtime` clock. This timer is available creating a new timer using `hpet` name. Signed-off-by: Julio Faracco <jcfaracco@gmail.com> --- docs/formatdomain.html.in | 2 +- src/lxc/lxc_cgroup.c | 17 +++++++++++++---- src/lxc/lxc_controller.c | 33 +++++++++++++++++++++++++++++---- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index b045314917..9822fee0ab 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -2464,7 +2464,7 @@ The <code>name</code> attribute selects which timer is being modified, and can be one of "platform" (currently unsupported), - "hpet" (libxl, xen, qemu), "kvmclock" (qemu), + "hpet" (libxl, xen, qemu, lxc), "kvmclock" (qemu), "pit" (qemu), "rtc" (qemu, lxc), "tsc" (libxl, qemu - <span class="since">since 3.2.0</span>), "hypervclock" (qemu - <span class="since">since 1.2.2</span>) or diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 752cb4b047..470337e675 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -417,20 +417,19 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def, for (i = 0; i < def->clock.ntimers; i++) { virDomainTimerDefPtr timer = def->clock.timers[i]; + if (!timer->present) + continue; + switch ((virDomainTimerNameType)timer->name) { case VIR_DOMAIN_TIMER_NAME_PLATFORM: case VIR_DOMAIN_TIMER_NAME_TSC: case VIR_DOMAIN_TIMER_NAME_KVMCLOCK: case VIR_DOMAIN_TIMER_NAME_HYPERVCLOCK: case VIR_DOMAIN_TIMER_NAME_PIT: - case VIR_DOMAIN_TIMER_NAME_HPET: case VIR_DOMAIN_TIMER_NAME_ARMVTIMER: case VIR_DOMAIN_TIMER_NAME_LAST: break; case VIR_DOMAIN_TIMER_NAME_RTC: - if (!timer->present) - break; - if (virFileExists("/dev/rtc")) { if (virCgroupAllowDevicePath(cgroup, "/dev/rtc", VIR_CGROUP_DEVICE_READ, @@ -440,6 +439,16 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def, VIR_DEBUG("Ignoring non-existent device /dev/rtc"); } break; + case VIR_DOMAIN_TIMER_NAME_HPET: + if (virFileExists("/dev/hpet")) { + if (virCgroupAllowDevicePath(cgroup, "/dev/hpet", + VIR_CGROUP_DEVICE_READ, + false) < 0) + return -1; + } else { + VIR_DEBUG("Ignoring non-existent device /dev/hpet"); + } + break; } } } else { diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 528f2fabf1..b193474767 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -1567,13 +1567,15 @@ virLXCControllerSetupTimers(virLXCControllerPtr ctrl) dev_t dev; virDomainTimerDefPtr timer = def->clock.timers[i]; + if (!timer->present) + continue; + switch ((virDomainTimerNameType)timer->name) { case VIR_DOMAIN_TIMER_NAME_PLATFORM: case VIR_DOMAIN_TIMER_NAME_TSC: case VIR_DOMAIN_TIMER_NAME_KVMCLOCK: case VIR_DOMAIN_TIMER_NAME_HYPERVCLOCK: case VIR_DOMAIN_TIMER_NAME_PIT: - case VIR_DOMAIN_TIMER_NAME_HPET: case VIR_DOMAIN_TIMER_NAME_ARMVTIMER: case VIR_DOMAIN_TIMER_NAME_LAST: virReportError(VIR_ERR_CONFIG_UNSUPPORTED, @@ -1581,9 +1583,6 @@ virLXCControllerSetupTimers(virLXCControllerPtr ctrl) virDomainTimerNameTypeToString(timer->name)); return -1; case VIR_DOMAIN_TIMER_NAME_RTC: - if (!timer->present) - break; - if (stat("/dev/rtc", &sb) < 0) { if (errno == EACCES) goto cleanup; @@ -1606,6 +1605,32 @@ virLXCControllerSetupTimers(virLXCControllerPtr ctrl) goto cleanup; } + if (lxcContainerChown(ctrl->def, path) < 0) + goto cleanup; + break; + case VIR_DOMAIN_TIMER_NAME_HPET: + if (stat("/dev/hpet", &sb) < 0) { + if (errno == EACCES) + goto cleanup; + + virReportSystemError(errno, + _("Path '%s' is not accessible"), + path); + goto cleanup; + } + + path = g_strdup_printf("/%s/%s.dev/%s", LXC_STATE_DIR, + ctrl->def->name, "/hpet"); + + dev = makedev(major(sb.st_rdev), minor(sb.st_rdev)); + if (mknod(path, S_IFCHR, dev) < 0 || + chmod(path, sb.st_mode)) { + virReportSystemError(errno, + _("Failed to make device %s"), + path); + goto cleanup; + } + if (lxcContainerChown(ctrl->def, path) < 0) goto cleanup; break; -- 2.20.1

This commit tries to fix a lots of issues related to LXC VCPUs. One of them is related to /proc/cpuinfo content. If only 1 VCPU is set, LXC containers will show all CPUs available for host. The second one is related to CPU share, if an user set only 1 VCPU, the container/process will use all available CPUs. (This is not the case when `cpuset` attribute is declared. So, this commit adds a virtual cpuinfo based on VCPU mapping and it automatic limits the CPU usage according VCPU count. Example (now): LXC container - 8 CPUS with 2 VCPU: lxc-root# stress --cpu 8 On host machine, only CPU 0 and 1 have 100% usage. Signed-off-by: Julio Faracco <jcfaracco@gmail.com> --- src/lxc/lxc_cgroup.c | 31 ++++++++++++++++ src/lxc/lxc_container.c | 15 ++++++++ src/lxc/lxc_fuse.c | 78 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 120 insertions(+), 4 deletions(-) diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 470337e675..a6c73d9d55 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -59,6 +59,34 @@ static int virLXCCgroupSetupCpuTune(virDomainDefPtr def, } +static int virLXCCgroupSetupVcpuAuto(virDomainDefPtr def, + virCgroupPtr cgroup) +{ + size_t i; + int vcpumax; + virBuffer buffer = VIR_BUFFER_INITIALIZER; + virBufferPtr cpuset = &buffer; + + vcpumax = virDomainDefGetVcpusMax(def); + for (i = 0; i < vcpumax; i++) { + virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(def, i); + /* Cgroup is smart enough to convert numbers separated + * by comma into ranges. Example: "0,1,2,5," -> "0-2,5". + * Libvirt does not need to process it here. */ + if (vcpu) + virBufferAsprintf(cpuset, "%zu,", i); + } + if (virCgroupSetCpusetCpus(cgroup, + virBufferCurrentContent(cpuset)) < 0) { + virBufferFreeAndReset(cpuset); + return -1; + } + + virBufferFreeAndReset(cpuset); + return 0; +} + + static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, virCgroupPtr cgroup, virBitmapPtr nodemask) @@ -76,6 +104,9 @@ static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, goto cleanup; /* free mask to make sure we won't use it in a wrong way later */ VIR_FREE(mask); + } else { + /* auto mode for VCPU limits */ + virLXCCgroupSetupVcpuAuto(def, cgroup); } if (virDomainNumatuneGetMode(def->numa, -1, &mode) < 0 || diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 41efe43a14..1a2c97c9f4 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -999,6 +999,7 @@ static int lxcContainerMountProcFuse(virDomainDefPtr def, { int ret; char *meminfo_path = NULL; + char *cpuinfo_path = NULL; VIR_DEBUG("Mount /proc/meminfo stateDir=%s", stateDir); @@ -1013,7 +1014,21 @@ static int lxcContainerMountProcFuse(virDomainDefPtr def, meminfo_path); } + VIR_DEBUG("Mount /proc/cpuinfo stateDir=%s", stateDir); + + cpuinfo_path = g_strdup_printf("/.oldroot/%s/%s.fuse/cpuinfo", + stateDir, + def->name); + + if ((ret = mount(cpuinfo_path, "/proc/cpuinfo", + NULL, MS_BIND, NULL)) < 0) { + virReportSystemError(errno, + _("Failed to mount %s on /proc/cpuinfo"), + cpuinfo_path); + } + VIR_FREE(meminfo_path); + VIR_FREE(cpuinfo_path); return ret; } #else diff --git a/src/lxc/lxc_fuse.c b/src/lxc/lxc_fuse.c index 44f240a0b5..12fa69d494 100644 --- a/src/lxc/lxc_fuse.c +++ b/src/lxc/lxc_fuse.c @@ -37,6 +37,7 @@ #if WITH_FUSE static const char *fuse_meminfo_path = "/meminfo"; +static const char *fuse_cpuinfo_path = "/cpuinfo"; static int lxcProcGetattr(const char *path, struct stat *stbuf) { @@ -54,7 +55,8 @@ static int lxcProcGetattr(const char *path, struct stat *stbuf) if (STREQ(path, "/")) { stbuf->st_mode = S_IFDIR | 0755; stbuf->st_nlink = 2; - } else if (STREQ(path, fuse_meminfo_path)) { + } else if (STREQ(path, fuse_meminfo_path) || + STREQ(path, fuse_cpuinfo_path)) { if (stat(mempath, &sb) < 0) { res = -errno; goto cleanup; @@ -90,6 +92,7 @@ static int lxcProcReaddir(const char *path, void *buf, filler(buf, ".", NULL, 0); filler(buf, "..", NULL, 0); filler(buf, fuse_meminfo_path + 1, NULL, 0); + filler(buf, fuse_cpuinfo_path + 1, NULL, 0); return 0; } @@ -97,7 +100,8 @@ static int lxcProcReaddir(const char *path, void *buf, static int lxcProcOpen(const char *path G_GNUC_UNUSED, struct fuse_file_info *fi G_GNUC_UNUSED) { - if (STRNEQ(path, fuse_meminfo_path)) + if (STRNEQ(path, fuse_meminfo_path) && + STRNEQ(path, fuse_cpuinfo_path)) return -ENOENT; if ((fi->flags & 3) != O_RDONLY) @@ -125,7 +129,7 @@ static int lxcProcHostRead(char *path, char *buf, size_t size, off_t offset) static int lxcProcReadMeminfo(char *hostpath, virDomainDefPtr def, char *buf, size_t size, off_t offset) { - int res; + int res = -1; FILE *fd = NULL; char *line = NULL; size_t n; @@ -151,7 +155,6 @@ static int lxcProcReadMeminfo(char *hostpath, virDomainDefPtr def, goto cleanup; } - res = -1; while (getline(&line, &n, fd) > 0) { char *ptr = strchr(line, ':'); if (!ptr) @@ -235,6 +238,70 @@ static int lxcProcReadMeminfo(char *hostpath, virDomainDefPtr def, return res; } + +static int lxcProcReadCpuinfo(char *hostpath, virDomainDefPtr def, + char *buf, size_t size, off_t offset) +{ + int res = -1; + FILE *fd = NULL; + char *line = NULL; + size_t n; + virBuffer buffer = VIR_BUFFER_INITIALIZER; + virBufferPtr new_cpuinfo = &buffer; + size_t cpu; + size_t nvcpu; + size_t curcpu = 0; + bool get_proc = false; + + fd = fopen(hostpath, "r"); + if (fd == NULL) { + virReportSystemError(errno, _("Cannot open %s"), hostpath); + res = -errno; + goto cleanup; + } + + /* /proc/cpuinfo does not support fseek */ + if (offset > 0) { + res = 0; + goto cleanup; + } + + nvcpu = virDomainDefGetVcpus(def); + while (getline(&line, &n, fd) > 0) { + if (sscanf(line, "processor\t: %zu", &cpu) == 1) { + virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(def, cpu); + /* VCPU is mapped */ + if (vcpu) { + if (curcpu == nvcpu) + break; + + virBufferAsprintf(new_cpuinfo, "processor\t: %zu\n", + curcpu); + curcpu++; + get_proc = true; + } else { + get_proc = false; + } + } else { + /* It is not a processor index */ + if (get_proc) + virBufferAdd(new_cpuinfo, line, -1); + } + } + + res = strlen(virBufferCurrentContent(new_cpuinfo)); + if (res > size) + res = size; + memcpy(buf, virBufferCurrentContent(new_cpuinfo), res); + + cleanup: + VIR_FREE(line); + virBufferFreeAndReset(new_cpuinfo); + VIR_FORCE_FCLOSE(fd); + return res; +} + + static int lxcProcRead(const char *path G_GNUC_UNUSED, char *buf G_GNUC_UNUSED, size_t size G_GNUC_UNUSED, @@ -254,6 +321,9 @@ static int lxcProcRead(const char *path G_GNUC_UNUSED, if (STREQ(path, fuse_meminfo_path)) { if ((res = lxcProcReadMeminfo(hostpath, def, buf, size, offset)) < 0) res = lxcProcHostRead(hostpath, buf, size, offset); + } else if (STREQ(path, fuse_cpuinfo_path)) { + if ((res = lxcProcReadCpuinfo(hostpath, def, buf, size, offset)) < 0) + res = lxcProcHostRead(hostpath, buf, size, offset); } VIR_FREE(hostpath); -- 2.20.1

On 2/16/20 2:11 PM, Julio Faracco wrote:
This commit tries to fix a lots of issues related to LXC VCPUs. One of
Extra 'a' there. "tries to fix lots of issues ..."
them is related to /proc/cpuinfo content. If only 1 VCPU is set, LXC containers will show all CPUs available for host. The second one is related to CPU share, if an user set only 1 VCPU, the container/process will use all available CPUs. (This is not the case when `cpuset`
This parentesis is never closed.
attribute is declared. So, this commit adds a virtual cpuinfo based on VCPU mapping and it automatic limits the CPU usage according VCPU count.
"and it automatically limits ..."
Example (now): LXC container - 8 CPUS with 2 VCPU: lxc-root# stress --cpu 8
On host machine, only CPU 0 and 1 have 100% usage.
Signed-off-by: Julio Faracco <jcfaracco@gmail.com> --- src/lxc/lxc_cgroup.c | 31 ++++++++++++++++ src/lxc/lxc_container.c | 15 ++++++++ src/lxc/lxc_fuse.c | 78 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 120 insertions(+), 4 deletions(-)
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 470337e675..a6c73d9d55 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -59,6 +59,34 @@ static int virLXCCgroupSetupCpuTune(virDomainDefPtr def, }
+static int virLXCCgroupSetupVcpuAuto(virDomainDefPtr def, + virCgroupPtr cgroup) +{ + size_t i; + int vcpumax; + virBuffer buffer = VIR_BUFFER_INITIALIZER; + virBufferPtr cpuset = &buffer; + + vcpumax = virDomainDefGetVcpusMax(def); + for (i = 0; i < vcpumax; i++) { + virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(def, i); + /* Cgroup is smart enough to convert numbers separated + * by comma into ranges. Example: "0,1,2,5," -> "0-2,5". + * Libvirt does not need to process it here. */ + if (vcpu) + virBufferAsprintf(cpuset, "%zu,", i); + } + if (virCgroupSetCpusetCpus(cgroup, + virBufferCurrentContent(cpuset)) < 0) { + virBufferFreeAndReset(cpuset); + return -1; + } + + virBufferFreeAndReset(cpuset); + return 0; +} + + static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, virCgroupPtr cgroup, virBitmapPtr nodemask) @@ -76,6 +104,9 @@ static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, goto cleanup; /* free mask to make sure we won't use it in a wrong way later */ VIR_FREE(mask); + } else { + /* auto mode for VCPU limits */ + virLXCCgroupSetupVcpuAuto(def, cgroup); }
if (virDomainNumatuneGetMode(def->numa, -1, &mode) < 0 || diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 41efe43a14..1a2c97c9f4 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -999,6 +999,7 @@ static int lxcContainerMountProcFuse(virDomainDefPtr def, { int ret; char *meminfo_path = NULL; + char *cpuinfo_path = NULL;
You can use g_autofree on cpuinfo_path to avoid the need for VIR_FREE() in the end.
VIR_DEBUG("Mount /proc/meminfo stateDir=%s", stateDir);
@@ -1013,7 +1014,21 @@ static int lxcContainerMountProcFuse(virDomainDefPtr def, meminfo_path); }
+ VIR_DEBUG("Mount /proc/cpuinfo stateDir=%s", stateDir); + + cpuinfo_path = g_strdup_printf("/.oldroot/%s/%s.fuse/cpuinfo", + stateDir, + def->name); + + if ((ret = mount(cpuinfo_path, "/proc/cpuinfo", + NULL, MS_BIND, NULL)) < 0) { + virReportSystemError(errno, + _("Failed to mount %s on /proc/cpuinfo"), + cpuinfo_path); + } + VIR_FREE(meminfo_path); + VIR_FREE(cpuinfo_path); return ret; } #else diff --git a/src/lxc/lxc_fuse.c b/src/lxc/lxc_fuse.c index 44f240a0b5..12fa69d494 100644 --- a/src/lxc/lxc_fuse.c +++ b/src/lxc/lxc_fuse.c @@ -37,6 +37,7 @@ #if WITH_FUSE
static const char *fuse_meminfo_path = "/meminfo"; +static const char *fuse_cpuinfo_path = "/cpuinfo";
static int lxcProcGetattr(const char *path, struct stat *stbuf) { @@ -54,7 +55,8 @@ static int lxcProcGetattr(const char *path, struct stat *stbuf) if (STREQ(path, "/")) { stbuf->st_mode = S_IFDIR | 0755; stbuf->st_nlink = 2; - } else if (STREQ(path, fuse_meminfo_path)) { + } else if (STREQ(path, fuse_meminfo_path) || + STREQ(path, fuse_cpuinfo_path)) { if (stat(mempath, &sb) < 0) { res = -errno; goto cleanup; @@ -90,6 +92,7 @@ static int lxcProcReaddir(const char *path, void *buf, filler(buf, ".", NULL, 0); filler(buf, "..", NULL, 0); filler(buf, fuse_meminfo_path + 1, NULL, 0); + filler(buf, fuse_cpuinfo_path + 1, NULL, 0);
return 0; } @@ -97,7 +100,8 @@ static int lxcProcReaddir(const char *path, void *buf, static int lxcProcOpen(const char *path G_GNUC_UNUSED, struct fuse_file_info *fi G_GNUC_UNUSED) { - if (STRNEQ(path, fuse_meminfo_path)) + if (STRNEQ(path, fuse_meminfo_path) && + STRNEQ(path, fuse_cpuinfo_path)) return -ENOENT;
if ((fi->flags & 3) != O_RDONLY) @@ -125,7 +129,7 @@ static int lxcProcHostRead(char *path, char *buf, size_t size, off_t offset) static int lxcProcReadMeminfo(char *hostpath, virDomainDefPtr def, char *buf, size_t size, off_t offset) { - int res; + int res = -1; FILE *fd = NULL; char *line = NULL; size_t n; @@ -151,7 +155,6 @@ static int lxcProcReadMeminfo(char *hostpath, virDomainDefPtr def, goto cleanup; }
- res = -1; while (getline(&line, &n, fd) > 0) { char *ptr = strchr(line, ':'); if (!ptr) @@ -235,6 +238,70 @@ static int lxcProcReadMeminfo(char *hostpath, virDomainDefPtr def, return res; }
+ +static int lxcProcReadCpuinfo(char *hostpath, virDomainDefPtr def, + char *buf, size_t size, off_t offset) +{ + int res = -1; + FILE *fd = NULL;
I believe that instead of creating the fd as FILE you can use VIR_AUTOCLOSE fd = -1; And it will auto close the fd for you when context is gone. Then you won't need VIR_FORCE_FCLOSE().
+ char *line = NULL;
'line' can use g_autofree as well.
+ size_t n; + virBuffer buffer = VIR_BUFFER_INITIALIZER; + virBufferPtr new_cpuinfo = &buffer; + size_t cpu; + size_t nvcpu; + size_t curcpu = 0; + bool get_proc = false; + + fd = fopen(hostpath, "r");
Suggestion: fopen() here is fine, but I believe that you can use utilities like virFileReadAll() as well. It will give you a buffer with the file contents so you don't need to rely on sscanf() and getline() in the loop below.
+ if (fd == NULL) { + virReportSystemError(errno, _("Cannot open %s"), hostpath); + res = -errno; + goto cleanup; + } + + /* /proc/cpuinfo does not support fseek */ + if (offset > 0) { + res = 0; + goto cleanup; + } + + nvcpu = virDomainDefGetVcpus(def); + while (getline(&line, &n, fd) > 0) { + if (sscanf(line, "processor\t: %zu", &cpu) == 1) { + virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(def, cpu); + /* VCPU is mapped */ + if (vcpu) { + if (curcpu == nvcpu) + break; + + virBufferAsprintf(new_cpuinfo, "processor\t: %zu\n", + curcpu); + curcpu++; + get_proc = true; + } else { + get_proc = false; + } + } else { + /* It is not a processor index */ + if (get_proc) + virBufferAdd(new_cpuinfo, line, -1); + } + } + + res = strlen(virBufferCurrentContent(new_cpuinfo)); + if (res > size) + res = size; + memcpy(buf, virBufferCurrentContent(new_cpuinfo), res); + + cleanup: + VIR_FREE(line); + virBufferFreeAndReset(new_cpuinfo); + VIR_FORCE_FCLOSE(fd); + return res; +} + + static int lxcProcRead(const char *path G_GNUC_UNUSED, char *buf G_GNUC_UNUSED, size_t size G_GNUC_UNUSED, @@ -254,6 +321,9 @@ static int lxcProcRead(const char *path G_GNUC_UNUSED, if (STREQ(path, fuse_meminfo_path)) { if ((res = lxcProcReadMeminfo(hostpath, def, buf, size, offset)) < 0) res = lxcProcHostRead(hostpath, buf, size, offset); + } else if (STREQ(path, fuse_cpuinfo_path)) { + if ((res = lxcProcReadCpuinfo(hostpath, def, buf, size, offset)) < 0) + res = lxcProcHostRead(hostpath, buf, size, offset); }
VIR_FREE(hostpath);

Em qua., 19 de fev. de 2020 às 09:27, Daniel Henrique Barboza <danielhb413@gmail.com> escreveu:
On 2/16/20 2:11 PM, Julio Faracco wrote:
This commit tries to fix a lots of issues related to LXC VCPUs. One of
Extra 'a' there. "tries to fix lots of issues ..."
them is related to /proc/cpuinfo content. If only 1 VCPU is set, LXC containers will show all CPUs available for host. The second one is related to CPU share, if an user set only 1 VCPU, the container/process will use all available CPUs. (This is not the case when `cpuset`
This parentesis is never closed.
attribute is declared. So, this commit adds a virtual cpuinfo based on VCPU mapping and it automatic limits the CPU usage according VCPU count.
"and it automatically limits ..."
Example (now): LXC container - 8 CPUS with 2 VCPU: lxc-root# stress --cpu 8
On host machine, only CPU 0 and 1 have 100% usage.
Signed-off-by: Julio Faracco <jcfaracco@gmail.com> --- src/lxc/lxc_cgroup.c | 31 ++++++++++++++++ src/lxc/lxc_container.c | 15 ++++++++ src/lxc/lxc_fuse.c | 78 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 120 insertions(+), 4 deletions(-)
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 470337e675..a6c73d9d55 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -59,6 +59,34 @@ static int virLXCCgroupSetupCpuTune(virDomainDefPtr def, }
+static int virLXCCgroupSetupVcpuAuto(virDomainDefPtr def, + virCgroupPtr cgroup) +{ + size_t i; + int vcpumax; + virBuffer buffer = VIR_BUFFER_INITIALIZER; + virBufferPtr cpuset = &buffer; + + vcpumax = virDomainDefGetVcpusMax(def); + for (i = 0; i < vcpumax; i++) { + virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(def, i); + /* Cgroup is smart enough to convert numbers separated + * by comma into ranges. Example: "0,1,2,5," -> "0-2,5". + * Libvirt does not need to process it here. */ + if (vcpu) + virBufferAsprintf(cpuset, "%zu,", i); + } + if (virCgroupSetCpusetCpus(cgroup, + virBufferCurrentContent(cpuset)) < 0) { + virBufferFreeAndReset(cpuset); + return -1; + } + + virBufferFreeAndReset(cpuset); + return 0; +} + + static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, virCgroupPtr cgroup, virBitmapPtr nodemask) @@ -76,6 +104,9 @@ static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, goto cleanup; /* free mask to make sure we won't use it in a wrong way later */ VIR_FREE(mask); + } else { + /* auto mode for VCPU limits */ + virLXCCgroupSetupVcpuAuto(def, cgroup); }
if (virDomainNumatuneGetMode(def->numa, -1, &mode) < 0 || diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 41efe43a14..1a2c97c9f4 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -999,6 +999,7 @@ static int lxcContainerMountProcFuse(virDomainDefPtr def, { int ret; char *meminfo_path = NULL; + char *cpuinfo_path = NULL;
You can use g_autofree on cpuinfo_path to avoid the need for VIR_FREE() in the end.
VIR_DEBUG("Mount /proc/meminfo stateDir=%s", stateDir);
@@ -1013,7 +1014,21 @@ static int lxcContainerMountProcFuse(virDomainDefPtr def, meminfo_path); }
+ VIR_DEBUG("Mount /proc/cpuinfo stateDir=%s", stateDir); + + cpuinfo_path = g_strdup_printf("/.oldroot/%s/%s.fuse/cpuinfo", + stateDir, + def->name); + + if ((ret = mount(cpuinfo_path, "/proc/cpuinfo", + NULL, MS_BIND, NULL)) < 0) { + virReportSystemError(errno, + _("Failed to mount %s on /proc/cpuinfo"), + cpuinfo_path); + } + VIR_FREE(meminfo_path); + VIR_FREE(cpuinfo_path); return ret; } #else diff --git a/src/lxc/lxc_fuse.c b/src/lxc/lxc_fuse.c index 44f240a0b5..12fa69d494 100644 --- a/src/lxc/lxc_fuse.c +++ b/src/lxc/lxc_fuse.c @@ -37,6 +37,7 @@ #if WITH_FUSE
static const char *fuse_meminfo_path = "/meminfo"; +static const char *fuse_cpuinfo_path = "/cpuinfo";
static int lxcProcGetattr(const char *path, struct stat *stbuf) { @@ -54,7 +55,8 @@ static int lxcProcGetattr(const char *path, struct stat *stbuf) if (STREQ(path, "/")) { stbuf->st_mode = S_IFDIR | 0755; stbuf->st_nlink = 2; - } else if (STREQ(path, fuse_meminfo_path)) { + } else if (STREQ(path, fuse_meminfo_path) || + STREQ(path, fuse_cpuinfo_path)) { if (stat(mempath, &sb) < 0) { res = -errno; goto cleanup; @@ -90,6 +92,7 @@ static int lxcProcReaddir(const char *path, void *buf, filler(buf, ".", NULL, 0); filler(buf, "..", NULL, 0); filler(buf, fuse_meminfo_path + 1, NULL, 0); + filler(buf, fuse_cpuinfo_path + 1, NULL, 0);
return 0; } @@ -97,7 +100,8 @@ static int lxcProcReaddir(const char *path, void *buf, static int lxcProcOpen(const char *path G_GNUC_UNUSED, struct fuse_file_info *fi G_GNUC_UNUSED) { - if (STRNEQ(path, fuse_meminfo_path)) + if (STRNEQ(path, fuse_meminfo_path) && + STRNEQ(path, fuse_cpuinfo_path)) return -ENOENT;
if ((fi->flags & 3) != O_RDONLY) @@ -125,7 +129,7 @@ static int lxcProcHostRead(char *path, char *buf, size_t size, off_t offset) static int lxcProcReadMeminfo(char *hostpath, virDomainDefPtr def, char *buf, size_t size, off_t offset) { - int res; + int res = -1; FILE *fd = NULL; char *line = NULL; size_t n; @@ -151,7 +155,6 @@ static int lxcProcReadMeminfo(char *hostpath, virDomainDefPtr def, goto cleanup; }
- res = -1; while (getline(&line, &n, fd) > 0) { char *ptr = strchr(line, ':'); if (!ptr) @@ -235,6 +238,70 @@ static int lxcProcReadMeminfo(char *hostpath, virDomainDefPtr def, return res; }
+ +static int lxcProcReadCpuinfo(char *hostpath, virDomainDefPtr def, + char *buf, size_t size, off_t offset) +{ + int res = -1; + FILE *fd = NULL;
I believe that instead of creating the fd as FILE you can use
VIR_AUTOCLOSE fd = -1;
And it will auto close the fd for you when context is gone. Then you won't need VIR_FORCE_FCLOSE().
+ char *line = NULL;
'line' can use g_autofree as well.
+ size_t n; + virBuffer buffer = VIR_BUFFER_INITIALIZER; + virBufferPtr new_cpuinfo = &buffer; + size_t cpu; + size_t nvcpu; + size_t curcpu = 0; + bool get_proc = false; + + fd = fopen(hostpath, "r");
Suggestion: fopen() here is fine, but I believe that you can use utilities like virFileReadAll() as well. It will give you a buffer with the file contents so you don't need to rely on sscanf() and getline() in the loop below.
Hi Daniel, Thanks for your review. I totally agree with you. I just kept this fopen() (and other functions) to keep the same style of that driver. I will resubmit this series with your suggestions and I will include some cleanups also. I think it is better. This comment is related to other reviewed patches from this series too.
+ if (fd == NULL) { + virReportSystemError(errno, _("Cannot open %s"), hostpath); + res = -errno; + goto cleanup; + } + + /* /proc/cpuinfo does not support fseek */ + if (offset > 0) { + res = 0; + goto cleanup; + } + + nvcpu = virDomainDefGetVcpus(def); + while (getline(&line, &n, fd) > 0) { + if (sscanf(line, "processor\t: %zu", &cpu) == 1) { + virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(def, cpu); + /* VCPU is mapped */ + if (vcpu) { + if (curcpu == nvcpu) + break; + + virBufferAsprintf(new_cpuinfo, "processor\t: %zu\n", + curcpu); + curcpu++; + get_proc = true; + } else { + get_proc = false; + } + } else { + /* It is not a processor index */ + if (get_proc) + virBufferAdd(new_cpuinfo, line, -1); + } + } + + res = strlen(virBufferCurrentContent(new_cpuinfo)); + if (res > size) + res = size; + memcpy(buf, virBufferCurrentContent(new_cpuinfo), res); + + cleanup: + VIR_FREE(line); + virBufferFreeAndReset(new_cpuinfo); + VIR_FORCE_FCLOSE(fd); + return res; +} + + static int lxcProcRead(const char *path G_GNUC_UNUSED, char *buf G_GNUC_UNUSED, size_t size G_GNUC_UNUSED, @@ -254,6 +321,9 @@ static int lxcProcRead(const char *path G_GNUC_UNUSED, if (STREQ(path, fuse_meminfo_path)) { if ((res = lxcProcReadMeminfo(hostpath, def, buf, size, offset)) < 0) res = lxcProcHostRead(hostpath, buf, size, offset); + } else if (STREQ(path, fuse_cpuinfo_path)) { + if ((res = lxcProcReadCpuinfo(hostpath, def, buf, size, offset)) < 0) + res = lxcProcHostRead(hostpath, buf, size, offset); }
VIR_FREE(hostpath);

Native config files sometimes can setup cpuset.cpus to pin som CPUs. Before this, LXC was using a fixed number of 1 VCPU. After this commit, XML definition will generate a dynamic number of VCPUs based on that cgroup attribute. Signed-off-by: Julio Faracco <jcfaracco@gmail.com> --- src/lxc/lxc_container.c | 23 ++++++++++++++++++ src/lxc/lxc_container.h | 2 ++ src/lxc/lxc_native.c | 24 +++++++++++++++++-- .../lxcconf2xml-cpusettune.xml | 2 +- 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 1a2c97c9f4..d3934c8c03 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -2493,3 +2493,26 @@ int lxcContainerChown(virDomainDefPtr def, const char *path) return 0; } + + +int lxcContainerGetMaxCpusInCpuset(const char *cpuset) +{ + const char *c = cpuset; + int max_cpu = 0; + + while (c) { + int a, b, ret; + + ret = sscanf(c, "%d-%d", &a, &b); + if (ret == 1) + max_cpu++; + else if (ret == 2) + max_cpu += a > b ? a - b + 1 : b - a + 1; + + if (!(c = strchr(c+1, ','))) + break; + c++; + } + + return max_cpu; +} diff --git a/src/lxc/lxc_container.h b/src/lxc/lxc_container.h index 94a6c5309c..6f112e0667 100644 --- a/src/lxc/lxc_container.h +++ b/src/lxc/lxc_container.h @@ -63,3 +63,5 @@ virArch lxcContainerGetAlt32bitArch(virArch arch); int lxcContainerChown(virDomainDefPtr def, const char *path); bool lxcIsBasicMountLocation(const char *path); + +int lxcContainerGetMaxCpusInCpuset(const char *cpuset); diff --git a/src/lxc/lxc_native.c b/src/lxc/lxc_native.c index 02d2bf33e4..409bf00bd2 100644 --- a/src/lxc/lxc_native.c +++ b/src/lxc/lxc_native.c @@ -993,6 +993,24 @@ lxcSetCpusetTune(virDomainDefPtr def, virConfPtr properties) return 0; } + +static int +lxcGetVCpuMax(virConfPtr properties) +{ + g_autofree char *value = NULL; + int vcpumax = 1; + + if (virConfGetValueString(properties, "lxc.cgroup.cpuset.cpus", + &value) > 0) { + vcpumax = lxcContainerGetMaxCpusInCpuset(value); + if (vcpumax > 0) + return vcpumax; + } + + return vcpumax; +} + + static int lxcBlkioDeviceWalkCallback(const char *name, virConfValuePtr value, void *data) { @@ -1132,6 +1150,7 @@ lxcParseConfigString(const char *config, virDomainDefPtr vmdef = NULL; g_autoptr(virConf) properties = NULL; g_autofree char *value = NULL; + int vcpumax; if (!(properties = virConfReadString(config, VIR_CONF_FLAG_LXC_FORMAT))) return NULL; @@ -1155,10 +1174,11 @@ lxcParseConfigString(const char *config, /* Value not handled by the LXC driver, setting to * minimum required to make XML parsing pass */ - if (virDomainDefSetVcpusMax(vmdef, 1, xmlopt) < 0) + vcpumax = lxcGetVCpuMax(properties); + if (virDomainDefSetVcpusMax(vmdef, vcpumax, xmlopt) < 0) goto error; - if (virDomainDefSetVcpus(vmdef, 1) < 0) + if (virDomainDefSetVcpus(vmdef, vcpumax) < 0) goto error; vmdef->nfss = 0; diff --git a/tests/lxcconf2xmldata/lxcconf2xml-cpusettune.xml b/tests/lxcconf2xmldata/lxcconf2xml-cpusettune.xml index 6df089d00f..a1fec12d9b 100644 --- a/tests/lxcconf2xmldata/lxcconf2xml-cpusettune.xml +++ b/tests/lxcconf2xmldata/lxcconf2xml-cpusettune.xml @@ -3,7 +3,7 @@ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> <memory unit='KiB'>65536</memory> <currentMemory unit='KiB'>65536</currentMemory> - <vcpu placement='static' cpuset='1-2,5-7'>1</vcpu> + <vcpu placement='static' cpuset='1-2,5-7'>5</vcpu> <numatune> <memory mode='strict' nodeset='1-4'/> </numatune> -- 2.20.1

On 2/16/20 2:11 PM, Julio Faracco wrote:
Native config files sometimes can setup cpuset.cpus to pin som CPUs.
typo: s/som/some Everything else LGTM.
Before this, LXC was using a fixed number of 1 VCPU. After this commit, XML definition will generate a dynamic number of VCPUs based on that cgroup attribute.
Signed-off-by: Julio Faracco <jcfaracco@gmail.com> --- src/lxc/lxc_container.c | 23 ++++++++++++++++++ src/lxc/lxc_container.h | 2 ++ src/lxc/lxc_native.c | 24 +++++++++++++++++-- .../lxcconf2xml-cpusettune.xml | 2 +- 4 files changed, 48 insertions(+), 3 deletions(-)
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 1a2c97c9f4..d3934c8c03 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -2493,3 +2493,26 @@ int lxcContainerChown(virDomainDefPtr def, const char *path)
return 0; } + + +int lxcContainerGetMaxCpusInCpuset(const char *cpuset) +{ + const char *c = cpuset; + int max_cpu = 0; + + while (c) { + int a, b, ret; + + ret = sscanf(c, "%d-%d", &a, &b); + if (ret == 1) + max_cpu++; + else if (ret == 2) + max_cpu += a > b ? a - b + 1 : b - a + 1; + + if (!(c = strchr(c+1, ','))) + break; + c++; + } + + return max_cpu; +} diff --git a/src/lxc/lxc_container.h b/src/lxc/lxc_container.h index 94a6c5309c..6f112e0667 100644 --- a/src/lxc/lxc_container.h +++ b/src/lxc/lxc_container.h @@ -63,3 +63,5 @@ virArch lxcContainerGetAlt32bitArch(virArch arch); int lxcContainerChown(virDomainDefPtr def, const char *path);
bool lxcIsBasicMountLocation(const char *path); + +int lxcContainerGetMaxCpusInCpuset(const char *cpuset); diff --git a/src/lxc/lxc_native.c b/src/lxc/lxc_native.c index 02d2bf33e4..409bf00bd2 100644 --- a/src/lxc/lxc_native.c +++ b/src/lxc/lxc_native.c @@ -993,6 +993,24 @@ lxcSetCpusetTune(virDomainDefPtr def, virConfPtr properties) return 0; }
+ +static int +lxcGetVCpuMax(virConfPtr properties) +{ + g_autofree char *value = NULL; + int vcpumax = 1; + + if (virConfGetValueString(properties, "lxc.cgroup.cpuset.cpus", + &value) > 0) { + vcpumax = lxcContainerGetMaxCpusInCpuset(value); + if (vcpumax > 0) + return vcpumax; + } + + return vcpumax; +} + + static int lxcBlkioDeviceWalkCallback(const char *name, virConfValuePtr value, void *data) { @@ -1132,6 +1150,7 @@ lxcParseConfigString(const char *config, virDomainDefPtr vmdef = NULL; g_autoptr(virConf) properties = NULL; g_autofree char *value = NULL; + int vcpumax;
if (!(properties = virConfReadString(config, VIR_CONF_FLAG_LXC_FORMAT))) return NULL; @@ -1155,10 +1174,11 @@ lxcParseConfigString(const char *config,
/* Value not handled by the LXC driver, setting to * minimum required to make XML parsing pass */ - if (virDomainDefSetVcpusMax(vmdef, 1, xmlopt) < 0) + vcpumax = lxcGetVCpuMax(properties); + if (virDomainDefSetVcpusMax(vmdef, vcpumax, xmlopt) < 0) goto error;
- if (virDomainDefSetVcpus(vmdef, 1) < 0) + if (virDomainDefSetVcpus(vmdef, vcpumax) < 0) goto error;
vmdef->nfss = 0; diff --git a/tests/lxcconf2xmldata/lxcconf2xml-cpusettune.xml b/tests/lxcconf2xmldata/lxcconf2xml-cpusettune.xml index 6df089d00f..a1fec12d9b 100644 --- a/tests/lxcconf2xmldata/lxcconf2xml-cpusettune.xml +++ b/tests/lxcconf2xmldata/lxcconf2xml-cpusettune.xml @@ -3,7 +3,7 @@ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> <memory unit='KiB'>65536</memory> <currentMemory unit='KiB'>65536</currentMemory> - <vcpu placement='static' cpuset='1-2,5-7'>1</vcpu> + <vcpu placement='static' cpuset='1-2,5-7'>5</vcpu> <numatune> <memory mode='strict' nodeset='1-4'/> </numatune>
participants (2)
-
Daniel Henrique Barboza
-
Julio Faracco