[PATCH 0/5] Allow freepage/allocpages on numactl-less builds

See 3/5 for explanation. Michal Prívozník (5): conf: Introduce virCapabilitiesHostNUMAGetMaxNode() virhostmem: Let caller pass max NUMA node to virHostMemGetFreePages virhostmem: Let caller pass max NUMA node to virHostMemAllocPages virhostmem: Handle numactl-less build in hugepages allocation/reporting rpm: Enable numactl on s390x libvirt.spec.in | 2 +- src/conf/capabilities.c | 9 +++++++++ src/conf/capabilities.h | 2 ++ src/libvirt_private.syms | 1 + src/lxc/lxc_driver.c | 23 +++++++++++++++++++++-- src/qemu/qemu_driver.c | 23 +++++++++++++++++++++-- src/util/virhostmem.c | 24 ++++++++++++++++++------ src/util/virhostmem.h | 2 ++ src/vbox/vbox_common.c | 20 +++++++++++++++++--- 9 files changed, 92 insertions(+), 14 deletions(-) -- 2.31.1

This is just a small helper that will be used later. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/conf/capabilities.c | 9 +++++++++ src/conf/capabilities.h | 2 ++ src/libvirt_private.syms | 1 + 3 files changed, 12 insertions(+) diff --git a/src/conf/capabilities.c b/src/conf/capabilities.c index 72d4146ac3..a3e68741a9 100644 --- a/src/conf/capabilities.c +++ b/src/conf/capabilities.c @@ -1433,6 +1433,15 @@ virCapabilitiesHostNUMAGetCpus(virCapsHostNUMA *caps, } +int +virCapabilitiesHostNUMAGetMaxNode(virCapsHostNUMA *caps) +{ + virCapsHostNUMACell *cell = g_ptr_array_index(caps->cells, caps->cells->len - 1); + + return cell->num; +} + + int virCapabilitiesGetNodeInfo(virNodeInfoPtr nodeinfo) { diff --git a/src/conf/capabilities.h b/src/conf/capabilities.h index 1b99202c9b..701878332c 100644 --- a/src/conf/capabilities.h +++ b/src/conf/capabilities.h @@ -328,6 +328,8 @@ virCapabilitiesFormatXML(virCaps *caps); virBitmap *virCapabilitiesHostNUMAGetCpus(virCapsHostNUMA *caps, virBitmap *nodemask); +int virCapabilitiesHostNUMAGetMaxNode(virCapsHostNUMA *caps); + int virCapabilitiesGetNodeInfo(virNodeInfoPtr nodeinfo); int virCapabilitiesInitPages(virCaps *caps); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index fcb02c21b1..9870c5a37c 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -69,6 +69,7 @@ virCapabilitiesGetNodeInfo; virCapabilitiesHostInitIOMMU; virCapabilitiesHostNUMAAddCell; virCapabilitiesHostNUMAGetCpus; +virCapabilitiesHostNUMAGetMaxNode; virCapabilitiesHostNUMANew; virCapabilitiesHostNUMANewHost; virCapabilitiesHostNUMARef; -- 2.31.1

In all three cases (LXC, QEMU and VBox drivers) the caller has access to host capabilities and thus know the maximum NUMA node. This means, that virHostMemGetFreePages() doesn't have to query it. Querying may fail if libvirt was compiled without numactl support. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/lxc/lxc_driver.c | 12 +++++++++++- src/qemu/qemu_driver.c | 12 +++++++++++- src/util/virhostmem.c | 6 ++---- src/util/virhostmem.h | 1 + src/vbox/vbox_common.c | 12 ++++++++++-- 5 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 8f2ca19f44..f720cf968d 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -5013,12 +5013,22 @@ lxcNodeGetFreePages(virConnectPtr conn, unsigned long long *counts, unsigned int flags) { + virLXCDriver *driver = conn->privateData; + g_autoptr(virCaps) caps = NULL; + int lastCell; + virCheckFlags(0, -1); if (virNodeGetFreePagesEnsureACL(conn) < 0) return -1; - return virHostMemGetFreePages(npages, pages, startCell, cellCount, counts); + if (!(caps = virLXCDriverGetCapabilities(driver, false))) + return -1; + + lastCell = virCapabilitiesHostNUMAGetMaxNode(caps->host.numa); + + return virHostMemGetFreePages(npages, pages, startCell, cellCount, + lastCell, counts); } diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index f31e13889e..6a065a9c06 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -17428,12 +17428,22 @@ qemuNodeGetFreePages(virConnectPtr conn, unsigned long long *counts, unsigned int flags) { + virQEMUDriver *driver = conn->privateData; + g_autoptr(virCaps) caps = NULL; + int lastCell; + virCheckFlags(0, -1); if (virNodeGetFreePagesEnsureACL(conn) < 0) return -1; - return virHostMemGetFreePages(npages, pages, startCell, cellCount, counts); + if (!(caps = virQEMUDriverGetCapabilities(driver, false))) + return -1; + + lastCell = virCapabilitiesHostNUMAGetMaxNode(caps->host.numa); + + return virHostMemGetFreePages(npages, pages, startCell, cellCount, + lastCell, counts); } diff --git a/src/util/virhostmem.c b/src/util/virhostmem.c index 8aa675cb4f..43dd775fd3 100644 --- a/src/util/virhostmem.c +++ b/src/util/virhostmem.c @@ -843,14 +843,12 @@ virHostMemGetFreePages(unsigned int npages, unsigned int *pages, int startCell, unsigned int cellCount, + int lastCell, unsigned long long *counts) { - int cell, lastCell; + int cell; size_t i, ncounts = 0; - if ((lastCell = virNumaGetMaxNode()) < 0) - return 0; - if (startCell > lastCell) { virReportError(VIR_ERR_INTERNAL_ERROR, _("start cell %d out of range (0-%d)"), diff --git a/src/util/virhostmem.h b/src/util/virhostmem.h index 1369829807..ecfdd71618 100644 --- a/src/util/virhostmem.h +++ b/src/util/virhostmem.h @@ -45,6 +45,7 @@ int virHostMemGetFreePages(unsigned int npages, unsigned int *pages, int startCell, unsigned int cellCount, + int lastCell, unsigned long long *counts); int virHostMemAllocPages(unsigned int npages, diff --git a/src/vbox/vbox_common.c b/src/vbox/vbox_common.c index ecdcdbe88d..89f74b86d6 100644 --- a/src/vbox/vbox_common.c +++ b/src/vbox/vbox_common.c @@ -7598,7 +7598,7 @@ vboxNodeGetFreeMemory(virConnectPtr conn G_GNUC_UNUSED) } static int -vboxNodeGetFreePages(virConnectPtr conn G_GNUC_UNUSED, +vboxNodeGetFreePages(virConnectPtr conn, unsigned int npages, unsigned int *pages, int startCell, @@ -7606,9 +7606,17 @@ vboxNodeGetFreePages(virConnectPtr conn G_GNUC_UNUSED, unsigned long long *counts, unsigned int flags) { + struct _vboxDriver *driver = conn->privateData; + int lastCell; + virCheckFlags(0, -1); - return virHostMemGetFreePages(npages, pages, startCell, cellCount, counts); + virObjectLock(driver); + lastCell = virCapabilitiesHostNUMAGetMaxNode(driver->caps->host.numa); + virObjectUnlock(driver); + + return virHostMemGetFreePages(npages, pages, startCell, + cellCount, lastCell, counts); } static int -- 2.31.1

In all three cases (LXC, QEMU and VBox drivers) the caller has access to host capabilities and thus know the maximum NUMA node. This means, that virHostMemAllocPages() doesn't have to query it. Querying may fail if libvirt was compiled without numactl support. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/lxc/lxc_driver.c | 11 ++++++++++- src/qemu/qemu_driver.c | 11 ++++++++++- src/util/virhostmem.c | 6 ++---- src/util/virhostmem.h | 1 + src/vbox/vbox_common.c | 8 +++++++- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index f720cf968d..e2720a6f89 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -5041,6 +5041,9 @@ lxcNodeAllocPages(virConnectPtr conn, unsigned int cellCount, unsigned int flags) { + virLXCDriver *driver = conn->privateData; + g_autoptr(virCaps) caps = NULL; + int lastCell; bool add = !(flags & VIR_NODE_ALLOC_PAGES_SET); virCheckFlags(VIR_NODE_ALLOC_PAGES_SET, -1); @@ -5048,8 +5051,14 @@ lxcNodeAllocPages(virConnectPtr conn, if (virNodeAllocPagesEnsureACL(conn) < 0) return -1; + if (!(caps = virLXCDriverGetCapabilities(driver, false))) + return -1; + + lastCell = virCapabilitiesHostNUMAGetMaxNode(caps->host.numa); + return virHostMemAllocPages(npages, pageSizes, pageCounts, - startCell, cellCount, add); + startCell, cellCount, + lastCell, add); } diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 6a065a9c06..444e9e5cbc 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -18802,6 +18802,9 @@ qemuNodeAllocPages(virConnectPtr conn, unsigned int cellCount, unsigned int flags) { + virQEMUDriver *driver = conn->privateData; + g_autoptr(virCaps) caps = NULL; + int lastCell; bool add = !(flags & VIR_NODE_ALLOC_PAGES_SET); virCheckFlags(VIR_NODE_ALLOC_PAGES_SET, -1); @@ -18809,8 +18812,14 @@ qemuNodeAllocPages(virConnectPtr conn, if (virNodeAllocPagesEnsureACL(conn) < 0) return -1; + if (!(caps = virQEMUDriverGetCapabilities(driver, false))) + return -1; + + lastCell = virCapabilitiesHostNUMAGetMaxNode(caps->host.numa); + return virHostMemAllocPages(npages, pageSizes, pageCounts, - startCell, cellCount, add); + startCell, cellCount, + lastCell, add); } static int diff --git a/src/util/virhostmem.c b/src/util/virhostmem.c index 43dd775fd3..8734dafb72 100644 --- a/src/util/virhostmem.c +++ b/src/util/virhostmem.c @@ -885,14 +885,12 @@ virHostMemAllocPages(unsigned int npages, unsigned long long *pageCounts, int startCell, unsigned int cellCount, + int lastCell, bool add) { - int cell, lastCell; + int cell; size_t i, ncounts = 0; - if ((lastCell = virNumaGetMaxNode()) < 0) - return 0; - if (startCell > lastCell) { virReportError(VIR_ERR_INTERNAL_ERROR, _("start cell %d out of range (0-%d)"), diff --git a/src/util/virhostmem.h b/src/util/virhostmem.h index ecfdd71618..3265215d84 100644 --- a/src/util/virhostmem.h +++ b/src/util/virhostmem.h @@ -53,4 +53,5 @@ int virHostMemAllocPages(unsigned int npages, unsigned long long *pageCounts, int startCell, unsigned int cellCount, + int lastCell, bool add); diff --git a/src/vbox/vbox_common.c b/src/vbox/vbox_common.c index 89f74b86d6..7334254a36 100644 --- a/src/vbox/vbox_common.c +++ b/src/vbox/vbox_common.c @@ -7628,12 +7628,18 @@ vboxNodeAllocPages(virConnectPtr conn G_GNUC_UNUSED, unsigned int cellCount, unsigned int flags) { + struct _vboxDriver *driver = conn->privateData; + int lastCell; bool add = !(flags & VIR_NODE_ALLOC_PAGES_SET); virCheckFlags(VIR_NODE_ALLOC_PAGES_SET, -1); + virObjectLock(driver); + lastCell = virCapabilitiesHostNUMAGetMaxNode(driver->caps->host.numa); + virObjectUnlock(driver); + return virHostMemAllocPages(npages, pageSizes, pageCounts, - startCell, cellCount, add); + startCell, cellCount, lastCell, add); } static int -- 2.31.1

When using 'virsh freepages' or 'virsh allocpages' then virHostMemGetFreePages() or virHostMemAllocPages() is called, respectively. But the following may happen: libvirt was built without numactl support and thus a fake NUMA node was constructed for capabilities, which means that startCell is going to be 0. But we can't blindly pass startCell = 0 to virNumaGetPageInfo() nor virNumaSetPagePoolSize() because they would operate over node specific path (/sys/devices/system/node/nodeX) rather than NUMA agnostic path (/sys/kernel/mm/hugepages/) and we are not guaranteed that the former exists (kernel might have been built without NUMA support). Resolves:https://bugzilla.redhat.com/show_bug.cgi?id=1978574 Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/virhostmem.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/util/virhostmem.c b/src/util/virhostmem.c index 8734dafb72..b13c3fe38e 100644 --- a/src/util/virhostmem.c +++ b/src/util/virhostmem.c @@ -849,6 +849,14 @@ virHostMemGetFreePages(unsigned int npages, int cell; size_t i, ncounts = 0; + if (!virNumaIsAvailable() && lastCell == 0 && + startCell == 0 && cellCount == 1) { + /* As a special case, if we were built without numactl and want to + * fetch info on the fake NUMA node set startCell to -1 to make the + * loop below fetch overall info. */ + startCell = -1; + } + if (startCell > lastCell) { virReportError(VIR_ERR_INTERNAL_ERROR, _("start cell %d out of range (0-%d)"), @@ -891,6 +899,14 @@ virHostMemAllocPages(unsigned int npages, int cell; size_t i, ncounts = 0; + if (!virNumaIsAvailable() && lastCell == 0 && + startCell == 0 && cellCount == 1) { + /* As a special case, if we were built without numactl and want to + * allocate hugepages on the fake NUMA node set startCell to -1 to make + * the loop below operate on NUMA agnostic sysfs paths. */ + startCell = -1; + } + if (startCell > lastCell) { virReportError(VIR_ERR_INTERNAL_ERROR, _("start cell %d out of range (0-%d)"), -- 2.31.1

While s390x doesn't have NUMA nodes it has libnuma which is still helpful as it parses sysfs for us and kernel emulates NUMA#0. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- libvirt.spec.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libvirt.spec.in b/libvirt.spec.in index c3f50224cc..b01379d242 100644 --- a/libvirt.spec.in +++ b/libvirt.spec.in @@ -24,7 +24,7 @@ %define arches_vbox %{arches_x86} %define arches_ceph %{arches_64bit} %define arches_zfs %{arches_x86} %{power64} %{arm} -%define arches_numactl %{arches_x86} %{power64} aarch64 +%define arches_numactl %{arches_x86} %{power64} aarch64 s390x %define arches_numad %{arches_x86} %{power64} aarch64 # The hypervisor drivers that run in libvirtd -- 2.31.1

On Thu, Aug 19, 2021 at 04:27:00PM +0200, Michal Privoznik wrote:
See 3/5 for explanation.
Michal Prívozník (5): conf: Introduce virCapabilitiesHostNUMAGetMaxNode() virhostmem: Let caller pass max NUMA node to virHostMemGetFreePages virhostmem: Let caller pass max NUMA node to virHostMemAllocPages virhostmem: Handle numactl-less build in hugepages allocation/reporting rpm: Enable numactl on s390x
Reviewed-by: Martin Kletzander <mkletzan@redhat.com>
libvirt.spec.in | 2 +- src/conf/capabilities.c | 9 +++++++++ src/conf/capabilities.h | 2 ++ src/libvirt_private.syms | 1 + src/lxc/lxc_driver.c | 23 +++++++++++++++++++++-- src/qemu/qemu_driver.c | 23 +++++++++++++++++++++-- src/util/virhostmem.c | 24 ++++++++++++++++++------ src/util/virhostmem.h | 2 ++ src/vbox/vbox_common.c | 20 +++++++++++++++++--- 9 files changed, 92 insertions(+), 14 deletions(-)
-- 2.31.1
participants (2)
-
Martin Kletzander
-
Michal Privoznik