[libvirt] [PATCH] libxl: implement NUMA capabilities reporting

From: Dario Faggioli <dario.faggioli@citrix.com>

Starting from Xen 4.2, libxl has all the bits and pieces in place for
retrieving an adequate amount of information about the host NUMA topology. It
is therefore possible, after a bit of shuffling, to arrange that information
in the way libvirt wants to present it to the outside world.

With this patch, the <topology> section of the host capabilities is properly
populated when running on Xen, so that we can figure out whether or not we're
running on a NUMA host, and what its characteristics are.

[raistlin@Zhaman ~]$ sudo virsh --connect xen:/// capabilities
<capabilities>
  <host>
    <cpu>
    ....
    <topology>
      <cells num='2'>
        <cell id='0'>
          <memory unit='KiB'>6291456</memory>
          <cpus num='8'>
            <cpu id='0' socket_id='1' core_id='0' siblings='0-1'/>
            <cpu id='1' socket_id='1' core_id='0' siblings='0-1'/>
            <cpu id='2' socket_id='1' core_id='1' siblings='2-3'/>
            <cpu id='3' socket_id='1' core_id='1' siblings='2-3'/>
            <cpu id='4' socket_id='1' core_id='9' siblings='4-5'/>
            <cpu id='5' socket_id='1' core_id='9' siblings='4-5'/>
            <cpu id='6' socket_id='1' core_id='10' siblings='6-7'/>
            <cpu id='7' socket_id='1' core_id='10' siblings='6-7'/>
          </cpus>
        </cell>
        <cell id='1'>
          <memory unit='KiB'>6881280</memory>
          <cpus num='8'>
            <cpu id='8' socket_id='0' core_id='0' siblings='8-9'/>
            <cpu id='9' socket_id='0' core_id='0' siblings='8-9'/>
            <cpu id='10' socket_id='0' core_id='1' siblings='10-11'/>
            <cpu id='11' socket_id='0' core_id='1' siblings='10-11'/>
            <cpu id='12' socket_id='0' core_id='9' siblings='12-13'/>
            <cpu id='13' socket_id='0' core_id='9' siblings='12-13'/>
            <cpu id='14' socket_id='0' core_id='10' siblings='14-15'/>
            <cpu id='15' socket_id='0' core_id='10' siblings='14-15'/>
          </cpus>
        </cell>
      </cells>
    </topology>
  </host>
....
---
This would be a nice addition to libvirt 1.1.2, wrapping up Dario's initial
work on NUMA support in the libxl driver. Since he is on vacation for the next
few weeks, I rebased his patch on the recent libxl capabilities cleanup.
 src/libxl/libxl_conf.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
index 73e2d49..6b0e89e 100644
--- a/src/libxl/libxl_conf.c
+++ b/src/libxl/libxl_conf.c
@@ -101,6 +101,119 @@ libxlCapsInitHost(libxl_ctx *ctx, virCapsPtr caps)
 }
 
 static int
+libxlCapsInitNuma(libxl_ctx *ctx, virCapsPtr caps)
+{
+    libxl_numainfo *numa_info = NULL;
+    libxl_cputopology *cpu_topo = NULL;
+    int nr_nodes = 0, nr_cpus = 0;
+    virCapsHostNUMACellCPUPtr *cpus = NULL;
+    int *nr_cpus_node = NULL;
+    size_t i;
+    int ret = -1;
+
+    /* Let's try to fetch all the topology information */
+    numa_info = libxl_get_numainfo(ctx, &nr_nodes);
+    if (numa_info == NULL || nr_nodes == 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("libxl_get_numainfo failed"));
+        goto cleanup;
+    } else {
+        cpu_topo = libxl_get_cpu_topology(ctx, &nr_cpus);
+        if (cpu_topo == NULL || nr_cpus == 0) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("libxl_get_cpu_topology failed"));
+            goto cleanup;
+        }
+    }
+
+    if (VIR_ALLOC_N(cpus, nr_nodes) < 0)
+        goto cleanup;
+
+    if (VIR_ALLOC_N(nr_cpus_node, nr_nodes) < 0)
+        goto cleanup;
+
+    /* For each node, prepare a list of CPUs belonging to that node */
+    for (i = 0; i < nr_cpus; i++) {
+        int node = cpu_topo[i].node;
+
+        if (cpu_topo[i].core == LIBXL_CPUTOPOLOGY_INVALID_ENTRY)
+            continue;
+
+        nr_cpus_node[node]++;
+
+        if (nr_cpus_node[node] == 1) {
+            if (VIR_ALLOC(cpus[node]) < 0)
+                goto cleanup;
+        } else {
+            if (VIR_REALLOC_N(cpus[node], nr_cpus_node[node]) < 0)
+                goto cleanup;
+        }
+
+        /* Mapping between what libxl tells and what libvirt wants */
+        cpus[node][nr_cpus_node[node]-1].id = i;
+        cpus[node][nr_cpus_node[node]-1].socket_id = cpu_topo[i].socket;
+        cpus[node][nr_cpus_node[node]-1].core_id = cpu_topo[i].core;
+        /* Allocate the siblings maps. We will be filling them later */
+        cpus[node][nr_cpus_node[node]-1].siblings = virBitmapNew(nr_cpus);
+        if (!cpus[node][nr_cpus_node[node]-1].siblings) {
+            virReportOOMError();
+            goto cleanup;
+        }
+    }
+
+    /* Let's now populate the siblings bitmaps */
+    for (i = 0; i < nr_cpus; i++) {
+        int node = cpu_topo[i].node;
+        size_t j;
+
+        if (cpu_topo[i].core == LIBXL_CPUTOPOLOGY_INVALID_ENTRY)
+            continue;
+
+        for (j = 0; j < nr_cpus_node[node]; j++) {
+            if (cpus[node][j].socket_id == cpu_topo[i].socket &&
+                cpus[node][j].core_id == cpu_topo[i].core)
+                ignore_value(virBitmapSetBit(cpus[node][j].siblings, i));
+        }
+    }
+
+    for (i = 0; i < nr_nodes; i++) {
+        if (numa_info[i].size == LIBXL_NUMAINFO_INVALID_ENTRY)
+            continue;
+
+        if (virCapabilitiesAddHostNUMACell(caps, i, nr_cpus_node[i],
+                                           numa_info[i].size / 1024,
+                                           cpus[i]) < 0) {
+            virCapabilitiesClearHostNUMACellCPUTopology(cpus[i],
+                                                        nr_cpus_node[i]);
+            goto cleanup;
+        }
+
+        /* This is safe, as the CPU list is now stored in the NUMA cell */
+        cpus[i] = NULL;
+    }
+
+    ret = 0;
+
+ cleanup:
+    if (ret != 0) {
+        /* Something went wrong: deallocate everything and unref caps */
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("libxenlight failed to build the NUMA topology"));
+
+        for (i = 0; i < nr_nodes; i++)
+            VIR_FREE(cpus[i]);
+        virCapabilitiesFreeNUMAInfo(caps);
+    }
+
+    VIR_FREE(cpus);
+    VIR_FREE(nr_cpus_node);
+    libxl_cputopology_list_free(cpu_topo, nr_cpus);
+    libxl_numainfo_list_free(numa_info, nr_nodes);
+
+    return ret;
+}
+
+static int
 libxlCapsInitGuests(libxl_ctx *ctx, virCapsPtr caps)
 {
     const libxl_version_info *ver_info;
@@ -880,6 +993,9 @@ libxlMakeCapabilities(libxl_ctx *ctx)
     if (libxlCapsInitHost(ctx, caps) < 0)
         goto error;
 
+    if (libxlCapsInitNuma(ctx, caps) < 0)
+        goto error;
+
     if (libxlCapsInitGuests(ctx, caps) < 0)
         goto error;
-- 
1.8.1.4
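
For completeness, a minimal sketch (not part of the patch) of how the same
capabilities XML shown above can be fetched programmatically through the
libvirt API rather than via virsh; the connection URI and error handling here
are illustrative only:

#include <stdio.h>
#include <stdlib.h>
#include <libvirt/libvirt.h>

int main(void)
{
    /* Connect to the local Xen host via the libxl driver */
    virConnectPtr conn = virConnectOpenReadOnly("xen:///");
    char *caps;

    if (!conn) {
        fprintf(stderr, "failed to connect to the libxl driver\n");
        return EXIT_FAILURE;
    }

    /* Returns the capabilities XML document; the <topology> element is
     * only present when the NUMA information could be gathered. */
    caps = virConnectGetCapabilities(conn);
    if (caps) {
        printf("%s\n", caps);
        free(caps);
    }

    virConnectClose(conn);
    return caps ? EXIT_SUCCESS : EXIT_FAILURE;
}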

On Fri, Aug 16, 2013 at 03:46:29PM -0600, Jim Fehlig wrote:
 static int
+libxlCapsInitNuma(libxl_ctx *ctx, virCapsPtr caps)
+{
+    libxl_numainfo *numa_info = NULL;
+    libxl_cputopology *cpu_topo = NULL;
+    int nr_nodes = 0, nr_cpus = 0;
+    virCapsHostNUMACellCPUPtr *cpus = NULL;
+    int *nr_cpus_node = NULL;
+    size_t i;
+    int ret = -1;
+
+    /* Let's try to fetch all the topology information */
+    numa_info = libxl_get_numainfo(ctx, &nr_nodes);
+    if (numa_info == NULL || nr_nodes == 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("libxl_get_numainfo failed"));
You're reporting a useful error here....
+        goto cleanup;
+    } else {
+        cpu_topo = libxl_get_cpu_topology(ctx, &nr_cpus);
+        if (cpu_topo == NULL || nr_cpus == 0) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("libxl_get_cpu_topology failed"));
And here, and so on....
+
+    ret = 0;
+
+ cleanup:
+    if (ret != 0) {
+        /* Something went wrong: deallocate everything and unref caps */
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("libxenlight failed to build the NUMA topology"));
And overwriting the error with something useless. Just remove this call to virReportError. Add a VIR_DEBUG log in its place if you want something to highlight the situation in debugging modes.
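For illustration, the cleanup path could then look something like this (just a
sketch of the idea; the exact debug message doesn't matter):

 cleanup:
    if (ret != 0) {
        /* Something went wrong: deallocate everything without
         * clobbering the error already reported above */
        VIR_DEBUG("Failed to build NUMA capabilities from libxl info");

        for (i = 0; i < nr_nodes; i++)
            VIR_FREE(cpus[i]);
        virCapabilitiesFreeNUMAInfo(caps);
    }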
+
+        for (i = 0; i < nr_nodes; i++)
+            VIR_FREE(cpus[i]);
+        virCapabilitiesFreeNUMAInfo(caps);
+    }
+
+    VIR_FREE(cpus);
+    VIR_FREE(nr_cpus_node);
+    libxl_cputopology_list_free(cpu_topo, nr_cpus);
+    libxl_numainfo_list_free(numa_info, nr_nodes);
+
+    return ret;
+}
+
+static int
 libxlCapsInitGuests(libxl_ctx *ctx, virCapsPtr caps)
 {
     const libxl_version_info *ver_info;
@@ -880,6 +993,9 @@ libxlMakeCapabilities(libxl_ctx *ctx)
     if (libxlCapsInitHost(ctx, caps) < 0)
         goto error;
+    if (libxlCapsInitNuma(ctx, caps) < 0)
+        goto error;
+
     if (libxlCapsInitGuests(ctx, caps) < 0)
         goto error;
ACK if that change above is made before pushing

Daniel
-- 
|: http://berrange.com            -o-   http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org             -o-                  http://virt-manager.org :|
|: http://autobuild.org           -o-        http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org      -o-            http://live.gnome.org/gtk-vnc :|

Daniel P. Berrange wrote:
On Fri, Aug 16, 2013 at 03:46:29PM -0600, Jim Fehlig wrote:
 static int
+libxlCapsInitNuma(libxl_ctx *ctx, virCapsPtr caps)
+{
+    libxl_numainfo *numa_info = NULL;
+    libxl_cputopology *cpu_topo = NULL;
+    int nr_nodes = 0, nr_cpus = 0;
+    virCapsHostNUMACellCPUPtr *cpus = NULL;
+    int *nr_cpus_node = NULL;
+    size_t i;
+    int ret = -1;
+
+    /* Let's try to fetch all the topology information */
+    numa_info = libxl_get_numainfo(ctx, &nr_nodes);
+    if (numa_info == NULL || nr_nodes == 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("libxl_get_numainfo failed"));
You're reporting a useful error here....
+        goto cleanup;
+    } else {
+        cpu_topo = libxl_get_cpu_topology(ctx, &nr_cpus);
+        if (cpu_topo == NULL || nr_cpus == 0) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("libxl_get_cpu_topology failed"));
And here, and so on....
+
+    ret = 0;
+
+ cleanup:
+    if (ret != 0) {
+        /* Something went wrong: deallocate everything and unref caps */
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("libxenlight failed to build the NUMA topology"));
And overwriting the error with something useless. Just remove this call to virReportError. Add a VIR_DEBUG log in its place if you want something to highlight the situation in debugging modes.
Thanks for catching that. I've removed the unnecessary error reporting and
pushed the patch.

Regards,
Jim
+
+        for (i = 0; i < nr_nodes; i++)
+            VIR_FREE(cpus[i]);
+        virCapabilitiesFreeNUMAInfo(caps);
+    }
+
+    VIR_FREE(cpus);
+    VIR_FREE(nr_cpus_node);
+    libxl_cputopology_list_free(cpu_topo, nr_cpus);
+    libxl_numainfo_list_free(numa_info, nr_nodes);
+
+    return ret;
+}
+
+static int
 libxlCapsInitGuests(libxl_ctx *ctx, virCapsPtr caps)
 {
     const libxl_version_info *ver_info;
@@ -880,6 +993,9 @@ libxlMakeCapabilities(libxl_ctx *ctx)
     if (libxlCapsInitHost(ctx, caps) < 0)
         goto error;
+    if (libxlCapsInitNuma(ctx, caps) < 0)
+        goto error;
+
     if (libxlCapsInitGuests(ctx, caps) < 0)
         goto error;
ACK if that change above is made before pushing
Daniel

On 08/16/2013 05:46 PM, Jim Fehlig wrote:
From: Dario Faggioli <dario.faggioli@citrix.com>
...snip...
+
+ cleanup:
+    if (ret != 0) {
+        for (i = 0; i < nr_nodes; i++)
+            VIR_FREE(cpus[i]);
+        virCapabilitiesFreeNUMAInfo(caps);
+    }
+
Coverity got grumpy with respect to the above loop. While I can only assume
logically that 'nr_nodes' is not changed if libxl_get_numainfo() returns NULL,
Coverity doesn't assume that. Also, even if libxl_get_numainfo() did return
data and 'nr_nodes' had a value, but the "else" condition fails, e.g.
"if (cpu_topo == NULL || nr_cpus == 0) {", then 'nr_nodes > 0' while 'cpus' is
still NULL, which will cause sudden death syndrome :-).

The following resolves Coverity's complaint and keeps things safer:

-    for (i = 0; i < nr_nodes; i++)
+    for (i = 0; cpus && i < nr_nodes; i++)

John
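Put together with the earlier review comment (the virReportError in the
cleanup path already dropped), the guarded cleanup would read roughly like
this; a sketch, not necessarily the exact committed code:

 cleanup:
    if (ret != 0) {
        /* 'cpus' is still NULL if either libxl_get_numainfo() or
         * libxl_get_cpu_topology() failed above, so guard on it
         * before freeing the per-node CPU arrays */
        for (i = 0; cpus && i < nr_nodes; i++)
            VIR_FREE(cpus[i]);
        virCapabilitiesFreeNUMAInfo(caps);
    }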

On 08/20/2013 11:01 AM, John Ferlan wrote:
The following resolves Coverity's complaint and keeps things safer:
-    for (i = 0; i < nr_nodes; i++)
+    for (i = 0; cpus && i < nr_nodes; i++)
Pre-approved ACK if you want to work that into a formal patch.

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org
participants (4)
- Daniel P. Berrange
- Eric Blake
- Jim Fehlig
- John Ferlan