Dario Faggioli wrote:
Starting from Xen 4.2, libxl has all the bits and pieces in place
for retrieving an adequate amount of information about the host
NUMA topology. It is therefore possible, after a bit of shuffling,
to arrange that information in the way libvirt wants to present
it to the outside world.
Therefore, with this patch, the <topology> section of the host
capabilities is properly populated, when running on Xen, so that
we can figure out whether or not we're running on a NUMA host,
and what its characteristics are.
[raistlin@Zhaman ~]$ sudo virsh --connect xen:/// capabilities
<capabilities>
<host>
<cpu>
....
<topology>
<cells num='2'>
<cell id='0'>
<memory unit='KiB'>6291456</memory>
<cpus num='8'>
<cpu id='0' socket_id='1' core_id='0'
siblings='0-1'/>
<cpu id='1' socket_id='1' core_id='0'
siblings='0-1'/>
<cpu id='2' socket_id='1' core_id='1'
siblings='2-3'/>
<cpu id='3' socket_id='1' core_id='1'
siblings='2-3'/>
<cpu id='4' socket_id='1' core_id='9'
siblings='4-5'/>
<cpu id='5' socket_id='1' core_id='9'
siblings='4-5'/>
<cpu id='6' socket_id='1' core_id='10'
siblings='6-7'/>
<cpu id='7' socket_id='1' core_id='10'
siblings='6-7'/>
</cpus>
</cell>
<cell id='1'>
<memory unit='KiB'>6881280</memory>
<cpus num='8'>
<cpu id='8' socket_id='0' core_id='0'
siblings='8-9'/>
<cpu id='9' socket_id='0' core_id='0'
siblings='8-9'/>
<cpu id='10' socket_id='0' core_id='1'
siblings='10-11'/>
<cpu id='11' socket_id='0' core_id='1'
siblings='10-11'/>
<cpu id='12' socket_id='0' core_id='9'
siblings='12-13'/>
<cpu id='13' socket_id='0' core_id='9'
siblings='12-13'/>
<cpu id='14' socket_id='0' core_id='10'
siblings='14-15'/>
<cpu id='15' socket_id='0' core_id='10'
siblings='14-15'/>
</cpus>
</cell>
</cells>
</topology>
</host>
....
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Cc: Daniel P. Berrange <berrange@redhat.com>
---
Changes from v3:
* updated VIR_*ALLOC* call sites not to invoke virReportOOMError();
* failing at getting NUMA and CPU topology info from libxl are now
considered proper errors, as requested during review;
* s/out/cleanup/ as requested during review.
Changes from v2:
* iterators turned from int to size_t;
* fixed wrong sibling maps if on same node but different socket;
* code motion and error handling, as requested during review.
Changes from v1:
* fixed a typo in the commit message, as requested during review;
* fixed coding style (one function parameters per line and no spaces
between variable definitions), as requested during review;
* avoid zero-filling memory after VIR_ALLOC_N(), since it does that
already, as requested during review;
* improved out of memory error reporting, as requested during review;
* libxlMakeNumaCapabilities() created, accommodating all the NUMA
related additions, instead of having them within
libxlMakeCapabilitiesInternal(), as suggested during review.
---
src/libxl/libxl_conf.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 139 insertions(+), 1 deletion(-)
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
index 827dfdd..2c84191 100644
--- a/src/libxl/libxl_conf.c
+++ b/src/libxl/libxl_conf.c
@@ -161,6 +161,107 @@ libxlBuildCapabilities(virArch hostarch,
}
static virCapsPtr
+libxlMakeNumaCapabilities(libxl_numainfo *numa_info,
+ int nr_nodes,
+ libxl_cputopology *cpu_topo,
+ int nr_cpus,
+ virCapsPtr caps)
I know we discussed returning an int from this function to indicate
success/failure, and that I agreed with your reasoning [1] to keep the
virCapsPtr. But now that driver init fails if gathering NUMA fails, I
think this really should return an int.
+{
+ virCapsHostNUMACellCPUPtr *cpus = NULL;
+ int *nr_cpus_node = NULL;
+ bool numa_failed = true;
+ size_t i;
+
+ if (VIR_ALLOC_N(cpus, nr_nodes) < 0)
+ goto cleanup;
+
+ if (VIR_ALLOC_N(nr_cpus_node, nr_nodes) < 0)
+ goto cleanup;
+
+ /* For each node, prepare a list of CPUs belonging to that node */
+ for (i = 0; i < nr_cpus; i++) {
+ int node = cpu_topo[i].node;
+
+ if (cpu_topo[i].core == LIBXL_CPUTOPOLOGY_INVALID_ENTRY)
+ continue;
+
+ nr_cpus_node[node]++;
+
+ if (nr_cpus_node[node] == 1) {
+ if (VIR_ALLOC(cpus[node]) < 0)
+ goto cleanup;
+ } else {
+ if (VIR_REALLOC_N(cpus[node], nr_cpus_node[node]) < 0)
+ goto cleanup;
+ }
+
+ /* Mapping between what libxl tells and what libvirt wants */
+ cpus[node][nr_cpus_node[node]-1].id = i;
+ cpus[node][nr_cpus_node[node]-1].socket_id = cpu_topo[i].socket;
+ cpus[node][nr_cpus_node[node]-1].core_id = cpu_topo[i].core;
+ /* Allocate the siblings maps. We will be filling them later */
+ cpus[node][nr_cpus_node[node]-1].siblings = virBitmapNew(nr_cpus);
+ if (!cpus[node][nr_cpus_node[node]-1].siblings) {
+ virReportOOMError();
+ goto cleanup;
+ }
+ }
+
+ /* Let's now populate the siblings bitmaps */
+ for (i = 0; i < nr_cpus; i++) {
+ int node = cpu_topo[i].node;
+ size_t j;
+
+ if (cpu_topo[i].core == LIBXL_CPUTOPOLOGY_INVALID_ENTRY)
+ continue;
+
+ for (j = 0; j < nr_cpus_node[node]; j++) {
+ if (cpus[node][j].socket_id == cpu_topo[i].socket &&
+ cpus[node][j].core_id == cpu_topo[i].core)
+ ignore_value(virBitmapSetBit(cpus[node][j].siblings, i));
+ }
+ }
+
+ for (i = 0; i < nr_nodes; i++) {
+ if (numa_info[i].size == LIBXL_NUMAINFO_INVALID_ENTRY)
+ continue;
+
+ if (virCapabilitiesAddHostNUMACell(caps, i, nr_cpus_node[i],
+ numa_info[i].size / 1024,
+ cpus[i]) < 0) {
+ virCapabilitiesClearHostNUMACellCPUTopology(cpus[i],
+ nr_cpus_node[i]);
+ goto cleanup;
+ }
+
+ /* This is safe, as the CPU list is now stored in the NUMA cell */
+ cpus[i] = NULL;
+ }
+
+ numa_failed = false;
+
+ cleanup:
+ if (numa_failed) {
+ /* Something went wrong: deallocate everything and unref caps */
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("libxenlight failed to build the NUMA topology"));
+
+ for (i = 0; i < nr_nodes; i++) {
+ VIR_FREE(cpus[i]);
+ }
+ virCapabilitiesFreeNUMAInfo(caps);
+
+ if (!virObjectUnref(caps))
+ caps = NULL;
I'm not fond of unref'ing caps and setting to NULL here, outside of the
function where caps was allocated.
I sent a patch to rework the capabilities code [2], which I think will
make it a bit easier to implement this function. What do you think?
Regards,
Jim
[1]
https://www.redhat.com/archives/libvir-list/2013-July/msg00458.html
[2]
https://www.redhat.com/archives/libvir-list/2013-August/msg00543.html
+ }
+
+ VIR_FREE(cpus);
+ VIR_FREE(nr_cpus_node);
+
+ return caps;
+}
+
+static virCapsPtr
libxlMakeCapabilitiesInternal(virArch hostarch,
libxl_physinfo *phy_info,
char *capabilities)
@@ -876,7 +977,11 @@ libxlMakeCapabilities(libxl_ctx *ctx)
{
int err;
libxl_physinfo phy_info;
+ libxl_numainfo *numa_info = NULL;
+ libxl_cputopology *cpu_topo = NULL;
const libxl_version_info *ver_info;
+ int nr_nodes = 0, nr_cpus = 0;
+ virCapsPtr caps;
err = regcomp(&xen_cap_rec, xen_cap_re, REG_EXTENDED);
if (err != 0) {
@@ -900,9 +1005,42 @@ libxlMakeCapabilities(libxl_ctx *ctx)
return NULL;
}
- return libxlMakeCapabilitiesInternal(virArchFromHost(),
+ caps = libxlMakeCapabilitiesInternal(virArchFromHost(),
&phy_info,
ver_info->capabilities);
+
+ /* Check if caps is valid. If it is, it must remain so till the end! */
+ if (caps == NULL)
+ goto cleanup;
+
+ /* Let's try to fetch all the topology information */
+ numa_info = libxl_get_numainfo(ctx, &nr_nodes);
+ if (numa_info == NULL || nr_nodes == 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("libxl_get_numainfo failed"));
+ goto cleanup;
+ } else {
+ cpu_topo = libxl_get_cpu_topology(ctx, &nr_cpus);
+ if (cpu_topo == NULL || nr_cpus == 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("libxl_get_cpu_topology failed"));
+ goto cleanup;
+ }
+ else {
+ /* And add topology information to caps */
+ caps = libxlMakeNumaCapabilities(numa_info,
+ nr_nodes,
+ cpu_topo,
+ nr_cpus,
+ caps);
+ }
+ }
+
+cleanup:
+ libxl_cputopology_list_free(cpu_topo, nr_cpus);
+ libxl_numainfo_list_free(numa_info, nr_nodes);
+
+ return caps;
}
int