On Wed, Apr 29, 2015 at 09:53:31AM +0200, Andrea Bolognani wrote:
numa_node_to_cpus() uses an internal cache to store the mapping
between NUMA nodes and CPUs (see Bug #1213835), which means
long-running processes such as libvirtd will get stale data if
CPU hotplugging is used during their lifetime.
The function has been rewritten to collect the information
directly from sysfs.
It might be worth noting that numactl does that anyway.
---
src/util/virnuma.c | 158 +++++++++++++++++++++++++++--------------------------
1 file changed, 81 insertions(+), 77 deletions(-)
diff --git a/src/util/virnuma.c b/src/util/virnuma.c
index 669192a..ccc356b 100644
--- a/src/util/virnuma.c
+++ b/src/util/virnuma.c
@@ -1,7 +1,7 @@
/*
* virnuma.c: helper APIs for managing numa
*
- * Copyright (C) 2011-2014 Red Hat, Inc.
+ * Copyright (C) 2011-2015 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -234,81 +234,6 @@ virNumaGetNodeMemory(int node,
}
-/**
- * virNumaGetNodeCPUs:
- * @node: identifier of the requested NUMA node
- * @cpus: returns a bitmap of CPUs in @node
- *
- * Returns count of CPUs in the selected node and sets the map of the cpus to
- * @cpus. On error if the @node doesn't exist in the system this function
- * returns -2 and sets @cpus to NULL. On other errors -1 is returned, @cpus
- * is set to NULL and an error is reported.
- */
-
-# define n_bits(var) (8 * sizeof(var))
-# define MASK_CPU_ISSET(mask, cpu) \
- (((mask)[((cpu) / n_bits(*(mask)))] >> ((cpu) % n_bits(*(mask)))) & 1)
-int
-virNumaGetNodeCPUs(int node,
- virBitmapPtr *cpus)
-{
- unsigned long *mask = NULL;
- unsigned long *allonesmask = NULL;
- virBitmapPtr cpumap = NULL;
- int ncpus = 0;
- int max_n_cpus = virNumaGetMaxCPUs();
- int mask_n_bytes = max_n_cpus / 8;
- size_t i;
- int ret = -1;
-
- *cpus = NULL;
-
- if (VIR_ALLOC_N(mask, mask_n_bytes / sizeof(*mask)) < 0)
- goto cleanup;
-
- if (VIR_ALLOC_N(allonesmask, mask_n_bytes / sizeof(*mask)) < 0)
- goto cleanup;
-
- memset(allonesmask, 0xff, mask_n_bytes);
-
- /* The first time this returns -1, ENOENT if node doesn't exist... */
- if (numa_node_to_cpus(node, mask, mask_n_bytes) < 0) {
- VIR_WARN("NUMA topology for cell %d is not available, ignoring",
node);
- ret = -2;
- goto cleanup;
- }
-
- /* second, third... times it returns an all-1's mask */
- if (memcmp(mask, allonesmask, mask_n_bytes) == 0) {
- VIR_DEBUG("NUMA topology for cell %d is invalid, ignoring", node);
- ret = -2;
- goto cleanup;
- }
-
- if (!(cpumap = virBitmapNew(max_n_cpus)))
- goto cleanup;
-
- for (i = 0; i < max_n_cpus; i++) {
- if (MASK_CPU_ISSET(mask, i)) {
- ignore_value(virBitmapSetBit(cpumap, i));
- ncpus++;
- }
- }
-
- *cpus = cpumap;
- cpumap = NULL;
- ret = ncpus;
-
- cleanup:
- VIR_FREE(mask);
- VIR_FREE(allonesmask);
- virBitmapFree(cpumap);
-
- return ret;
-}
-# undef MASK_CPU_ISSET
-# undef n_bits
-
#else /* !WITH_NUMACTL */
int
@@ -352,6 +277,84 @@ virNumaGetNodeMemory(int node ATTRIBUTE_UNUSED,
return -1;
}
+#endif /* !WITH_NUMACTL */
+
+#ifdef __linux__
+
Let me get this straight. So if you were not on linux, but had
numactl installed, we would get some bad info from the library in case
the cached data didn't reflect the change made in the system, but
after your change, we wouldn't be able to get any data at all? That
seems nasty, could we at least fall back to the previous behaviour,
even though it has some bugs?
Looking at the code of libnuma, it looks like it uses the sysfs even
if it's not on linux. Could we throw away the #ifdef __linux__ and
just error out in case any of the directories doesn't exist?
Other than that, the patch looks fine. (one more nit pick down below)
+# define SYSFS_SYSTEM_PATH "/sys/devices/system"
+
+/**
+ * virNumaGetNodeCPUs:
+ * @node: identifier of the requested NUMA node
+ * @cpus: returns a bitmap of CPUs in @node
+ *
+ * Returns count of CPUs in the selected node and sets the map of the cpus to
+ * @cpus. On error if the @node doesn't exist in the system this function
+ * returns -2 and sets @cpus to NULL. On other errors -1 is returned, @cpus
+ * is set to NULL and an error is reported.
+ */
+int
+virNumaGetNodeCPUs(int node,
+ virBitmapPtr *cpus)
+{
+ virBitmapPtr cpumap = NULL;
+ char *sysfs_node_path;
+ DIR *node_dir;
+ struct dirent *node_dirent;
+ int direrr;
+ int cpu;
+ int online;
+ int ncpus = 0;
+ int ret = -1;
+
+ if (virAsprintf(&sysfs_node_path, "%s/node/node%d",
+ SYSFS_SYSTEM_PATH, node) < 0)
+ goto out;
Just returning -1 here doesn't send anyone looking down the code and
you can throw away the 'out' label.
+
+ if ((node_dir = opendir(sysfs_node_path)) == NULL) {
+ if (errno == ENOENT)
+ ret = -2;
+ goto cleanup;
+ }
+
+ if ((cpumap = virBitmapNew(virNumaGetMaxCPUs())) == NULL)
+ goto cleanup;
+
+ while ((direrr = virDirRead(node_dir, &node_dirent, sysfs_node_path)) > 0) {
+ if (sscanf(node_dirent->d_name, "cpu%u", &cpu) != 1)
+ continue;
+
+ if ((online = nodeGetCPUValue(sysfs_node_path, cpu, "online", 1)) <
0)
+ goto cleanup;
+
+ if (online) {
+ if (virBitmapSetBit(cpumap, cpu) < 0)
+ goto cleanup;
+ ncpus++;
+ }
+ }
+
+ if (direrr < 0)
+ goto cleanup;
+
+ if (cpus != NULL) {
+ *cpus = cpumap;
+ cpumap = NULL;
+ }
+
+ ret = ncpus;
+
+ cleanup:
+ virBitmapFree(cpumap);
+ if (node_dir != NULL)
+ closedir(node_dir);
+ VIR_FREE(sysfs_node_path);
+
+ out:
+ return ret;
+}
+
+#else /* !__linux__ */
int
virNumaGetNodeCPUs(int node ATTRIBUTE_UNUSED,
@@ -363,7 +366,8 @@ virNumaGetNodeCPUs(int node ATTRIBUTE_UNUSED,
_("NUMA isn't available on this host"));
return -1;
}
-#endif /* !WITH_NUMACTL */
+
+#endif /* !__linux__ */
/**
* virNumaGetMaxCPUs:
--
2.1.0
--
libvir-list mailing list
libvir-list(a)redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list