https://bugzilla.redhat.com/622515 - When hot-unplugging CPUs,
libvirt failed to start a guest that had been pinned to CPUs that
were still online, because it was failing to read status from
unrelated offline CPUs.
Tested on a dual-core laptop, where I also discovered that, per
http://www.cyberciti.biz/files/linux-kernel/Documentation/cpu-hotplug.txt,
/sys/devices/system/cpu/cpu0/online does not exist on systems where it
cannot be hot-unplugged.
* src/nodeinfo.c (linuxNodeInfoCPUPopulate): Ignore CPUs that are
currently offline. Detect readdir failure.
(parse_socket): Move guts...
(get_cpu_value): ...to new function, shared with...
(cpu_online): New function.
---
src/nodeinfo.c | 109 +++++++++++++++++++++++++++++++++++++------------------
1 files changed, 73 insertions(+), 36 deletions(-)
diff --git a/src/nodeinfo.c b/src/nodeinfo.c
index 5ec1bcf..6286aa2 100644
--- a/src/nodeinfo.c
+++ b/src/nodeinfo.c
@@ -23,6 +23,7 @@
#include <config.h>
+#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -60,6 +61,58 @@
int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
virNodeInfoPtr nodeinfo);
+/* Return the positive decimal contents of the given
+ * CPU_SYS_PATH/cpu%u/FILE, or -1 on error. If MISSING_OK and the
+ * file could not be found, return 1 instead of an error; this is
+ * because some machines cannot hot-unplug cpu0. */
+static int get_cpu_value(unsigned int cpu, const char *file, bool missing_ok)
+{
+ char *path;
+ FILE *pathfp;
+ char value_str[1024];
+ char *tmp;
+ int value = -1;
+
+ if (virAsprintf(&path, CPU_SYS_PATH "/cpu%u/%s", cpu, file) < 0) {
+ virReportOOMError();
+ return -1;
+ }
+
+ pathfp = fopen(path, "r");
+ if (pathfp == NULL) {
+ if (missing_ok && errno == ENOENT)
+ value = 1;
+ else
+ virReportSystemError(errno, _("cannot open %s"), path);
+ goto cleanup;
+ }
+
+ if (fgets(value_str, sizeof(value_str), pathfp) == NULL) {
+ virReportSystemError(errno, _("cannot read from %s"), path);
+ goto cleanup;
+ }
+ if (virStrToLong_i(value_str, &tmp, 10, &value) < 0) {
+ nodeReportError(VIR_ERR_INTERNAL_ERROR,
+ _("could not convert '%s' to an integer"),
+ value_str);
+ goto cleanup;
+ }
+
+cleanup:
+ if (pathfp)
+ fclose(pathfp);
+ VIR_FREE(path);
+
+ return value;
+}
+
+/* Check if CPU is online via CPU_SYS_PATH/cpu%u/online. Return 1 if online,
+ 0 if offline, and -1 on error. */
+static int cpu_online(unsigned int cpu)
+{
+ return get_cpu_value(cpu, "online", cpu == 0);
+}
+
static unsigned long count_thread_siblings(unsigned int cpu)
{
unsigned long ret = 0;
@@ -106,41 +159,7 @@ cleanup:
static int parse_socket(unsigned int cpu)
{
- char *path;
- FILE *pathfp;
- char socket_str[1024];
- char *tmp;
- int socket = -1;
-
- if (virAsprintf(&path, CPU_SYS_PATH
"/cpu%u/topology/physical_package_id",
- cpu) < 0) {
- virReportOOMError();
- return -1;
- }
-
- pathfp = fopen(path, "r");
- if (pathfp == NULL) {
- virReportSystemError(errno, _("cannot open %s"), path);
- VIR_FREE(path);
- return -1;
- }
-
- if (fgets(socket_str, sizeof(socket_str), pathfp) == NULL) {
- virReportSystemError(errno, _("cannot read from %s"), path);
- goto cleanup;
- }
- if (virStrToLong_i(socket_str, &tmp, 10, &socket) < 0) {
- nodeReportError(VIR_ERR_INTERNAL_ERROR,
- _("could not convert '%s' to an integer"),
- socket_str);
- goto cleanup;
- }
-
-cleanup:
- fclose(pathfp);
- VIR_FREE(path);
-
- return socket;
+ return get_cpu_value(cpu, "topology/physical_package_id", false);
}
int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
@@ -153,6 +172,8 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
unsigned long cur_threads;
int socket;
unsigned long long socket_mask = 0;
+ unsigned int remaining;
+ int online;
nodeinfo->cpus = 0;
nodeinfo->mhz = 0;
@@ -222,15 +243,25 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
/* OK, we've parsed what we can out of /proc/cpuinfo. Get the socket
* and thread information from /sys
*/
+ remaining = nodeinfo->cpus;
cpudir = opendir(CPU_SYS_PATH);
if (cpudir == NULL) {
virReportSystemError(errno, _("cannot opendir %s"), CPU_SYS_PATH);
return -1;
}
- while ((cpudirent = readdir(cpudir))) {
+ while (errno = 0, remaining && (cpudirent = readdir(cpudir))) {
if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
continue;
+ online = cpu_online(cpu);
+ if (online < 0) {
+ closedir(cpudir);
+ return -1;
+ }
+ if (!online)
+ continue;
+ remaining--;
+
socket = parse_socket(cpu);
if (socket < 0) {
closedir(cpudir);
@@ -249,6 +280,12 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
if (cur_threads > nodeinfo->threads)
nodeinfo->threads = cur_threads;
}
+ if (errno) {
+ virReportSystemError(errno,
+ _("problem reading %s"), CPU_SYS_PATH);
+ closedir(cpudir);
+ return -1;
+ }
closedir(cpudir);
ACK, that looks fine !
thanks !
Daniel
--
Daniel Veillard | libxml Gnome XML XSLT toolkit