This solves the problem detailed in:
https://bugzilla.redhat.com/show_bug.cgi?id=816465
and further detailed in
https://www.redhat.com/archives/libvir-list/2012-May/msg00202.htm
A short explanation is included in the comments of the patch itself.
Even with ACK, I will wait to push this until I have verification that
it does not break lldpad<-->libvirtd communication (if it does, I may
need to use the nl_handle allocated during virNetlinkStartup() for
virNetlinkEventServiceStart()).
---
daemon/libvirtd.c | 6 +++++
src/libvirt_private.syms | 2 ++
src/util/virnetlink.c | 67 +++++++++++++++++++++++++++++++++++++++++++++-
src/util/virnetlink.h | 5 +++-
4 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/daemon/libvirtd.c b/daemon/libvirtd.c
index b098f6a..5d57b50 100644
--- a/daemon/libvirtd.c
+++ b/daemon/libvirtd.c
@@ -1007,6 +1007,11 @@ int main(int argc, char **argv) {
goto cleanup;
}
+ if (virNetlinkStartup() < 0) {
+ ret = VIR_DAEMON_ERR_INIT;
+ goto cleanup;
+ }
+
if (!(srv = virNetServerNew(config->min_workers,
config->max_workers,
config->prio_workers,
@@ -1143,6 +1148,7 @@ cleanup:
virNetServerProgramFree(qemuProgram);
virNetServerClose(srv);
virNetServerFree(srv);
+ virNetlinkShutdown();
if (statuswrite != -1) {
if (ret != 0) {
/* Tell parent of daemon what failed */
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index d4038b2..e911774 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1330,6 +1330,8 @@ virNetlinkEventRemoveClient;
virNetlinkEventServiceIsRunning;
virNetlinkEventServiceStop;
virNetlinkEventServiceStart;
+virNetlinkShutdown;
+virNetlinkStartup;
# virnetmessage.h
diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c
index b2e9d51..a249e94 100644
--- a/src/util/virnetlink.c
+++ b/src/util/virnetlink.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2010-2011 Red Hat, Inc.
+ * Copyright (C) 2010-2012 Red Hat, Inc.
* Copyright (C) 2010-2012 IBM Corporation
*
* This library is free software; you can redistribute it and/or
@@ -88,10 +88,63 @@ static int nextWatch = 1;
# define NETLINK_EVENT_ALLOC_EXTENT 10
static virNetlinkEventSrvPrivatePtr server = NULL;
+static struct nl_handle *placeholder_nlhandle = NULL;
/* Function definitions */
/**
+ * virNetlinkStartup:
+ *
+ * Perform any initialization that needs to take place before the
+ * program starts up worker threads. This is currently used to assure
+ * that an nl_handle is allocated prior to any attempts to bind a
+ * netlink socket. For a discussion of why this is necessary, please
+ * see the following email message:
+ *
+ *
https://www.redhat.com/archives/libvir-list/2012-May/msg00202.html
+ *
+ * The short version is that, without this placeholder allocation of
+ * an nl_handle that is never used, it is possible for nl_connect() in
+ * one thread to collide with a direct bind() of a netlink socket in
+ * another thread, leading to failure of the operation (which could
+ * lead to failure of libvirtd to start). Since getaddrinfo() (used by
+ * libvirtd in virSocketAddrParse, which is called quite frequently
+ * during startup) directly calls bind() on a netlink socket, this is
+ * actually a very common occurence (15-20% failure rate on some
+ * hardware).
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+virNetlinkStartup(void)
+{
+ if (placeholder_nlhandle)
+ return 0;
+ placeholder_nlhandle = nl_handle_alloc();
+ if (!placeholder_nlhandle) {
+ virReportSystemError(errno, "%s",
+ _("cannot allocate placeholder nlhandle for
netlink"));
+ return -1;
+ }
+ return 0;
+}
+
+/**
+ * virNetlinkShutdown:
+ *
+ * Undo any initialization done by virNetlinkStartup. This currently
+ * destroys the placeholder nl_handle.
+ */
+void
+virNetlinkShutdown(void)
+{
+ if (placeholder_nlhandle) {
+ nl_handle_destroy(placeholder_nlhandle);
+ placeholder_nlhandle = NULL;
+ }
+}
+
+/**
* virNetlinkCommand:
* @nlmsg: pointer to netlink message
* @respbuf: pointer to pointer where response buffer will be allocated
@@ -535,6 +588,18 @@ static const char *unsupported = N_("libnl was not available at
build time");
static const char *unsupported = N_("not supported on non-linux platforms");
# endif
+int
+virNetlinkStartup(void)
+{
+ return 0;
+}
+
+void
+virNetlinkShutdown(void)
+{
+ return;
+}
+
int virNetlinkCommand(struct nl_msg *nl_msg ATTRIBUTE_UNUSED,
unsigned char **respbuf ATTRIBUTE_UNUSED,
unsigned int *respbuflen ATTRIBUTE_UNUSED,
diff --git a/src/util/virnetlink.h b/src/util/virnetlink.h
index a72612e..93df59a 100644
--- a/src/util/virnetlink.h
+++ b/src/util/virnetlink.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2010-2011 Red Hat, Inc.
+ * Copyright (C) 2010-2012 Red Hat, Inc.
* Copyright (C) 2010-2012 IBM Corporation
*
* This library is free software; you can redistribute it and/or
@@ -35,6 +35,9 @@ struct nlattr;
# endif /* __linux__ */
+int virNetlinkStartup(void);
+void virNetlinkShutdown(void);
+
int virNetlinkCommand(struct nl_msg *nl_msg,
unsigned char **respbuf, unsigned int *respbuflen,
int nl_pid);
--
1.7.10