When trying to start or stop a domain with a macvtap device (interface of
type 'direct') that has a device description like this one:
<interface type='direct'>
<source dev='static' mode='vepa'/>
</interface>
I see netlink-related errors if a 'virsh edit' session is open at the
same time.
So, to reproduce this error you should try the following (on a kernel
supporting macvtap):
virsh edit <macvtap domain> -> do not terminate the edit session
virsh start <macvtap domain> -> works
virsh destroy <macvtap domain> -> leaves a macvtap device due to
nl_connect failing
virsh start <macvtap domain> -> does not start anymore
That should make it fail.
The work-around keeps allocating new netlink library handles until
nl_connect() succeeds. Each new netlink library handle causes the next
available port (internal to libnl; nl_pid) to be allocated.
For every ongoing virsh edit session, one additional handle seems to be
required, so with 2 virsh edit sessions the 3rd handle (usually) works. I
do not know what in the system is causing this, but my guess is that
'something' is blocking the same port (nl_pid) -- it may not be in
libvirt but in a dependent library that's not using libnl (?).
Signed-off-by: Stefan Berger <stefanb(a)linux.vnet.ibm.com>
Index: libvirt-acl/src/util/macvtap.c
===================================================================
--- libvirt-acl.orig/src/util/macvtap.c
+++ libvirt-acl/src/util/macvtap.c
@@ -120,13 +120,43 @@ int nlComm(struct nl_msg *nl_msg,
fd_set readfds;
int fd;
int n;
- struct nl_handle *nlhandle = nl_handle_alloc();
+ struct nl_handle **nlhandles = NULL;
struct nlmsghdr *nlmsg = nlmsg_hdr(nl_msg);
+ unsigned int idx = 0, num_elms = 1, i;
- if (!nlhandle)
- return -1;
+realloc:
+ if (VIR_REALLOC_N(nlhandles, num_elms * sizeof(struct nl_handle *))
< 0) {
+ virReportOOMError();
+ rc = -1;
+ goto err_exit;
+ }
+
+ for (i = idx; i < num_elms ; i++)
+ nlhandles[i] = NULL;
+
+next_handle:
+ nlhandles[idx] = nl_handle_alloc();
- if (nl_connect(nlhandle, NETLINK_ROUTE) < 0) {
+ if (nlhandles[idx] == NULL) {
+ virReportOOMError();
+ rc = -1;
+ goto err_exit;
+ }
+
+ if (nl_connect(nlhandles[idx], NETLINK_ROUTE) < 0) {
+ VIR_DEBUG0("Could not create netlink socket - trying a new one\n");
+ /* get a new handle and keep the ones we have */
+ idx++;
+ if (idx < num_elms)
+ goto next_handle;
+ /* need to reallocate */
+ num_elms += 10;
+ if (idx < 500)
+ goto realloc;
+
+ macvtapError(VIR_ERR_INTERNAL_ERROR, "%s [%s]",
+ _("Could not create netlink socket"),
+ nl_geterror());
rc = -1;
goto err_exit;
}
@@ -135,15 +165,16 @@ int nlComm(struct nl_msg *nl_msg,
nlmsg->nlmsg_pid = getpid();
- nbytes = nl_send_auto_complete(nlhandle, nl_msg);
+ nbytes = nl_send_auto_complete(nlhandles[idx], nl_msg);
if (nbytes < 0) {
virReportSystemError(errno,
- "%s", _("cannot send to netlink
socket"));
+ "%s [%s]", _("cannot send to netlink
socket"),
+ nl_geterror());
rc = -1;
goto err_exit;
}
- fd = nl_socket_get_fd(nlhandle);
+ fd = nl_socket_get_fd(nlhandles[idx]);
FD_ZERO(&readfds);
FD_SET(fd, &readfds);
@@ -160,7 +191,7 @@ int nlComm(struct nl_msg *nl_msg,
goto err_exit;
}
- *respbuflen = nl_recv(nlhandle, &nladdr, respbuf, NULL);
+ *respbuflen = nl_recv(nlhandles[idx], &nladdr, respbuf, NULL);
if (*respbuflen <= 0)
rc = -1;
@@ -171,7 +202,12 @@ err_exit:
*respbuflen = 0;
}
- nl_handle_destroy(nlhandle);
+ if (nlhandles)
+ for (idx = 0; idx < num_elms; idx++)
+ nl_handle_destroy(nlhandles[idx]);
+
+ VIR_FREE(nlhandles);
+
return rc;
}