[libvirt] [PATCH] macvtap: Work-around failing nl_connect calls (weird problem)

15 Feb 2011

When trying to start / stop a domain with a macvtap device (direct type of 
interface) that has a device description like the following

<interface type='direct'>
<source dev='static' mode='vepa'/>
</interface>

then I see netlink-related errors when a 'virsh edit' session is 
open at the same time.

So, to reproduce this error you should try the following (on a kernel 
supporting macvtap):

virsh edit <macvtap domain>       -> do not terminate the edit session
virsh start <macvtap domain>      -> works
virsh destroy <macvtap domain>    -> leaves a macvtap device behind because 
nl_connect fails
virsh start <macvtap domain>      -> does not start anymore

That should make it fail.

The work-around basically keeps on allocating new netlink library 
handles until nl_connect() succeeds. Each new netlink library handle 
causes the next available port (internal to libnl; nl_pid) to be allocated. 
For every ongoing virsh edit session, one new handle seems to be 
required. So for 2 virsh edit sessions, the 3rd handle (usually) works. I do 
not know what in the system is causing this, but my guess is that 
'something' is blocking the same port (nl_pid) -- it may not be in 
libvirt but in a dependent library that's not using libnl (?).

   Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>

Index: libvirt-acl/src/util/macvtap.c
===================================================================

--- libvirt-acl.orig/src/util/macvtap.c
+++ libvirt-acl/src/util/macvtap.c
@@ -120,13 +120,43 @@ int nlComm(struct nl_msg *nl_msg,
      fd_set readfds;
      int fd;
      int n;
-    struct nl_handle *nlhandle = nl_handle_alloc();
+    struct nl_handle **nlhandles = NULL;
      struct nlmsghdr *nlmsg = nlmsg_hdr(nl_msg);
+    unsigned int idx = 0, num_elms = 1, i;

-    if (!nlhandle)
-        return -1;
+realloc:
+    if (VIR_REALLOC_N(nlhandles, num_elms * sizeof(struct nl_handle *)) 
< 0) {
+        virReportOOMError();
+        rc = -1;
+        goto err_exit;
+    }
+
+    for (i = idx; i < num_elms ; i++)
+        nlhandles[i] = NULL;
+
+next_handle:
+    nlhandles[idx] = nl_handle_alloc();

-    if (nl_connect(nlhandle, NETLINK_ROUTE) < 0) {
+    if (nlhandles[idx] == NULL) {
+        virReportOOMError();
+        rc = -1;
+        goto err_exit;
+    }
+
+    if (nl_connect(nlhandles[idx], NETLINK_ROUTE) < 0) {
+        VIR_DEBUG0("Could not create netlink socket - trying a new one\n");
+         /* get a new handle and keep the ones we have */
+        idx++;
+        if (idx < num_elms)
+            goto next_handle;
+        /* need to reallocate */
+        num_elms += 10;
+        if (idx < 500)
+            goto realloc;
+
+        macvtapError(VIR_ERR_INTERNAL_ERROR, "%s [%s]",
+                     _("Could not create netlink socket"),
+                     nl_geterror());
          rc = -1;
          goto err_exit;
      }
@@ -135,15 +165,16 @@ int nlComm(struct nl_msg *nl_msg,

      nlmsg->nlmsg_pid = getpid();

-    nbytes = nl_send_auto_complete(nlhandle, nl_msg);
+    nbytes = nl_send_auto_complete(nlhandles[idx], nl_msg);
      if (nbytes < 0) {
          virReportSystemError(errno,
-                             "%s", _("cannot send to netlink socket"));
+                             "%s [%s]", _("cannot send to netlink socket"),
+                             nl_geterror());
          rc = -1;
          goto err_exit;
      }

-    fd = nl_socket_get_fd(nlhandle);
+    fd = nl_socket_get_fd(nlhandles[idx]);

      FD_ZERO(&readfds);
      FD_SET(fd, &readfds);
@@ -160,7 +191,7 @@ int nlComm(struct nl_msg *nl_msg,
          goto err_exit;
      }

-    *respbuflen = nl_recv(nlhandle, &nladdr, respbuf, NULL);
+    *respbuflen = nl_recv(nlhandles[idx], &nladdr, respbuf, NULL);
      if (*respbuflen <= 0)
          rc = -1;

@@ -171,7 +202,12 @@ err_exit:
          *respbuflen = 0;
      }

-    nl_handle_destroy(nlhandle);
+    if (nlhandles)
+        for (idx = 0; idx < num_elms; idx++)
+            nl_handle_destroy(nlhandles[idx]);
+
+    VIR_FREE(nlhandles);
+
      return rc;
  }

    

Stefan Berger

Daniel P. Berrange

Stefan Berger

Stefan Berger

Stefan Berger

Stefan Berger

tags

participants (2)