[libvirt] [PATCH] virNetSocketNewConnectUNIX: Use flocks when spawning a daemon

2 Apr 2015

https://bugzilla.redhat.com/show_bug.cgi?id=1200149

Even though we have a mutex mechanism so that two clients don't spawn
two daemons, it's not strong enough. While one client is spawning the
daemon, another one can try to connect and fail. Two errors are
possible:

  error: Failed to connect socket to '/home/mprivozn/.cache/libvirt/libvirt-sock': Connection refused

or:

  error: Failed to connect socket to '/home/mprivozn/.cache/libvirt/libvirt-sock': No such file or directory

The problem in both cases is that the daemon is still starting up
while we are trying to connect (and fail). We should postpone the
connecting phase until the daemon has been started (by the other
client that is spawning it). In order to do that, create a lock file
'libvirt-lock' in the directory where the session daemon would create
its socket. Spawning a daemon then serializes on the file lock even
when called from multiple processes, so only the first one to come
will spawn the daemon.

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
---
 src/rpc/virnetsocket.c | 46 ++++++++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/src/rpc/virnetsocket.c b/src/rpc/virnetsocket.c
index 6b019cc..8d8b6e0 100644
--- a/src/rpc/virnetsocket.c
+++ b/src/rpc/virnetsocket.c
@@ -545,8 +545,10 @@ int virNetSocketNewConnectUNIX(const char *path,
 {
     char *binname = NULL;
     char *pidpath = NULL;
+    char *lockpath = NULL;
     int fd, passfd = -1;
     int pidfd = -1;
+    int lockfd = -1;
     virSocketAddr localAddr;
     virSocketAddr remoteAddr;
 
@@ -561,6 +563,22 @@ int virNetSocketNewConnectUNIX(const char *path,
         return -1;
     }
 
+    if (spawnDaemon) {
+        if (virPidFileConstructPath(false, NULL, "libvirt-lock", &lockpath) < 0)
+            goto error;
+
+        if ((lockfd = open(lockpath, O_RDWR | O_CREAT, 0600)) < 0 ||
+            virSetCloseExec(lockfd) < 0) {
+            virReportSystemError(errno, _("Unable to create lock '%s'"), lockpath);
+            goto error;
+        }
+
+        if (virFileLock(lockfd, false, 0, 1, true) < 0) {
+            virReportSystemError(errno, _("Unable to lock '%s'"), lockpath);
+            goto error;
+        }
+    }
+
     if ((fd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
         virReportSystemError(errno, "%s", _("Failed to create socket"));
         goto error;
@@ -595,17 +613,8 @@ int virNetSocketNewConnectUNIX(const char *path,
         if (virPidFileConstructPath(false, NULL, binname, &pidpath) < 0)
             goto error;
 
-        pidfd = virPidFileAcquirePath(pidpath, false, getpid());
-        if (pidfd < 0) {
-            /*
-             * This can happen in a very rare case of two clients spawning two
-             * daemons at the same time, and the error in the logs that gets
-             * reset here can be a clue to some future debugging.
-             */
-            virResetLastError();
-            spawnDaemon = false;
-            goto retry;
-        }
+        if ((pidfd = virPidFileAcquirePath(pidpath, false, getpid())) < 0)
+            goto error;
 
         if ((passfd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
             virReportSystemError(errno, "%s", _("Failed to create socket"));
@@ -668,16 +677,17 @@ int virNetSocketNewConnectUNIX(const char *path,
             goto error;
         }
 
-        /*
-         * Do we need to eliminate the super-rare race here any more?  It would
-         * need incorporating the following VIR_FORCE_CLOSE() into a
-         * virCommandHook inside a virNetSocketForkDaemon().
-         */
         VIR_FORCE_CLOSE(pidfd);
         if (virNetSocketForkDaemon(binary, passfd) < 0)
             goto error;
     }
 
+    if (lockfd) {
+        unlink(lockpath);
+        VIR_FORCE_CLOSE(lockfd);
+        VIR_FREE(lockpath);
+    }
+
     localAddr.len = sizeof(localAddr.data);
     if (getsockname(fd, &localAddr.data.sa, &localAddr.len) < 0) {
         virReportSystemError(errno, "%s", _("Unable to get local socket name"));
@@ -694,10 +704,14 @@ int virNetSocketNewConnectUNIX(const char *path,
  error:
     if (pidfd >= 0)
         virPidFileDeletePath(pidpath);
+    if (lockfd)
+        unlink(lockpath);
     VIR_FREE(pidpath);
+    VIR_FREE(lockpath);
     VIR_FORCE_CLOSE(fd);
     VIR_FORCE_CLOSE(passfd);
     VIR_FORCE_CLOSE(pidfd);
+    VIR_FORCE_CLOSE(lockfd);
     if (spawnDaemon)
         unlink(path);
     return -1;
-- 
2.0.5