On Thu, Aug 14, 2014 at 10:26:41AM +0100, Daniel P. Berrange wrote:
On Thu, Aug 14, 2014 at 11:23:27AM +0200, Martin Kletzander wrote:
> On Wed, Aug 13, 2014 at 04:09:09PM +0100, Daniel P. Berrange wrote:
> >On Wed, Jul 23, 2014 at 04:27:12PM +0200, Martin Kletzander wrote:
> >>This eliminates the need for active waiting.
> >>
> >>Resolves:
https://bugzilla.redhat.com/show_bug.cgi?id=927369
> >>
> >>Signed-off-by: Martin Kletzander <mkletzan(a)redhat.com>
> >>---
> >> src/rpc/virnetsocket.c | 102
++++++++++++++++++++++++++++++++++++++++---------
> >> 1 file changed, 83 insertions(+), 19 deletions(-)
> >>
> >>diff --git a/src/rpc/virnetsocket.c b/src/rpc/virnetsocket.c
[...]
> >>@@ -569,28 +574,82 @@ int
virNetSocketNewConnectUNIX(const char *path,
> >> if (remoteAddr.data.un.sun_path[0] == '@')
> >> remoteAddr.data.un.sun_path[0] = '\0';
> >>
> >>- retry:
> >>- if (connect(fd, &remoteAddr.data.sa, remoteAddr.len) < 0) {
> >>- if ((errno == ECONNREFUSED ||
> >>- errno == ENOENT) &&
> >>- spawnDaemon && retries < 20) {
> >>- VIR_DEBUG("Connection refused for %s, trying to spawn
%s",
> >>- path, binary);
> >>- if (retries == 0 &&
> >>- virNetSocketForkDaemon(binary) < 0)
> >>- goto error;
> >>+ if (spawnDaemon) {
> >>+ int err = 0;
> >>+ int rv = -1;
> >>+ int status = 0;
> >>+ pid_t pid = 0;
> >>
> >>- retries++;
> >>- usleep(1000 * 100 * retries);
> >>- goto retry;
> >>+ if ((passfd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
> >>+ virReportSystemError(errno, "%s", _("Failed to
create socket"));
> >>+ goto error;
> >> }
> >>
> >>- virReportSystemError(errno,
> >>- _("Failed to connect socket to
'%s'"),
> >>+ if (pipe2(errfd, O_CLOEXEC) < 0) {
> >>+ virReportSystemError(errno, "%s",
> >>+ _("Cannot create pipe for
child"));
> >>+ goto error;
> >>+ }
> >>+
> >>+ /*
> >>+ * We have to fork() here, because umask() is set
> >>+ * per-process, chmod() is racy and fchmod() has undefined
> >>+ * behaviour on sockets according to POSIX, so it doesn't
> >>+ * work outside Linux.
> >>+ */
> >>+
> >>+ if ((pid = virFork()) < 0)
> >>+ goto error;
> >>+
> >>+ if (pid == 0) {
> >>+ VIR_FORCE_CLOSE(errfd[0]);
> >>+
> >>+ umask(0077);
> >>+ rv = bind(passfd, &remoteAddr.data.sa, remoteAddr.len);
> >>+ if (rv < 0) {
> >>+ ignore_value(safewrite(errfd[1], &errno, sizeof(int)));
> >>+ }
> >>+ VIR_FORCE_CLOSE(errfd[1]);
> >>+ _exit(rv < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
> >>+ }
> >>+
> >>+ VIR_FORCE_CLOSE(errfd[1]);
> >>+ rv = virProcessWait(pid, &status, false);
> >>+ ignore_value(saferead(errfd[0], &err, sizeof(int)));
> >>+ VIR_FORCE_CLOSE(errfd[0]);
> >>+
> >>+ if (rv < 0 || status != EXIT_SUCCESS) {
> >>+ if (err) {
> >>+ virReportSystemError(err,
> >>+ _("Failed to bind socket to %s
"
> >>+ "in child process"),
> >>+ path);
> >>+ } else {
> >>+ virReportError(VIR_ERR_INTERNAL_ERROR,
> >>+ _("Failed to bind socket to %s "
> >>+ "in child process"),
> >>+ path);
> >>+ }
> >>+ goto error;
> >>+ }
> >>+
> >>+ if (listen(passfd, 0) < 0) {
> >>+ virReportSystemError(errno, "%s",
> >>+ _("Failed to listen on socket
that's about "
> >>+ "to be passed to the
daemon"));
> >>+ goto error;
> >>+ }
> >
> >Perhaps I'm miss-reading the code, but isn't this block gonig to
> >result in failure if libvirtd is already running (& listening on
> >the socket we try to bind to) and we requested auto-spawn ?
> >
> >>+ }
> >>+
> >>+ if (connect(fd, &remoteAddr.data.sa, remoteAddr.len) < 0) {
> >>+ virReportSystemError(errno, _("Failed to connect socket to
'%s'"),
> >> path);
> >> goto error;
> >> }
> >
> >Also I think there's still a race here because the already running
> >libvirtd daemon could be unfortunately shutting down right at the
> >same time we try to connect. So I think we still need to have a
> >re-try loop around the connect attempt to address that race. Obviously
> >it should be pretty damn rare now so won't have the problems that the
> >current loop has.
> >
>
> Wouldn't connect(), bind(), connect() be enough (for both issues)? Or
> do we need to try the dance few times again?
We need to retry the whole block including daemon spawn to be able to
handle it if the existing daemon is in the process of shutting down
I believe.
I probably expressed myself badly with that connect, bind, connect.
This was the difference I was talking about:
diff --git i/src/rpc/virnetsocket.c w/src/rpc/virnetsocket.c
index 46be541..a4d5dd5 100644
--- i/src/rpc/virnetsocket.c
+++ w/src/rpc/virnetsocket.c
@@ -574,7 +574,12 @@ int virNetSocketNewConnectUNIX(const char *path,
if (remoteAddr.data.un.sun_path[0] == '@')
remoteAddr.data.un.sun_path[0] = '\0';
- if (spawnDaemon) {
+ if (connect(fd, &remoteAddr.data.sa, remoteAddr.len) < 0 &&
+ !spawnDaemon) {
+ virReportSystemError(errno, _("Failed to connect socket to
'%s'"),
+ path);
+ goto error;
+ } else if (spawnDaemon) {
int err = 0;
int rv = -1;
int status = 0;
@@ -639,16 +644,16 @@ int virNetSocketNewConnectUNIX(const char *path,
"to be passed to the daemon"));
goto error;
}
- }
- if (connect(fd, &remoteAddr.data.sa, remoteAddr.len) < 0) {
- virReportSystemError(errno, _("Failed to connect socket to
'%s'"),
- path);
- goto error;
- }
+ if (connect(fd, &remoteAddr.data.sa, remoteAddr.len) < 0) {
+ virReportSystemError(errno, _("Failed to connect socket to
'%s'"),
+ path);
+ goto error;
+ }
- if (spawnDaemon && virNetSocketForkDaemon(binary, passfd) < 0)
- goto error;
+ if (virNetSocketForkDaemon(binary, passfd) < 0)
+ goto error;
+ }
localAddr.len = sizeof(localAddr.data);
if (getsockname(fd, &localAddr.data.sa, &localAddr.len) < 0) {
--
Martin