[libvirt] [PATCH] command: Discard FD_SETSIZE limit for opened files

Currently, virCommand implementation uses FD_ macros from sys/select.h. However, those cannot handle more opened files than FD_SETSIZE. Therefore switch to generalized implementation based on array of integers. --- src/util/command.c | 108 ++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 83 insertions(+), 25 deletions(-) diff --git a/src/util/command.c b/src/util/command.c index f5effdf..6a2a0eb 100644 --- a/src/util/command.c +++ b/src/util/command.c @@ -75,9 +75,10 @@ struct _virCommand { char *pwd; - /* XXX Use int[] if we ever need to support more than FD_SETSIZE fd's. */ - fd_set preserve; /* FDs to pass to child. */ - fd_set transfer; /* FDs to close in parent. */ + int *preserve; /* FDs to pass to child. */ + int preserve_size; + int *transfer; /* FDs to close in parent. */ + int transfer_size; unsigned int flags; @@ -130,6 +131,64 @@ static int virClearCapabilities(void) } # endif +/* + * virCommandFDIsSet: + * @fd: FD to test + * @set: the set + * @set_size: actual size of @set + * + * Check if FD is already in @set or not. + * + * Returns true if @set contains @fd, + * false otherwise. + */ +static bool +virCommandFDIsSet(int fd, + const int *set, + int set_size) +{ + int i = 0; + + while (i < set_size) + if (set[i++] == fd) + return true; + + return false; +} + +/* + * virCommandFDSet: + * @fd: FD to be put into @set + * @set: the set + * @set_size: actual size of @set + * + * This is practically generalized implementation + * of FD_SET() as we do not want to be limited + * by FD_SETSIZE. + * + * Returns true on success, false otherwise. + */ +static bool +virCommandFDSet(int fd, + int **set, + int *set_size) +{ + if (fd < 0 || !set || !set_size) + return false; + + if (virCommandFDIsSet(fd, *set, *set_size)) + return true; + + if (VIR_REALLOC_N(*set, *set_size + 1) < 0) { + virReportOOMError(); + return false; + } + + (*set)[*set_size] = fd; + (*set_size)++; + + return true; +} /** * virFork: @@ -303,7 +362,8 @@ prepareStdFd(int fd, int std) static int virExecWithHook(const char *const*argv, const char *const*envp, - const fd_set *keepfd, + const int *keepfd, + int keepfd_size, pid_t *retpid, int infd, int *outfd, int *errfd, unsigned int flags, @@ -430,7 +490,7 @@ virExecWithHook(const char *const*argv, for (i = 3; i < openmax; i++) { if (i == infd || i == childout || i == childerr) continue; - if (!keepfd || i >= FD_SETSIZE || !FD_ISSET(i, keepfd)) { + if (!keepfd || !virCommandFDIsSet(i, keepfd, keepfd_size)) { tmpfd = i; VIR_FORCE_CLOSE(tmpfd); } else if (virSetInherit(i, true) < 0) { @@ -619,7 +679,8 @@ virRun(const char *const *argv ATTRIBUTE_UNUSED, static int virExecWithHook(const char *const*argv ATTRIBUTE_UNUSED, const char *const*envp ATTRIBUTE_UNUSED, - const fd_set *keepfd ATTRIBUTE_UNUSED, + const int *keepfd ATTRIBUTE_UNUSED, + int keepfd_size ATTRIBUTE_UNUSED, pid_t *retpid ATTRIBUTE_UNUSED, int infd ATTRIBUTE_UNUSED, int *outfd ATTRIBUTE_UNUSED, @@ -687,8 +748,6 @@ virCommandNewArgs(const char *const*args) cmd->handshakeNotify[0] = -1; cmd->handshakeNotify[1] = -1; - FD_ZERO(&cmd->preserve); - FD_ZERO(&cmd->transfer); cmd->infd = cmd->outfd = cmd->errfd = -1; cmd->inpipe = -1; cmd->pid = -1; @@ -739,16 +798,16 @@ virCommandKeepFD(virCommandPtr cmd, int fd, bool transfer) if (!cmd) return fd > STDERR_FILENO; - if (fd <= STDERR_FILENO || FD_SETSIZE <= fd) { + if (fd <= STDERR_FILENO || + !virCommandFDSet(fd, &cmd->preserve, &cmd->preserve_size) || + (transfer && !virCommandFDSet(fd, &cmd->transfer, + &cmd->transfer_size))) { if (!cmd->has_error) cmd->has_error = -1; VIR_DEBUG("cannot preserve %d", fd); - return fd > STDERR_FILENO; + return true; } - FD_SET(fd, &cmd->preserve); - if (transfer) - FD_SET(fd, &cmd->transfer); return false; } @@ -2064,7 +2123,8 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid) ret = virExecWithHook((const char *const *)cmd->args, (const char *const *)cmd->env, - &cmd->preserve, + cmd->preserve, + cmd->preserve_size, &cmd->pid, cmd->infd, cmd->outfdptr, @@ -2077,13 +2137,11 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid) VIR_DEBUG("Command result %d, with PID %d", ret, (int)cmd->pid); - for (i = STDERR_FILENO + 1; i < FD_SETSIZE; i++) { - if (FD_ISSET(i, &cmd->transfer)) { - int tmpfd = i; - VIR_FORCE_CLOSE(tmpfd); - FD_CLR(i, &cmd->transfer); - } + for (i = 0; i < cmd->transfer_size; i++) { + VIR_FORCE_CLOSE(cmd->transfer[i]); } + cmd->transfer_size = 0; + VIR_FREE(cmd->transfer); if (ret == 0 && pid) *pid = cmd->pid; @@ -2448,11 +2506,8 @@ virCommandFree(virCommandPtr cmd) if (!cmd) return; - for (i = STDERR_FILENO + 1; i < FD_SETSIZE; i++) { - if (FD_ISSET(i, &cmd->transfer)) { - int tmpfd = i; - VIR_FORCE_CLOSE(tmpfd); - } + for (i = 0; i < cmd->transfer_size; i++) { + VIR_FORCE_CLOSE(cmd->transfer[i]); } VIR_FREE(cmd->inbuf); @@ -2482,5 +2537,8 @@ virCommandFree(virCommandPtr cmd) if (cmd->reap) virCommandAbort(cmd); + VIR_FREE(cmd->transfer); + VIR_FREE(cmd->preserve); + VIR_FREE(cmd); } -- 1.7.3.4

On 01/03/2012 04:14 AM, Michal Privoznik wrote:
Currently, virCommand implementation uses FD_ macros from sys/select.h. However, those cannot handle more opened files than FD_SETSIZE. Therefore switch to generalized implementation based on array of integers. --- src/util/command.c | 108 ++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 83 insertions(+), 25 deletions(-)
NACK. While I agree with the idea, I'd rather use virBitmap rather than open-coding an int array ourselves. -- Eric Blake eblake@redhat.com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 03.01.2012 17:33, Eric Blake wrote:
On 01/03/2012 04:14 AM, Michal Privoznik wrote:
Currently, virCommand implementation uses FD_ macros from sys/select.h. However, those cannot handle more opened files than FD_SETSIZE. Therefore switch to generalized implementation based on array of integers. --- src/util/command.c | 108 ++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 83 insertions(+), 25 deletions(-)
NACK. While I agree with the idea, I'd rather use virBitmap rather than open-coding an int array ourselves.
Okay, but that would require of extending virBitmap as it is not capable of growing dynamically. Anyway, that's not a big deal. Will post v2. Michal

On Tue, Jan 03, 2012 at 09:33:03AM -0700, Eric Blake wrote:
On 01/03/2012 04:14 AM, Michal Privoznik wrote:
Currently, virCommand implementation uses FD_ macros from sys/select.h. However, those cannot handle more opened files than FD_SETSIZE. Therefore switch to generalized implementation based on array of integers. --- src/util/command.c | 108 ++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 83 insertions(+), 25 deletions(-)
NACK. While I agree with the idea, I'd rather use virBitmap rather than open-coding an int array ourselves.
Is it worth considering the efficiency implications of virBitmap in this use case ? Michal's initial patch is pretty time & space efficient, since we only need to store & iterate over an array that is equal in size to the number of preserved FDs. If we use virBitmap, and have libvirtd configured to have a very large max_files ulimit, then the virBitmap mask will need to be pretty large and very sparse. eg, if we have ulimit=65536 and want to pass through of FD=60000 to a guest, we'll end up with a virBitmap 8192 bytes in size, and have to test 5999 empty bits before we find the FD we're passing through. This seems pretty wasteful to me. virBitmap is good where we need to store a large number of bits which are mostly contiguous. In virCommand, any single virCommandPtr instance though has very few bits, which are quite sparse. So while code reuse is good, IMHO, virBitmap is not really the best choice here. Now if we want to create a new 'virSet' class for efficiently dealing with sparse sets.....otherwise I'm inclined to say this patch is fine as it is. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 01/03/2012 11:06 AM, Daniel P. Berrange wrote:
On Tue, Jan 03, 2012 at 09:33:03AM -0700, Eric Blake wrote:
On 01/03/2012 04:14 AM, Michal Privoznik wrote:
Currently, virCommand implementation uses FD_ macros from sys/select.h. However, those cannot handle more opened files than FD_SETSIZE. Therefore switch to generalized implementation based on array of integers. --- src/util/command.c | 108 ++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 83 insertions(+), 25 deletions(-)
NACK. While I agree with the idea, I'd rather use virBitmap rather than open-coding an int array ourselves.
Is it worth considering the efficiency implications of virBitmap in this use case ?
Michal's initial patch is pretty time & space efficient, since we only need to store & iterate over an array that is equal in size to the number of preserved FDs.
Now if we want to create a new 'virSet' class for efficiently dealing with sparse sets.....otherwise I'm inclined to say this patch is fine as it is.
Fair points. I'd call such a new class virBitset, rather than virSet, but agree that the number of preserved FDs is small enough that the efficiency hit of iterating through the set is not that bad, so the patch as-is shouldn't be tied up waiting for a new efficient sparse bit-set data structure. I retract my NACK. -- Eric Blake eblake@redhat.com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 01/03/2012 04:14 AM, Michal Privoznik wrote:
Currently, virCommand implementation uses FD_ macros from sys/select.h. However, those cannot handle more opened files than FD_SETSIZE. Therefore switch to generalized implementation based on array of integers. --- src/util/command.c | 108 ++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 83 insertions(+), 25 deletions(-) + * + * Returns true on success, false otherwise. + */ +static bool +virCommandFDSet(int fd, + int **set, + int *set_size) +{ + if (fd < 0 || !set || !set_size) + return false;
I'd rather return -1 here on usage error,
+ + if (virCommandFDIsSet(fd, *set, *set_size)) + return true; + + if (VIR_REALLOC_N(*set, *set_size + 1) < 0) { + virReportOOMError(); + return false;
and ENOMEM on memory allocation failure, with no virReportOOMError() at this phase of the game,
+ } + + (*set)[*set_size] = fd; + (*set_size)++; + + return true;
and 0 on success...
@@ -739,16 +798,16 @@ virCommandKeepFD(virCommandPtr cmd, int fd, bool transfer) if (!cmd) return fd > STDERR_FILENO;
- if (fd <= STDERR_FILENO || FD_SETSIZE <= fd) { + if (fd <= STDERR_FILENO || + !virCommandFDSet(fd, &cmd->preserve, &cmd->preserve_size) || + (transfer && !virCommandFDSet(fd, &cmd->transfer, + &cmd->transfer_size))) { if (!cmd->has_error) cmd->has_error = -1;
so that down here, we capture the output of virCommandFDSet and assign it to cmd->has_error if it was non-zero. That way, an allocation error will propagate all the way back to the user, by giving them the virReportOOMError() at the point in time where they call virCommandRun(). That is, if the user is constructing a virCommandPtr by pieces, and encounters some _other_ error along the way, we aren't overwriting their other error with virReportOOMError(). The virCommand code should _only_ emit errors on the code paths where the user has completed all their other processing, and requires the virCommandPtr to be successfully built by reaching the virCommandRun() code. -- Eric Blake eblake@redhat.com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

Currently, virCommand implementation uses FD_ macros from sys/select.h. However, those cannot handle more opened files than FD_SETSIZE. Therefore switch to generalized implementation based on array of integers. --- diff to v1: - Eric's review included src/util/command.c | 113 ++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 87 insertions(+), 26 deletions(-) diff --git a/src/util/command.c b/src/util/command.c index bdaa88b..41fb020 100644 --- a/src/util/command.c +++ b/src/util/command.c @@ -75,9 +75,10 @@ struct _virCommand { char *pwd; - /* XXX Use int[] if we ever need to support more than FD_SETSIZE fd's. */ - fd_set preserve; /* FDs to pass to child. */ - fd_set transfer; /* FDs to close in parent. */ + int *preserve; /* FDs to pass to child. */ + int preserve_size; + int *transfer; /* FDs to close in parent. */ + int transfer_size; unsigned int flags; @@ -130,6 +131,65 @@ static int virClearCapabilities(void) } # endif +/* + * virCommandFDIsSet: + * @fd: FD to test + * @set: the set + * @set_size: actual size of @set + * + * Check if FD is already in @set or not. + * + * Returns true if @set contains @fd, + * false otherwise. + */ +static bool +virCommandFDIsSet(int fd, + const int *set, + int set_size) +{ + int i = 0; + + while (i < set_size) + if (set[i++] == fd) + return true; + + return false; +} + +/* + * virCommandFDSet: + * @fd: FD to be put into @set + * @set: the set + * @set_size: actual size of @set + * + * This is practically generalized implementation + * of FD_SET() as we do not want to be limited + * by FD_SETSIZE. + * + * Returns: 0 on success, + * -1 on usage error, + * ENOMEM on OOM + */ +static int +virCommandFDSet(int fd, + int **set, + int *set_size) +{ + if (fd < 0 || !set || !set_size) + return -1; + + if (virCommandFDIsSet(fd, *set, *set_size)) + return 0; + + if (VIR_REALLOC_N(*set, *set_size + 1) < 0) { + return ENOMEM; + } + + (*set)[*set_size] = fd; + (*set_size)++; + + return 0; +} /** * virFork: @@ -303,7 +363,8 @@ prepareStdFd(int fd, int std) static int virExecWithHook(const char *const*argv, const char *const*envp, - const fd_set *keepfd, + const int *keepfd, + int keepfd_size, pid_t *retpid, int infd, int *outfd, int *errfd, unsigned int flags, @@ -430,7 +491,7 @@ virExecWithHook(const char *const*argv, for (i = 3; i < openmax; i++) { if (i == infd || i == childout || i == childerr) continue; - if (!keepfd || i >= FD_SETSIZE || !FD_ISSET(i, keepfd)) { + if (!keepfd || !virCommandFDIsSet(i, keepfd, keepfd_size)) { tmpfd = i; VIR_FORCE_CLOSE(tmpfd); } else if (virSetInherit(i, true) < 0) { @@ -619,7 +680,8 @@ virRun(const char *const *argv ATTRIBUTE_UNUSED, static int virExecWithHook(const char *const*argv ATTRIBUTE_UNUSED, const char *const*envp ATTRIBUTE_UNUSED, - const fd_set *keepfd ATTRIBUTE_UNUSED, + const int *keepfd ATTRIBUTE_UNUSED, + int keepfd_size ATTRIBUTE_UNUSED, pid_t *retpid ATTRIBUTE_UNUSED, int infd ATTRIBUTE_UNUSED, int *outfd ATTRIBUTE_UNUSED, @@ -687,8 +749,6 @@ virCommandNewArgs(const char *const*args) cmd->handshakeNotify[0] = -1; cmd->handshakeNotify[1] = -1; - FD_ZERO(&cmd->preserve); - FD_ZERO(&cmd->transfer); cmd->infd = cmd->outfd = cmd->errfd = -1; cmd->inpipe = -1; cmd->pid = -1; @@ -736,19 +796,21 @@ virCommandNewArgList(const char *binary, ...) static bool virCommandKeepFD(virCommandPtr cmd, int fd, bool transfer) { + int ret = 0; + if (!cmd) return fd > STDERR_FILENO; - if (fd <= STDERR_FILENO || FD_SETSIZE <= fd) { + if (fd <= STDERR_FILENO || + (ret = virCommandFDSet(fd, &cmd->preserve, &cmd->preserve_size)) || + (transfer && (ret = virCommandFDSet(fd, &cmd->transfer, + &cmd->transfer_size)))) { if (!cmd->has_error) - cmd->has_error = -1; + cmd->has_error = ret ? : -1 ; VIR_DEBUG("cannot preserve %d", fd); - return fd > STDERR_FILENO; + return true; } - FD_SET(fd, &cmd->preserve); - if (transfer) - FD_SET(fd, &cmd->transfer); return false; } @@ -2082,7 +2144,8 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid) ret = virExecWithHook((const char *const *)cmd->args, (const char *const *)cmd->env, - &cmd->preserve, + cmd->preserve, + cmd->preserve_size, &cmd->pid, cmd->infd, cmd->outfdptr, @@ -2095,13 +2158,11 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid) VIR_DEBUG("Command result %d, with PID %d", ret, (int)cmd->pid); - for (i = STDERR_FILENO + 1; i < FD_SETSIZE; i++) { - if (FD_ISSET(i, &cmd->transfer)) { - int tmpfd = i; - VIR_FORCE_CLOSE(tmpfd); - FD_CLR(i, &cmd->transfer); - } + for (i = 0; i < cmd->transfer_size; i++) { + VIR_FORCE_CLOSE(cmd->transfer[i]); } + cmd->transfer_size = 0; + VIR_FREE(cmd->transfer); if (ret == 0 && pid) *pid = cmd->pid; @@ -2466,11 +2527,8 @@ virCommandFree(virCommandPtr cmd) if (!cmd) return; - for (i = STDERR_FILENO + 1; i < FD_SETSIZE; i++) { - if (FD_ISSET(i, &cmd->transfer)) { - int tmpfd = i; - VIR_FORCE_CLOSE(tmpfd); - } + for (i = 0; i < cmd->transfer_size; i++) { + VIR_FORCE_CLOSE(cmd->transfer[i]); } VIR_FREE(cmd->inbuf); @@ -2500,5 +2558,8 @@ virCommandFree(virCommandPtr cmd) if (cmd->reap) virCommandAbort(cmd); + VIR_FREE(cmd->transfer); + VIR_FREE(cmd->preserve); + VIR_FREE(cmd); } -- 1.7.3.4

On 01/04/2012 09:58 AM, Michal Privoznik wrote:
Currently, virCommand implementation uses FD_ macros from sys/select.h. However, those cannot handle more opened files than FD_SETSIZE. Therefore switch to generalized implementation based on array of integers. --- diff to v1: - Eric's review included
src/util/command.c | 113 ++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 87 insertions(+), 26 deletions(-)
diff --git a/src/util/command.c b/src/util/command.c index bdaa88b..41fb020 100644 --- a/src/util/command.c +++ b/src/util/command.c @@ -75,9 +75,10 @@ struct _virCommand {
char *pwd;
- /* XXX Use int[] if we ever need to support more than FD_SETSIZE fd's. */ - fd_set preserve; /* FDs to pass to child. */ - fd_set transfer; /* FDs to close in parent. */ + int *preserve; /* FDs to pass to child. */ + int preserve_size; + int *transfer; /* FDs to close in parent. */ + int transfer_size;
Normally, I'd say "use size_t" for array sizes. But, as these are arrays of fds (aka non-negative ints), and there are no duplicates in the array (other than -1 once you have closed an fd), an array sized by int is sufficient. Meanwhile, if we were to worry about transferring a LARGE number of fds, then I'd suggest using VIR_EXPAND_N, which requires tracking both allocation and usage sizes as size_t, but I don't think we are hitting that much overhead in any of our virCommand usage. So no need to change this.
@@ -736,19 +796,21 @@ virCommandNewArgList(const char *binary, ...) static bool virCommandKeepFD(virCommandPtr cmd, int fd, bool transfer) { + int ret = 0; + if (!cmd) return fd > STDERR_FILENO;
- if (fd <= STDERR_FILENO || FD_SETSIZE <= fd) { + if (fd <= STDERR_FILENO || + (ret = virCommandFDSet(fd, &cmd->preserve, &cmd->preserve_size)) || + (transfer && (ret = virCommandFDSet(fd, &cmd->transfer, + &cmd->transfer_size)))) { if (!cmd->has_error) - cmd->has_error = -1; + cmd->has_error = ret ? : -1 ;
That's a gcc extension. Spell it out explicitly: cmd->has_error = ret ? ret : -1;
VIR_DEBUG("cannot preserve %d", fd); - return fd > STDERR_FILENO; + return true;
Don't change this line. Otherwise, calling virCommandTransferFD(cmd, 2) (a coding bug) would close stderr, instead of diagnosing the coding bug. ACK with those two lines fixed. -- Eric Blake eblake@redhat.com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 04.01.2012 19:51, Eric Blake wrote:
On 01/04/2012 09:58 AM, Michal Privoznik wrote:
Currently, virCommand implementation uses FD_ macros from sys/select.h. However, those cannot handle more opened files than FD_SETSIZE. Therefore switch to generalized implementation based on array of integers. --- diff to v1: - Eric's review included
src/util/command.c | 113 ++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 87 insertions(+), 26 deletions(-)
ACK with those two lines fixed.
Thanks, fixed & pushed. Michal
participants (3)
-
Daniel P. Berrange
-
Eric Blake
-
Michal Privoznik