[libvirt] [PATCH v3] qemu: Pass file descriptor when using TPM passthrough

Pass the TPM file descriptor to QEMU via command line. Instead of passing /dev/tpm0 we now pass /dev/fdset/10 and the additional parameters -add-fd set=10,fd=20. This addresses the use case when QEMU is started with non-root privileges and QEMU cannot open /dev/tpm0 for example. One problem is that for the passing of the file descriptor set to work, virCommandReorderFDs must not be called on the virCommand. This is prevented by setting a flag in the virCommandPassFDGetFDIndex that is checked to be clear when virCommandReorderFDs is run. Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com> v2->v3: Fixed some memory leaks --- src/libvirt_private.syms | 1 + src/qemu/qemu_command.c | 136 ++++++++++++++++++++++++++++++++++++++++++++--- src/util/vircommand.c | 33 ++++++++++++ src/util/vircommand.h | 3 ++ 4 files changed, 166 insertions(+), 7 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index aeec440..3194e8b 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1164,6 +1164,7 @@ virCommandNewArgList; virCommandNewArgs; virCommandNonblockingFDs; virCommandPassFD; +virCommandPassFDGetFDIndex; virCommandPassListenFDs; virCommandRawStatus; virCommandRequireHandshake; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 8ed7934..17debba 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -159,6 +159,58 @@ VIR_ENUM_IMPL(qemuNumaPolicy, VIR_DOMAIN_NUMATUNE_MEM_LAST, "interleave"); /** + * qemuVirCommandGetFDSet: + * @cmd: the command to modify + * @fd: fd to reassign to the child + * + * Get the parameters for the QEMU -add-fd command line option + * for the given file descriptor. The file descriptor must previously + * have been 'transferred' in a virCommandPassFD() call. + * This function for example returns "set=10,fd=20". + */ +static char * +qemuVirCommandGetFDSet(virCommandPtr cmd, int fd) +{ + char *result = NULL; + int idx = virCommandPassFDGetFDIndex(cmd, fd); + + if (idx >= 0) { + ignore_value(virAsprintf(&result, "set=%d,fd=%d", idx, fd) < 0); + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("file descriptor %d has not been transferred"), fd); + } + + return result; +} + +/** + * qemuVirCommandGetDevSet: + * @cmd: the command to modify + * @fd: fd to reassign to the child + * + * Get the parameters for the QEMU path= parameter where a file + * descriptor is accessed via a file descriptor set, for example + * /dev/fdset/10. The file descriptor must previously have been + * 'transferred' in a virCommandPassFD() call. + */ +static char * +qemuVirCommandGetDevSet(virCommandPtr cmd, int fd) +{ + char *result = NULL; + int idx = virCommandPassFDGetFDIndex(cmd, fd); + + if (idx >= 0) { + ignore_value(virAsprintf(&result, "/dev/fdset/%d", idx) < 0); + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("file descriptor %d has not been transferred"), fd); + } + return result; +} + + +/** * qemuPhysIfaceConnect: * @def: the definition of the VM (needed by 802.1Qbh and audit) * @driver: pointer to the driver instance @@ -5926,14 +5978,20 @@ qemuBuildRNGDeviceArgs(virCommandPtr cmd, static char *qemuBuildTPMBackendStr(const virDomainDef *def, + virCommandPtr cmd, virQEMUCapsPtr qemuCaps, - const char *emulator) + const char *emulator, + int *tpmfd, int *cancelfd) { const virDomainTPMDef *tpm = def->tpm; virBuffer buf = VIR_BUFFER_INITIALIZER; const char *type = virDomainTPMBackendTypeToString(tpm->type); - char *cancel_path; + char *cancel_path = NULL; const char *tpmdev; + char *devset = NULL, *cancel_devset = NULL; + + *tpmfd = -1; + *cancelfd = -1; virBufferAsprintf(&buf, "%s,id=tpm-%s", type, tpm->info.alias); @@ -5946,11 +6004,49 @@ static char *qemuBuildTPMBackendStr(const virDomainDef *def, if (!(cancel_path = virTPMCreateCancelPath(tpmdev))) goto error; - virBufferAddLit(&buf, ",path="); - virBufferEscape(&buf, ',', ",", "%s", tpmdev); + if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_ADD_FD)) { + *tpmfd = open(tpmdev, O_RDWR); + if (*tpmfd < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Could not open TPM device %s"), tpmdev); + goto error; + } + + virCommandPassFD(cmd, *tpmfd, + VIR_COMMAND_PASS_FD_CLOSE_PARENT); + devset = qemuVirCommandGetDevSet(cmd, *tpmfd); + if (devset == NULL) + goto error; + + *cancelfd = open(cancel_path, O_WRONLY); + if (*cancelfd < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Could not open TPM device's cancel path " + "%s"), cancel_path); + goto error; + } + + virCommandPassFD(cmd, *cancelfd, + VIR_COMMAND_PASS_FD_CLOSE_PARENT); + cancel_devset = qemuVirCommandGetDevSet(cmd, *cancelfd); + if (cancel_devset == NULL) + goto error; + + virBufferAddLit(&buf, ",path="); + virBufferEscape(&buf, ',', ",", "%s", devset); + VIR_FREE(devset); - virBufferAddLit(&buf, ",cancel-path="); - virBufferEscape(&buf, ',', ",", "%s", cancel_path); + virBufferAddLit(&buf, ",cancel-path="); + virBufferEscape(&buf, ',', ",", "%s", cancel_devset); + VIR_FREE(cancel_devset); + } else { + /* all test cases will use this path */ + virBufferAddLit(&buf, ",path="); + virBufferEscape(&buf, ',', ",", "%s", tpmdev); + + virBufferAddLit(&buf, ",cancel-path="); + virBufferEscape(&buf, ',', ",", "%s", cancel_path); + } VIR_FREE(cancel_path); break; @@ -5970,6 +6066,10 @@ static char *qemuBuildTPMBackendStr(const virDomainDef *def, emulator, type); error: + VIR_FREE(devset); + VIR_FREE(cancel_devset); + VIR_FREE(cancel_path); + virBufferFreeAndReset(&buf); return NULL; } @@ -9223,13 +9323,35 @@ qemuBuildCommandLine(virConnectPtr conn, if (def->tpm) { char *optstr; + int tpmfd = -1; + int cancelfd = -1; + char *fdset; - if (!(optstr = qemuBuildTPMBackendStr(def, qemuCaps, emulator))) + if (!(optstr = qemuBuildTPMBackendStr(def, cmd, qemuCaps, emulator, + &tpmfd, &cancelfd))) goto error; virCommandAddArgList(cmd, "-tpmdev", optstr, NULL); VIR_FREE(optstr); + if (tpmfd >= 0) { + fdset = qemuVirCommandGetFDSet(cmd, tpmfd); + if (!fdset) + goto error; + + virCommandAddArgList(cmd, "-add-fd", fdset, NULL); + VIR_FREE(fdset); + } + + if (cancelfd >= 0) { + fdset = qemuVirCommandGetFDSet(cmd, cancelfd); + if (!fdset) + goto error; + + virCommandAddArgList(cmd, "-add-fd", fdset, NULL); + VIR_FREE(fdset); + } + if (!(optstr = qemuBuildTPMDevStr(def, qemuCaps, emulator))) goto error; diff --git a/src/util/vircommand.c b/src/util/vircommand.c index 6527d85..2616446 100644 --- a/src/util/vircommand.c +++ b/src/util/vircommand.c @@ -67,6 +67,7 @@ enum { VIR_EXEC_RUN_SYNC = (1 << 3), VIR_EXEC_ASYNC_IO = (1 << 4), VIR_EXEC_LISTEN_FDS = (1 << 5), + VIR_EXEC_FIXED_FDS = (1 << 6), }; typedef struct _virCommandFD virCommandFD; @@ -214,6 +215,12 @@ virCommandReorderFDs(virCommandPtr cmd) if (!cmd || cmd->has_error || !cmd->npassfd) return; + if ((cmd->flags & VIR_EXEC_FIXED_FDS)) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("The fds are fixed and cannot be reordered")); + goto error; + } + for (i = 0; i < cmd->npassfd; i++) maxfd = MAX(cmd->passfd[i].fd, maxfd); @@ -1019,6 +1026,32 @@ virCommandPassListenFDs(virCommandPtr cmd) cmd->flags |= VIR_EXEC_LISTEN_FDS; } +/* + * virCommandPassFDGetFDIndex: + * @cmd: pointer to virCommand + * @fd: FD to get index of + * + * Determine the index of the FD in the transfer set. + * + * Returns index >= 0 if @set contains @fd, + * -1 otherwise. + */ +int +virCommandPassFDGetFDIndex(virCommandPtr cmd, int fd) +{ + size_t i = 0; + + while (i < cmd->npassfd) { + if (cmd->passfd[i].fd == fd) { + cmd->flags |= VIR_EXEC_FIXED_FDS; + return i; + } + i++; + } + + return -1; +} + /** * virCommandSetPidFile: * @cmd: the command to modify diff --git a/src/util/vircommand.h b/src/util/vircommand.h index bf65de4..198da2f 100644 --- a/src/util/vircommand.h +++ b/src/util/vircommand.h @@ -62,6 +62,9 @@ void virCommandPassFD(virCommandPtr cmd, void virCommandPassListenFDs(virCommandPtr cmd); +int virCommandPassFDGetFDIndex(virCommandPtr cmd, + int fd); + void virCommandSetPidFile(virCommandPtr cmd, const char *pidfile) ATTRIBUTE_NONNULL(2); -- 1.9.3

On 11/20/2014 08:08 AM, Stefan Berger wrote: Wow, I've been horribly slow at reviewing this. Do feel free to ping on list if no one seems to notice a patch, to widen the chances of anyone taking a glance at it.
Pass the TPM file descriptor to QEMU via command line. Instead of passing /dev/tpm0 we now pass /dev/fdset/10 and the additional parameters -add-fd set=10,fd=20.
This addresses the use case when QEMU is started with non-root privileges and QEMU cannot open /dev/tpm0 for example.
One problem is that for the passing of the file descriptor set to work, virCommandReorderFDs must not be called on the virCommand. This is prevented by setting a flag in the virCommandPassFDGetFDIndex that is checked to be clear when virCommandReorderFDs is run.
I'm still trying to figure out how virCommandReorderFDs() got into the picture (I didn't write that section of the code); when I originally worked on virCommand, the only way to pass fds to the child was in direct positions (same fd in child as in parent), not by remapping one fd in the parent to another position in the child, except for the three standard descriptors. It looks like virCommandReorderFDs was added to allow remapping other fds and populates the LISTEN_FDS environment variable with how many fds were thus mapped. So the two approaches don't really mix. Do we ever use virCommandPassListenFDs() on a virCommand that will also do raw fd passthrough? Maybe the real thing to do is to track at virCommand build-up time that only one of the two passthrough methods can be used, and fail loudly if a programmer tries to do both methods at once. Or maybe we should ALWAYS prepare to remap fds in the child, so that the child receives fds in contiguous order with no gaps. It might simplify the code base to always reorder things, and have the mapping done up front. That is, change the virCommandPassFD() function to return an integer of which next consecutive fd the child will see, or -1 on failure. Callers that then need to alter the command line of the child will have to pay attention to the return value (something a bit different than most virCommand build-up, which intentionally defer error checking to the very end), but it might be worth it. At any rate, this patch needs to be split into at least two - first, a patch to just util/vircommand.c and tests/commandtest.c to tweak the command passing functionality AND test that any new semantics work while no old semantics are broken (except maybe that we now explicitly reject a combination that previously was silently accepted but made no sense), and second the patch to qemu to use the enhancements in virCommand for the TPM feature at hand.
Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>
v2->v3: Fixed some memory leaks --- src/libvirt_private.syms | 1 + src/qemu/qemu_command.c | 136 ++++++++++++++++++++++++++++++++++++++++++++--- src/util/vircommand.c | 33 ++++++++++++ src/util/vircommand.h | 3 ++ 4 files changed, 166 insertions(+), 7 deletions(-)
/** + * qemuVirCommandGetFDSet: + * @cmd: the command to modify + * @fd: fd to reassign to the child + * + * Get the parameters for the QEMU -add-fd command line option + * for the given file descriptor. The file descriptor must previously + * have been 'transferred' in a virCommandPassFD() call.
Would it make any more sense for this function to do the transfer instead of making the caller do it?
+ * This function for example returns "set=10,fd=20". + */ +static char * +qemuVirCommandGetFDSet(virCommandPtr cmd, int fd) +{ + char *result = NULL; + int idx = virCommandPassFDGetFDIndex(cmd, fd); + + if (idx >= 0) { + ignore_value(virAsprintf(&result, "set=%d,fd=%d", idx, fd) < 0); + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("file descriptor %d has not been transferred"), fd); + } + + return result; +} + +/** + * qemuVirCommandGetDevSet: + * @cmd: the command to modify + * @fd: fd to reassign to the child + * + * Get the parameters for the QEMU path= parameter where a file + * descriptor is accessed via a file descriptor set, for example + * /dev/fdset/10. The file descriptor must previously have been + * 'transferred' in a virCommandPassFD() call.
Oh, maybe not, since the caller marks the fd for passing once, but then calls multiple helper methods to format multiple parameters to qemu that end up working together to describe the fd used by the child.
+ */ +static char * +qemuVirCommandGetDevSet(virCommandPtr cmd, int fd) +{ + char *result = NULL; + int idx = virCommandPassFDGetFDIndex(cmd, fd); + + if (idx >= 0) { + ignore_value(virAsprintf(&result, "/dev/fdset/%d", idx) < 0); + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("file descriptor %d has not been transferred"), fd); + } + return result; +} + + +/** * qemuPhysIfaceConnect: * @def: the definition of the VM (needed by 802.1Qbh and audit) * @driver: pointer to the driver instance @@ -5926,14 +5978,20 @@ qemuBuildRNGDeviceArgs(virCommandPtr cmd,
static char *qemuBuildTPMBackendStr(const virDomainDef *def, + virCommandPtr cmd, virQEMUCapsPtr qemuCaps, - const char *emulator) + const char *emulator, + int *tpmfd, int *cancelfd) { const virDomainTPMDef *tpm = def->tpm; virBuffer buf = VIR_BUFFER_INITIALIZER; const char *type = virDomainTPMBackendTypeToString(tpm->type); - char *cancel_path; + char *cancel_path = NULL; const char *tpmdev; + char *devset = NULL, *cancel_devset = NULL; + + *tpmfd = -1; + *cancelfd = -1;
virBufferAsprintf(&buf, "%s,id=tpm-%s", type, tpm->info.alias);
@@ -5946,11 +6004,49 @@ static char *qemuBuildTPMBackendStr(const virDomainDef *def, if (!(cancel_path = virTPMCreateCancelPath(tpmdev))) goto error;
- virBufferAddLit(&buf, ",path="); - virBufferEscape(&buf, ',', ",", "%s", tpmdev); + if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_ADD_FD)) { + *tpmfd = open(tpmdev, O_RDWR); + if (*tpmfd < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Could not open TPM device %s"), tpmdev);
Here, we should use virReportSystemError and pass along the errno value from the failed open().
+ goto error; + } + + virCommandPassFD(cmd, *tpmfd, + VIR_COMMAND_PASS_FD_CLOSE_PARENT); + devset = qemuVirCommandGetDevSet(cmd, *tpmfd); + if (devset == NULL) + goto error; + + *cancelfd = open(cancel_path, O_WRONLY); + if (*cancelfd < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Could not open TPM device's cancel path " + "%s"), cancel_path); + goto error; + } + + virCommandPassFD(cmd, *cancelfd, + VIR_COMMAND_PASS_FD_CLOSE_PARENT); + cancel_devset = qemuVirCommandGetDevSet(cmd, *cancelfd); + if (cancel_devset == NULL) + goto error; + + virBufferAddLit(&buf, ",path="); + virBufferEscape(&buf, ',', ",", "%s", devset); + VIR_FREE(devset);
- virBufferAddLit(&buf, ",cancel-path="); - virBufferEscape(&buf, ',', ",", "%s", cancel_path); + virBufferAddLit(&buf, ",cancel-path="); + virBufferEscape(&buf, ',', ",", "%s", cancel_devset); + VIR_FREE(cancel_devset); + } else { + /* all test cases will use this path */ + virBufferAddLit(&buf, ",path="); + virBufferEscape(&buf, ',', ",", "%s", tpmdev); + + virBufferAddLit(&buf, ",cancel-path="); + virBufferEscape(&buf, ',', ",", "%s", cancel_path); + }
The else clause looks redundant with the last few lines of the if clause. Why not just sink it to occur just once after the 'if', as long as the 'if' side does a mere tmpdev=devset?
VIR_FREE(cancel_path);
break; @@ -5970,6 +6066,10 @@ static char *qemuBuildTPMBackendStr(const virDomainDef *def, emulator, type);
error: + VIR_FREE(devset); + VIR_FREE(cancel_devset); + VIR_FREE(cancel_path); + virBufferFreeAndReset(&buf); return NULL; } @@ -9223,13 +9323,35 @@ qemuBuildCommandLine(virConnectPtr conn,
if (def->tpm) { char *optstr;
This function is really huge. Yeah, we've not been good at breaking it into chunks in the past, but I think this addition would be nicer if the code inside 'if (def->tpm)' were extracted into a smaller helper function. But if we do that refactoring, the code motion of the existing pre-patch code should be its own patch.
+ int tpmfd = -1; + int cancelfd = -1; + char *fdset;
- if (!(optstr = qemuBuildTPMBackendStr(def, qemuCaps, emulator))) + if (!(optstr = qemuBuildTPMBackendStr(def, cmd, qemuCaps, emulator, + &tpmfd, &cancelfd))) goto error;
virCommandAddArgList(cmd, "-tpmdev", optstr, NULL); VIR_FREE(optstr);
+ if (tpmfd >= 0) { + fdset = qemuVirCommandGetFDSet(cmd, tpmfd); + if (!fdset) + goto error; + + virCommandAddArgList(cmd, "-add-fd", fdset, NULL); + VIR_FREE(fdset); + } + + if (cancelfd >= 0) { + fdset = qemuVirCommandGetFDSet(cmd, cancelfd); + if (!fdset) + goto error; + + virCommandAddArgList(cmd, "-add-fd", fdset, NULL); + VIR_FREE(fdset); + } + if (!(optstr = qemuBuildTPMDevStr(def, qemuCaps, emulator))) goto error;
diff --git a/src/util/vircommand.c b/src/util/vircommand.c index 6527d85..2616446 100644 --- a/src/util/vircommand.c +++ b/src/util/vircommand.c @@ -67,6 +67,7 @@ enum { VIR_EXEC_RUN_SYNC = (1 << 3), VIR_EXEC_ASYNC_IO = (1 << 4), VIR_EXEC_LISTEN_FDS = (1 << 5), + VIR_EXEC_FIXED_FDS = (1 << 6), };
typedef struct _virCommandFD virCommandFD; @@ -214,6 +215,12 @@ virCommandReorderFDs(virCommandPtr cmd) if (!cmd || cmd->has_error || !cmd->npassfd) return;
+ if ((cmd->flags & VIR_EXEC_FIXED_FDS)) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("The fds are fixed and cannot be reordered")); + goto error; + }
Okay, so you are enforcing that a caller can't pick two conflicting fd passing methods on the same virCommand, but my point remains that the testsuite should prove we can hit this error case, to avoid regressions. Or we should redo virCommand to ALWAYS rearrange fds in the child, and make it possible for a caller passing an fd down to know what fd the child will see it as.
+ for (i = 0; i < cmd->npassfd; i++) maxfd = MAX(cmd->passfd[i].fd, maxfd);
@@ -1019,6 +1026,32 @@ virCommandPassListenFDs(virCommandPtr cmd) cmd->flags |= VIR_EXEC_LISTEN_FDS; }
+/* + * virCommandPassFDGetFDIndex: + * @cmd: pointer to virCommand + * @fd: FD to get index of + * + * Determine the index of the FD in the transfer set. + * + * Returns index >= 0 if @set contains @fd, + * -1 otherwise. + */ +int +virCommandPassFDGetFDIndex(virCommandPtr cmd, int fd) +{ + size_t i = 0; + + while (i < cmd->npassfd) { + if (cmd->passfd[i].fd == fd) { + cmd->flags |= VIR_EXEC_FIXED_FDS; + return i;
So, the mapping you are using is that the first fd passed through the child is eventually associated with /dev/fdset/$i, where $i represents which slot of cmd->passfd the descriptor was found in. I know the reason qemu added /dev/fdset/nnn was to allow the possibility of passing in sets of descriptors (such as a read-only and read-write handle both visiting the same file, so in the same fdset), but simplicity argues that unless we need that complexity, always naming our fdsets by the same fd that the child will first use is easier than trying to track the mapping between a parent fd and which order the fd was registered in. So I'm wondering if we change virCommandPassFD to always remap in the child and return an integer (the next consecutive integer starting at 3), then just use '-add-fd set=3,fd=3 ... /dev/fdset/3' is any simpler than trying to have set number unrelated to the fd number.
+ } + i++; + } + + return -1; +} + /** * virCommandSetPidFile: * @cmd: the command to modify diff --git a/src/util/vircommand.h b/src/util/vircommand.h index bf65de4..198da2f 100644 --- a/src/util/vircommand.h +++ b/src/util/vircommand.h @@ -62,6 +62,9 @@ void virCommandPassFD(virCommandPtr cmd,
void virCommandPassListenFDs(virCommandPtr cmd);
+int virCommandPassFDGetFDIndex(virCommandPtr cmd, + int fd); + void virCommandSetPidFile(virCommandPtr cmd, const char *pidfile) ATTRIBUTE_NONNULL(2);
Overall, I can see what you are hoping to do, and it's my fault for taking this long to get back to you on it. As penance, if you like my idea of changing virCommand to always remap child fds into consecutive orders, should I pitch in and help get that part of the patch in good condition? -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 01/30/2015 07:17 PM, Eric Blake wrote:
+ } + i++; + } + + return -1; +} + /** * virCommandSetPidFile: * @cmd: the command to modify diff --git a/src/util/vircommand.h b/src/util/vircommand.h index bf65de4..198da2f 100644 --- a/src/util/vircommand.h +++ b/src/util/vircommand.h @@ -62,6 +62,9 @@ void virCommandPassFD(virCommandPtr cmd,
void virCommandPassListenFDs(virCommandPtr cmd);
+int virCommandPassFDGetFDIndex(virCommandPtr cmd, + int fd); + void virCommandSetPidFile(virCommandPtr cmd, const char *pidfile) ATTRIBUTE_NONNULL(2);
Overall, I can see what you are hoping to do, and it's my fault for taking this long to get back to you on it. As penance, if you like my idea of changing virCommand to always remap child fds into consecutive orders, should I pitch in and help get that part of the patch in good condition?
Sounds good ;-) I'll get to the rest of your comments 'later'. Stefan

On 01/30/2015 07:17 PM, Eric Blake wrote:
On 11/20/2014 08:08 AM, Stefan Berger wrote:
Pass the TPM file descriptor to QEMU via command line. Instead of passing /dev/tpm0 we now pass /dev/fdset/10 and the additional parameters -add-fd set=10,fd=20.
This addresses the use case when QEMU is started with non-root privileges and QEMU cannot open /dev/tpm0 for example.
One problem is that for the passing of the file descriptor set to work, virCommandReorderFDs must not be called on the virCommand. This is prevented by setting a flag in the virCommandPassFDGetFDIndex that is checked to be clear when virCommandReorderFDs is run. I'm still trying to figure out how virCommandReorderFDs() got into the picture (I didn't write that section of the code); when I originally
Well, I also only stumbled upon this function and found it being used in a different area of the code and thought it wouldn't interfere with what we are doing here.
worked on virCommand, the only way to pass fds to the child was in direct positions (same fd in child as in parent), not by remapping one fd in the parent to another position in the child, except for the three standard descriptors. It looks like virCommandReorderFDs was added to allow remapping other fds and populates the LISTEN_FDS environment variable with how many fds were thus mapped. So the two approaches don't really mix. Do we ever use virCommandPassListenFDs() on a virCommand that will also do raw fd passthrough? Maybe the real thing
Did not see that. When I checked I found it in a different part of the code. Basicaly virCommandPassListenFDs causes the reordering to happen. This seems to only be run by src/rpc/virnetsocket::virNetSocketForkDaemon().
to do is to track at virCommand build-up time that only one of the two passthrough methods can be used, and fail loudly if a programmer tries to do both methods at once.
So far I think we would not run into this problem. Unfortunately the TPM related code, that would need the order to be stable, is likely not used frequently and the possibility could exist that some things may break without noticing.
Or maybe we should ALWAYS prepare to remap fds in the child, so that the child receives fds in contiguous order with no gaps. It might simplify the code base to always reorder things, and have the mapping done up front. That is, change the virCommandPassFD() function to return an integer of which next consecutive fd the child will see, or -1 on failure. Callers that then need to alter the command line of the child will have to pay attention to the return value (something a bit different than most virCommand build-up, which intentionally defer error checking to the very end), but it might be worth it.
Not sure I follow. But see my question further below.
At any rate, this patch needs to be split into at least two - first, a patch to just util/vircommand.c and tests/commandtest.c to tweak the command passing functionality AND test that any new semantics work while no old semantics are broken (except maybe that we now explicitly reject a combination that previously was silently accepted but made no sense),
previously we didn't pass file descriptors /dev/fdset to QEMU, so the problem presumably didn't exist.
and second the patch to qemu to use the enhancements in virCommand for the TPM feature at hand.
Yes, will do. Though let me know how you intend to change things -- took you by your offer :-)
Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>
v2->v3: Fixed some memory leaks --- src/libvirt_private.syms | 1 + src/qemu/qemu_command.c | 136 ++++++++++++++++++++++++++++++++++++++++++++--- src/util/vircommand.c | 33 ++++++++++++ src/util/vircommand.h | 3 ++ 4 files changed, 166 insertions(+), 7 deletions(-)
/** + * qemuVirCommandGetFDSet: + * @cmd: the command to modify + * @fd: fd to reassign to the child + * + * Get the parameters for the QEMU -add-fd command line option + * for the given file descriptor. The file descriptor must previously + * have been 'transferred' in a virCommandPassFD() call. Would it make any more sense for this function to do the transfer instead of making the caller do it?
Maybe both should exist? The only reason that this could be unpleasant is if you wanted to do this in very different places in the code, the one place does the transfer, the other builds up some command line, like we do here. I guess the code could be adapted. Though there should always be a way to find the FD index.
+ * This function for example returns "set=10,fd=20". + */ +static char * +qemuVirCommandGetFDSet(virCommandPtr cmd, int fd) +{ + char *result = NULL; + int idx = virCommandPassFDGetFDIndex(cmd, fd); + + if (idx >= 0) { + ignore_value(virAsprintf(&result, "set=%d,fd=%d", idx, fd) < 0); + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("file descriptor %d has not been transferred"), fd); + } + + return result; +} + +/** + * qemuVirCommandGetDevSet: + * @cmd: the command to modify + * @fd: fd to reassign to the child + * + * Get the parameters for the QEMU path= parameter where a file + * descriptor is accessed via a file descriptor set, for example + * /dev/fdset/10. The file descriptor must previously have been + * 'transferred' in a virCommandPassFD() call. Oh, maybe not, since the caller marks the fd for passing once, but then calls multiple helper methods to format multiple parameters to qemu that end up working together to describe the fd used by the child.
+ */ +static char * +qemuVirCommandGetDevSet(virCommandPtr cmd, int fd) +{ + char *result = NULL; + int idx = virCommandPassFDGetFDIndex(cmd, fd); + + if (idx >= 0) { + ignore_value(virAsprintf(&result, "/dev/fdset/%d", idx) < 0); + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("file descriptor %d has not been transferred"), fd); + } + return result; +} + + +/** * qemuPhysIfaceConnect: * @def: the definition of the VM (needed by 802.1Qbh and audit) * @driver: pointer to the driver instance @@ -5926,14 +5978,20 @@ qemuBuildRNGDeviceArgs(virCommandPtr cmd,
static char *qemuBuildTPMBackendStr(const virDomainDef *def, + virCommandPtr cmd, virQEMUCapsPtr qemuCaps, - const char *emulator) + const char *emulator, + int *tpmfd, int *cancelfd) { const virDomainTPMDef *tpm = def->tpm; virBuffer buf = VIR_BUFFER_INITIALIZER; const char *type = virDomainTPMBackendTypeToString(tpm->type); - char *cancel_path; + char *cancel_path = NULL; const char *tpmdev; + char *devset = NULL, *cancel_devset = NULL; + + *tpmfd = -1; + *cancelfd = -1;
virBufferAsprintf(&buf, "%s,id=tpm-%s", type, tpm->info.alias);
@@ -5946,11 +6004,49 @@ static char *qemuBuildTPMBackendStr(const virDomainDef *def, if (!(cancel_path = virTPMCreateCancelPath(tpmdev))) goto error;
- virBufferAddLit(&buf, ",path="); - virBufferEscape(&buf, ',', ",", "%s", tpmdev); + if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_ADD_FD)) { + *tpmfd = open(tpmdev, O_RDWR); + if (*tpmfd < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Could not open TPM device %s"), tpmdev); Here, we should use virReportSystemError and pass along the errno value from the failed open().
Ok, I will fix this.
+ goto error; + } + + virCommandPassFD(cmd, *tpmfd, + VIR_COMMAND_PASS_FD_CLOSE_PARENT); + devset = qemuVirCommandGetDevSet(cmd, *tpmfd); + if (devset == NULL) + goto error; + + *cancelfd = open(cancel_path, O_WRONLY); + if (*cancelfd < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Could not open TPM device's cancel path " + "%s"), cancel_path); + goto error; + } + + virCommandPassFD(cmd, *cancelfd, + VIR_COMMAND_PASS_FD_CLOSE_PARENT); + cancel_devset = qemuVirCommandGetDevSet(cmd, *cancelfd); + if (cancel_devset == NULL) + goto error; + + virBufferAddLit(&buf, ",path="); + virBufferEscape(&buf, ',', ",", "%s", devset); + VIR_FREE(devset);
- virBufferAddLit(&buf, ",cancel-path="); - virBufferEscape(&buf, ',', ",", "%s", cancel_path); + virBufferAddLit(&buf, ",cancel-path="); + virBufferEscape(&buf, ',', ",", "%s", cancel_devset); + VIR_FREE(cancel_devset); + } else { + /* all test cases will use this path */ + virBufferAddLit(&buf, ",path="); + virBufferEscape(&buf, ',', ",", "%s", tpmdev); + + virBufferAddLit(&buf, ",cancel-path="); + virBufferEscape(&buf, ',', ",", "%s", cancel_path); + } The else clause looks redundant with the last few lines of the if clause. Why not just sink it to occur just once after the 'if', as long as the 'if' side does a mere tmpdev=devset?
True. Will combine it.
VIR_FREE(cancel_path);
break; @@ -5970,6 +6066,10 @@ static char *qemuBuildTPMBackendStr(const virDomainDef *def, emulator, type);
error: + VIR_FREE(devset); + VIR_FREE(cancel_devset); + VIR_FREE(cancel_path); + virBufferFreeAndReset(&buf); return NULL; } @@ -9223,13 +9323,35 @@ qemuBuildCommandLine(virConnectPtr conn,
if (def->tpm) { char *optstr;
This function is really huge. Yeah, we've not been good at breaking it into chunks in the past, but I think this addition would be nicer if the code inside 'if (def->tpm)' were extracted into a smaller helper function. But if we do that refactoring, the code motion of the existing pre-patch code should be its own patch.
Yes, can do that also.
+ int tpmfd = -1; + int cancelfd = -1; + char *fdset;
- if (!(optstr = qemuBuildTPMBackendStr(def, qemuCaps, emulator))) + if (!(optstr = qemuBuildTPMBackendStr(def, cmd, qemuCaps, emulator, + &tpmfd, &cancelfd))) goto error;
virCommandAddArgList(cmd, "-tpmdev", optstr, NULL); VIR_FREE(optstr);
+ if (tpmfd >= 0) { + fdset = qemuVirCommandGetFDSet(cmd, tpmfd); + if (!fdset) + goto error; + + virCommandAddArgList(cmd, "-add-fd", fdset, NULL); + VIR_FREE(fdset); + } + + if (cancelfd >= 0) { + fdset = qemuVirCommandGetFDSet(cmd, cancelfd); + if (!fdset) + goto error; + + virCommandAddArgList(cmd, "-add-fd", fdset, NULL); + VIR_FREE(fdset); + } + if (!(optstr = qemuBuildTPMDevStr(def, qemuCaps, emulator))) goto error;
diff --git a/src/util/vircommand.c b/src/util/vircommand.c index 6527d85..2616446 100644 --- a/src/util/vircommand.c +++ b/src/util/vircommand.c @@ -67,6 +67,7 @@ enum { VIR_EXEC_RUN_SYNC = (1 << 3), VIR_EXEC_ASYNC_IO = (1 << 4), VIR_EXEC_LISTEN_FDS = (1 << 5), + VIR_EXEC_FIXED_FDS = (1 << 6), };
typedef struct _virCommandFD virCommandFD; @@ -214,6 +215,12 @@ virCommandReorderFDs(virCommandPtr cmd) if (!cmd || cmd->has_error || !cmd->npassfd) return;
+ if ((cmd->flags & VIR_EXEC_FIXED_FDS)) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("The fds are fixed and cannot be reordered")); + goto error; + } Okay, so you are enforcing that a caller can't pick two conflicting fd passing methods on the same virCommand, but my point remains that the testsuite should prove we can hit this error case, to avoid regressions. Or we should redo virCommand to ALWAYS rearrange fds in the child, and make it possible for a caller passing an fd down to know what fd the child will see it as.
That's the part I don't quite follow. We are passing indices via /dev/fdset/%d, right? At the time we build up the command line, we need to know the index that the fd will have. If we shuffle things around later in a way that we cannot determine at the point we build up the command line, how would this work then?
+ for (i = 0; i < cmd->npassfd; i++) maxfd = MAX(cmd->passfd[i].fd, maxfd);
@@ -1019,6 +1026,32 @@ virCommandPassListenFDs(virCommandPtr cmd) cmd->flags |= VIR_EXEC_LISTEN_FDS; }
+/* + * virCommandPassFDGetFDIndex: + * @cmd: pointer to virCommand + * @fd: FD to get index of + * + * Determine the index of the FD in the transfer set. + * + * Returns index >= 0 if @set contains @fd, + * -1 otherwise. + */ +int +virCommandPassFDGetFDIndex(virCommandPtr cmd, int fd) +{ + size_t i = 0; + + while (i < cmd->npassfd) { + if (cmd->passfd[i].fd == fd) { + cmd->flags |= VIR_EXEC_FIXED_FDS; + return i; So, the mapping you are using is that the first fd passed through the child is eventually associated with /dev/fdset/$i, where $i represents which slot of cmd->passfd the descriptor was found in. I know the reason qemu added /dev/fdset/nnn was to allow the possibility of passing in sets of descriptors (such as a read-only and read-write handle both visiting the same file, so in the same fdset), but simplicity argues that unless we need that complexity, always naming our fdsets by the same fd that the child will first use is easier than trying to track the mapping between a parent fd and which order the fd was registered in. So I'm wondering if we change virCommandPassFD to always remap in the child and return an integer (the next consecutive integer starting at 3), then just use '-add-fd set=3,fd=3 ... /dev/fdset/3' is any simpler than trying to have set number unrelated to the fd number.
We are not passing all file descriptors to the child. So how would you handle passing fd's 10,3,55 to the child? Do we pass empty slots for 4-9 and 11-54 then, like pass fd = -1, so we can pass indices 10,3, and 55 or something like that? Regards, Stefan

On 02/06/2015 09:41 AM, Stefan Berger wrote:
@@ -214,6 +215,12 @@ virCommandReorderFDs(virCommandPtr cmd) if (!cmd || cmd->has_error || !cmd->npassfd) return; + if ((cmd->flags & VIR_EXEC_FIXED_FDS)) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("The fds are fixed and cannot be reordered")); + goto error; + } Okay, so you are enforcing that a caller can't pick two conflicting fd passing methods on the same virCommand, but my point remains that the testsuite should prove we can hit this error case, to avoid regressions. Or we should redo virCommand to ALWAYS rearrange fds in the child, and make it possible for a caller passing an fd down to know what fd the child will see it as.
That's the part I don't quite follow. We are passing indices via /dev/fdset/%d, right? At the time we build up the command line, we need to know the index that the fd will have. If we shuffle things around later in a way that we cannot determine at the point we build up the command line, how would this work then?
When you register an fd with virCommand, you would know at that point what fd the child will see (regardless of what the fd number was in the parent); ideally, the first fd you register will be fd 3 in the guest, the next one will be fd 4 in the guest. I'm further proposing that we number our /dev/fdset/%n after the first fd we place in the set (that is, we know the guest will see fd 3, so place it in /dev/fdset/3). The only time an fdset needs more than one member is if we pass multiple fds pointing to the same file (a read-only fd and a read-write fd, for example), but we don't have a use for that yet.
We are not passing all file descriptors to the child. So how would you handle passing fd's 10,3,55 to the child? Do we pass empty slots for 4-9 and 11-54 then, like pass fd = -1, so we can pass indices 10,3, and 55 or something like that?
In my proposal, the parent's 10, 3, 55 become the child's 3, 4, 5. The current code has the parent's 10 become the child's 10, with closed fds in between, except when using the newer code that packs things and merely passes an environment variable to say how many were packed. I'm proposing that we just always pack things, and make it easier to know what number we packed into, and that the parent then build the command line around the packed number instead of around the parent's number. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 02/06/2015 01:56 PM, Eric Blake wrote:
On 02/06/2015 09:41 AM, Stefan Berger wrote:
@@ -214,6 +215,12 @@ virCommandReorderFDs(virCommandPtr cmd) if (!cmd || cmd->has_error || !cmd->npassfd) return; + if ((cmd->flags & VIR_EXEC_FIXED_FDS)) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("The fds are fixed and cannot be reordered")); + goto error; + } Okay, so you are enforcing that a caller can't pick two conflicting fd passing methods on the same virCommand, but my point remains that the testsuite should prove we can hit this error case, to avoid regressions. Or we should redo virCommand to ALWAYS rearrange fds in the child, and make it possible for a caller passing an fd down to know what fd the child will see it as. That's the part I don't quite follow. We are passing indices via /dev/fdset/%d, right? At the time we build up the command line, we need to know the index that the fd will have. If we shuffle things around later in a way that we cannot determine at the point we build up the command line, how would this work then? When you register an fd with virCommand, you would know at that point what fd the child will see (regardless of what the fd number was in the parent); ideally, the first fd you register will be fd 3 in the guest, the next one will be fd 4 in the guest. I'm further proposing that we number our /dev/fdset/%n after the first fd we place in the set (that is, we know the guest will see fd 3, so place it in /dev/fdset/3). The only time an fdset needs more than one member is if we pass multiple fds pointing to the same file (a read-only fd and a read-write fd, for example), but we don't have a use for that yet.
We are not passing all file descriptors to the child. So how would you handle passing fd's 10,3,55 to the child? Do we pass empty slots for 4-9 and 11-54 then, like pass fd = -1, so we can pass indices 10,3, and 55 or something like that? In my proposal, the parent's 10, 3, 55 become the child's 3, 4, 5. The current code has the parent's 10 become the child's 10, with closed fds in between, except when using the newer code that packs things and merely passes an environment variable to say how many were packed. I'm proposing that we just always pack things, and make it easier to know what number we packed into, and that the parent then build the command line around the packed number instead of around the parent's number.
I see. I guess a modified virCommandFDSet may then look like this: /* * virCommandFDSet: * @fd: FD to be put into @set * @set: the set * @set_size: actual size of @set * * This is practically generalized implementation * of FD_SET() as we do not want to be limited * by FD_SETSIZE. * * Returns: the target filedescriptor on success, * -1 on usage error, * -ENOMEM on OOM // -> no returns negative errno to not collide with file descriptor */ static int virCommandFDSet(virCommandPtr cmd, int fd, unsigned int flags) { if (!cmd || fd < 0) return -1; if (virCommandFDIsSet(cmd, fd)) return 0; if (VIR_EXPAND_N(cmd->passfd, cmd->npassfd, 1) < 0) return -ENOMEM; cmd->passfd[cmd->npassfd - 1].fd = fd; cmd->passfd[cmd->npassfd - 1].flags = flags; return cmd->npassfd - 1 + 3; // the filedescriptor given to the child; base is '3' } virCommandReorderFDs is not used on WIN32 -- that would then mean that the fdsets could not be used on WIN32 or a different return value would have to be given here. Like return cmd->npassfd -1 on WIN32 ? Stefan

On 02/06/2015 12:19 PM, Stefan Berger wrote:
virCommandReorderFDs is not used on WIN32 -- that would then mean that the fdsets could not be used on WIN32 or a different return value would have to be given here. Like
return cmd->npassfd -1
on WIN32 ?
virCommand in general has problems on WIN32, so returning failure for now may be reasonable. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 02/06/2015 03:11 PM, Eric Blake wrote:
On 02/06/2015 12:19 PM, Stefan Berger wrote:
virCommandReorderFDs is not used on WIN32 -- that would then mean that the fdsets could not be used on WIN32 or a different return value would have to be given here. Like
return cmd->npassfd -1
on WIN32 ? virCommand in general has problems on WIN32, so returning failure for now may be reasonable.
We could just leave the function virCommandPassFd as is. Anyone who wants to build a QEMU /dev/fdset string would have to go through the function that looks up the fd the child will see and will fail on WIN32. Then the problem would be reduced to just having the file descriptors reordered every time -- a simple change in the code, unless we only wanted to do the re-ordering if someone was building a /dev/fdset string and as part of the fd lookup a flag was set requesting re-ordering. I would not expect any side effects in existing code except that maybe test cases would have to be adapted. Stefan

On Fri, Jan 30, 2015 at 05:17:56PM -0700, Eric Blake wrote:
On 11/20/2014 08:08 AM, Stefan Berger wrote:
Wow, I've been horribly slow at reviewing this. Do feel free to ping on list if no one seems to notice a patch, to widen the chances of anyone taking a glance at it.
Pass the TPM file descriptor to QEMU via command line. Instead of passing /dev/tpm0 we now pass /dev/fdset/10 and the additional parameters -add-fd set=10,fd=20.
This addresses the use case when QEMU is started with non-root privileges and QEMU cannot open /dev/tpm0 for example.
One problem is that for the passing of the file descriptor set to work, virCommandReorderFDs must not be called on the virCommand. This is prevented by setting a flag in the virCommandPassFDGetFDIndex that is checked to be clear when virCommandReorderFDs is run.
I'm still trying to figure out how virCommandReorderFDs() got into the picture (I didn't write that section of the code); when I originally worked on virCommand, the only way to pass fds to the child was in direct positions (same fd in child as in parent), not by remapping one fd in the parent to another position in the child, except for the three standard descriptors. It looks like virCommandReorderFDs was added to allow remapping other fds and populates the LISTEN_FDS environment variable with how many fds were thus mapped. So the two approaches don't really mix. Do we ever use virCommandPassListenFDs() on a virCommand that will also do raw fd passthrough? Maybe the real thing to do is to track at virCommand build-up time that only one of the two passthrough methods can be used, and fail loudly if a programmer tries to do both methods at once.
Or instead of virCommandPassFD() which only takes a source FD number, we could have added a virComandPassFDRemap which takes a source and target FD number. That way we don't have a global "reorder FDs" concept that can break in unexpected ways - we would only ever re-order FDs for which an explicitly target FD was requested.
Or maybe we should ALWAYS prepare to remap fds in the child, so that the child receives fds in contiguous order with no gaps. It might simplify the code base to always reorder things, and have the mapping done up front. That is, change the virCommandPassFD() function to return an integer of which next consecutive fd the child will see, or -1 on failure. Callers that then need to alter the command line of the child will have to pay attention to the return value (something a bit different than most virCommand build-up, which intentionally defer error checking to the very end), but it might be worth it.
This might be simplest way to go - I'm just wondering if it will cause us any other type of fun problems Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 02/06/2015 10:49 AM, Daniel P. Berrange wrote:
I'm still trying to figure out how virCommandReorderFDs() got into the picture (I didn't write that section of the code); when I originally worked on virCommand, the only way to pass fds to the child was in direct positions (same fd in child as in parent), not by remapping one fd in the parent to another position in the child, except for the three standard descriptors. It looks like virCommandReorderFDs was added to allow remapping other fds and populates the LISTEN_FDS environment variable with how many fds were thus mapped. So the two approaches don't really mix. Do we ever use virCommandPassListenFDs() on a virCommand that will also do raw fd passthrough? Maybe the real thing to do is to track at virCommand build-up time that only one of the two passthrough methods can be used, and fail loudly if a programmer tries to do both methods at once.
Or instead of virCommandPassFD() which only takes a source FD number, we could have added a virComandPassFDRemap which takes a source and target FD number. That way we don't have a global "reorder FDs" concept that can break in unexpected ways - we would only ever re-order FDs for which an explicitly target FD was requested.
But then we have to be careful that there are no collisions between inherited-in-place and reordered fds. My argument is that reordering ALWAYS ensures we don't have to worry about collisions between different registration styles, because we no longer have different registration styles.
Or maybe we should ALWAYS prepare to remap fds in the child, so that the child receives fds in contiguous order with no gaps. It might simplify the code base to always reorder things, and have the mapping done up front. That is, change the virCommandPassFD() function to return an integer of which next consecutive fd the child will see, or -1 on failure. Callers that then need to alter the command line of the child will have to pay attention to the return value (something a bit different than most virCommand build-up, which intentionally defer error checking to the very end), but it might be worth it.
This might be simplest way to go - I'm just wondering if it will cause us any other type of fun problems
And it sounds like Stefan is looking to me to play with it and see what breaks. Oh well, I guess I walked into that one :) -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 02/06/2015 01:58 PM, Eric Blake wrote:
On 02/06/2015 10:49 AM, Daniel P. Berrange wrote:
I'm still trying to figure out how virCommandReorderFDs() got into the picture (I didn't write that section of the code); when I originally worked on virCommand, the only way to pass fds to the child was in direct positions (same fd in child as in parent), not by remapping one fd in the parent to another position in the child, except for the three standard descriptors. It looks like virCommandReorderFDs was added to allow remapping other fds and populates the LISTEN_FDS environment variable with how many fds were thus mapped. So the two approaches don't really mix. Do we ever use virCommandPassListenFDs() on a virCommand that will also do raw fd passthrough? Maybe the real thing to do is to track at virCommand build-up time that only one of the two passthrough methods can be used, and fail loudly if a programmer tries to do both methods at once. Or instead of virCommandPassFD() which only takes a source FD number, we could have added a virComandPassFDRemap which takes a source and target FD number. That way we don't have a global "reorder FDs" concept that can break in unexpected ways - we would only ever re-order FDs for which an explicitly target FD was requested. But then we have to be careful that there are no collisions between inherited-in-place and reordered fds. My argument is that reordering ALWAYS ensures we don't have to worry about collisions between different registration styles, because we no longer have different registration styles.
Or maybe we should ALWAYS prepare to remap fds in the child, so that the child receives fds in contiguous order with no gaps. It might simplify the code base to always reorder things, and have the mapping done up front. That is, change the virCommandPassFD() function to return an integer of which next consecutive fd the child will see, or -1 on failure. Callers that then need to alter the command line of the child will have to pay attention to the return value (something a bit different than most virCommand build-up, which intentionally defer error checking to the very end), but it might be worth it. This might be simplest way to go - I'm just wondering if it will cause us any other type of fun problems And it sounds like Stefan is looking to me to play with it and see what breaks. Oh well, I guess I walked into that one :)
Well, we can think it through. Are the following changes sufficient or is there much more to it? 1) reorder the FDs all the time ; the function is invoked in virExec and we can move the call up 2 lines out of the if () branch. 2a) modify the return value as shown in the other email and propagate into virCommandPassFD and return an int here now that can be ignored by callers OR 2b) possibly have another function that lets one determine the fd the child will see given a passed fd 3) possibly adapt test cases... Stefan

On 02/06/2015 02:43 PM, Stefan Berger wrote:
On 02/06/2015 01:58 PM, Eric Blake wrote:
On 02/06/2015 10:49 AM, Daniel P. Berrange wrote:
I'm still trying to figure out how virCommandReorderFDs() got into the picture (I didn't write that section of the code); when I originally worked on virCommand, the only way to pass fds to the child was in direct positions (same fd in child as in parent), not by remapping one fd in the parent to another position in the child, except for the three standard descriptors. It looks like virCommandReorderFDs was added to allow remapping other fds and populates the LISTEN_FDS environment variable with how many fds were thus mapped. So the two approaches don't really mix. Do we ever use virCommandPassListenFDs() on a virCommand that will also do raw fd passthrough? Maybe the real thing to do is to track at virCommand build-up time that only one of the two passthrough methods can be used, and fail loudly if a programmer tries to do both methods at once. Or instead of virCommandPassFD() which only takes a source FD number, we could have added a virComandPassFDRemap which takes a source and target FD number. That way we don't have a global "reorder FDs" concept that can break in unexpected ways - we would only ever re-order FDs for which an explicitly target FD was requested. But then we have to be careful that there are no collisions between inherited-in-place and reordered fds. My argument is that reordering ALWAYS ensures we don't have to worry about collisions between different registration styles, because we no longer have different registration styles.
Or maybe we should ALWAYS prepare to remap fds in the child, so that the child receives fds in contiguous order with no gaps. It might simplify the code base to always reorder things, and have the mapping done up front. That is, change the virCommandPassFD() function to return an integer of which next consecutive fd the child will see, or -1 on failure. Callers that then need to alter the command line of the child will have to pay attention to the return value (something a bit different than most virCommand build-up, which intentionally defer error checking to the very end), but it might be worth it. This might be simplest way to go - I'm just wondering if it will cause us any other type of fun problems And it sounds like Stefan is looking to me to play with it and see what breaks. Oh well, I guess I walked into that one :)
Well, we can think it through.
Are the following changes sufficient or is there much more to it?
1) reorder the FDs all the time ; the function is invoked in virExec and we can move the call up 2 lines out of the if () branch. 2a) modify the return value as shown in the other email and propagate into virCommandPassFD and return an int here now that can be ignored by callers OR 2b) possibly have another function that lets one determine the fd the child will see given a passed fd
3) possibly adapt test cases...
it's not that simple unfortunately; if we started to re-order / re-map using dup() the file descriptors then all fd's that make it onto the command line, like those for -netdev and others, also need to be adapted to have that re-ordered fd on the command line. Conversely, this currently prevents us from ordering the file descriptors as well, since nothing would work without code change. So ordering the file descriptors cannot currently be done with anything that builds up a QEMU command line. So do we really need to re-order them then as part of this code change? Stefan
Stefan

On 02/06/2015 04:12 PM, Stefan Berger wrote:
it's not that simple unfortunately; if we started to re-order / re-map using dup() the file descriptors then all fd's that make it onto the command line, like those for -netdev and others, also need to be adapted to have that re-ordered fd on the command line.
Correct, and that's what I hope to accomplish by making fd registration enforce that the caller check the return value, to make sure that -netdev is now constructing the command line with the fd the child will see rather than what the parent owns. Yes, it means auditing all existing callers.
Conversely, this currently prevents us from ordering the file descriptors as well, since nothing would work without code change. So ordering the file descriptors cannot currently be done with anything that builds up a QEMU command line. So do we really need to re-order them then as part of this code change?
I think that a pre-req series that orders fds in the child for all callers, independent of adding your new TPM features, will be a good thing in the long run. So I'm currently working on said series. :) -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org
participants (3)
-
Daniel P. Berrange
-
Eric Blake
-
Stefan Berger