[libvirt] [PATCH 0/3] Fix selinux problem when restoring qemu domain via a pipe

Patches 1 & 2 provide *most* of the fix required for https://bugzilla.redhat.com/show_bug.cgi?id=667756 Note that restoring from a gzip'ed image also suffers the same problem, and requires the same remedy. Patch 3 remedies a hang during failure that I encountered while debugging the main problem.

A need was found to set the SELinux context label on an open fd (a pipe, as a matter of fact). This patch adds a function to the security driver API that will set the label on an open fd to secdef.label. For all drivers other than the SELinux driver, it's a NOP. For the SElinux driver, it calls fsetfilecon(). If the return is a failure, it only returns error up to the caller if 1) the desired label is different from the existing label, 2) the destination fd is of a type that supports setting the selinux context, and 3) selinux is in enforcing mode. Otherwise it will return success. This follows the pattern of the existing function SELinuxSetFilecon(). --- src/libvirt_private.syms | 1 + src/security/security_apparmor.c | 10 +++++++ src/security/security_dac.c | 10 +++++++ src/security/security_driver.h | 6 +++- src/security/security_manager.c | 11 +++++++ src/security/security_manager.h | 3 ++ src/security/security_nop.c | 9 ++++++ src/security/security_selinux.c | 54 ++++++++++++++++++++++++++++++++++++++ src/security/security_stack.c | 18 ++++++++++++ 9 files changed, 121 insertions(+), 1 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index f60489c..c7e4772 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -723,6 +723,7 @@ virSecurityManagerRestoreAllLabel; virSecurityManagerRestoreHostdevLabel; virSecurityManagerRestoreSavedStateLabel; virSecurityManagerSetAllLabel; +virSecurityManagerSetFDLabel; virSecurityManagerSetImageLabel; virSecurityManagerSetHostdevLabel; virSecurityManagerSetProcessLabel; diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c index d82ba73..7dc01ac 100644 --- a/src/security/security_apparmor.c +++ b/src/security/security_apparmor.c @@ -798,6 +798,14 @@ AppArmorRestoreSavedStateLabel(virSecurityManagerPtr mgr, return reload_profile(mgr, vm, NULL, false); } +static int +AppArmorSetFDLabel(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + virDomainObjPtr vm ATTRIBUTE_UNUSED, + int fd ATTRIBUTE_UNUSED) +{ + return 0; +} + virSecurityDriver virAppArmorSecurityDriver = { 0, SECURITY_APPARMOR_NAME, @@ -831,4 +839,6 @@ virSecurityDriver virAppArmorSecurityDriver = { AppArmorSetSavedStateLabel, AppArmorRestoreSavedStateLabel, + + AppArmorSetFDLabel, }; diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 0f24034..52353a3 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -675,6 +675,14 @@ virSecurityDACClearSocketLabel(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, return 0; } +static int +virSecurityDACSetFDLabel(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + virDomainObjPtr vm ATTRIBUTE_UNUSED, + int fd ATTRIBUTE_UNUSED) +{ + return 0; +} + virSecurityDriver virSecurityDriverDAC = { sizeof(virSecurityDACData), @@ -710,4 +718,6 @@ virSecurityDriver virSecurityDriverDAC = { virSecurityDACSetSavedStateLabel, virSecurityDACRestoreSavedStateLabel, + + virSecurityDACSetFDLabel, }; diff --git a/src/security/security_driver.h b/src/security/security_driver.h index e5a8d41..42dfcb8 100644 --- a/src/security/security_driver.h +++ b/src/security/security_driver.h @@ -79,7 +79,9 @@ typedef int (*virSecurityDomainSetProcessLabel) (virSecurityManagerPtr mgr, virDomainObjPtr vm); typedef int (*virSecurityDomainSecurityVerify) (virSecurityManagerPtr mgr, virDomainDefPtr def); - +typedef int (*virSecurityDomainSetFDLabel) (virSecurityManagerPtr mgr, + virDomainObjPtr vm, + int fd); struct _virSecurityDriver { size_t privateDataLen; @@ -114,6 +116,8 @@ struct _virSecurityDriver { virSecurityDomainSetSavedStateLabel domainSetSavedStateLabel; virSecurityDomainRestoreSavedStateLabel domainRestoreSavedStateLabel; + + virSecurityDomainSetFDLabel domainSetSecurityFDLabel; }; virSecurityDriverPtr virSecurityDriverLookup(const char *name); diff --git a/src/security/security_manager.c b/src/security/security_manager.c index 6406161..0246dd8 100644 --- a/src/security/security_manager.c +++ b/src/security/security_manager.c @@ -323,3 +323,14 @@ int virSecurityManagerVerify(virSecurityManagerPtr mgr, virSecurityReportError(VIR_ERR_NO_SUPPORT, __FUNCTION__); return -1; } + +int virSecurityManagerSetFDLabel(virSecurityManagerPtr mgr, + virDomainObjPtr vm, + int fd) +{ + if (mgr->drv->domainSetSecurityFDLabel) + return mgr->drv->domainSetSecurityFDLabel(mgr, vm, fd); + + virSecurityReportError(VIR_ERR_NO_SUPPORT, __FUNCTION__); + return -1; +} diff --git a/src/security/security_manager.h b/src/security/security_manager.h index 189b6b4..3f88801 100644 --- a/src/security/security_manager.h +++ b/src/security/security_manager.h @@ -91,5 +91,8 @@ int virSecurityManagerSetProcessLabel(virSecurityManagerPtr mgr, virDomainObjPtr vm); int virSecurityManagerVerify(virSecurityManagerPtr mgr, virDomainDefPtr def); +int virSecurityManagerSetFDLabel(virSecurityManagerPtr mgr, + virDomainObjPtr vm, + int fd); #endif /* VIR_SECURITY_MANAGER_H__ */ diff --git a/src/security/security_nop.c b/src/security/security_nop.c index 6d7cb47..24d36fe 100644 --- a/src/security/security_nop.c +++ b/src/security/security_nop.c @@ -149,6 +149,13 @@ static int virSecurityDomainVerifyNop(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED return 0; } +static int virSecurityDomainSetFDLabelNop(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + virDomainObjPtr sec ATTRIBUTE_UNUSED, + int fd ATTRIBUTE_UNUSED) +{ + return 0; +} + virSecurityDriver virSecurityDriverNop = { 0, "none", @@ -182,4 +189,6 @@ virSecurityDriver virSecurityDriverNop = { virSecurityDomainSetSavedStateLabelNop, virSecurityDomainRestoreSavedStateLabelNop, + + virSecurityDomainSetFDLabelNop, }; diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 7b71fd9..24609bc 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -371,6 +371,45 @@ SELinuxSetFilecon(const char *path, char *tcon) return 0; } +static int +SELinuxFSetFilecon(int fd, char *tcon) +{ + security_context_t econ; + + VIR_INFO("Setting SELinux context on fd %d to '%s'", fd, tcon); + + if (fsetfilecon(fd, tcon) < 0) { + int fsetfilecon_errno = errno; + + if (fgetfilecon(fd, &econ) >= 0) { + if (STREQ(tcon, econ)) { + freecon(econ); + /* It's alright, there's nothing to change anyway. */ + return 0; + } + freecon(econ); + } + + /* if the error complaint is related to an image hosted on + * an nfs mount, or a usbfs/sysfs filesystem not supporting + * labelling, then just ignore it & hope for the best. + * The user hopefully set one of the necessary SELinux + * virt_use_{nfs,usb,pci} boolean tunables to allow it... + */ + if (fsetfilecon_errno != EOPNOTSUPP) { + virReportSystemError(fsetfilecon_errno, + _("unable to set security context '%s' on fd %d"), + tcon, fd); + if (security_getenforce() == 1) + return -1; + } else { + VIR_INFO("Setting security context '%s' on fd %d not supported", + tcon, fd); + } + } + return 0; +} + /* Set fcon to the appropriate label for path and mode, or return -1. */ static int getContext(const char *newpath, mode_t mode, security_context_t *fcon) @@ -1087,6 +1126,19 @@ SELinuxSetSecurityAllLabel(virSecurityManagerPtr mgr, return 0; } +static int +SELinuxSetFDLabel(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + virDomainObjPtr vm, + int fd) +{ + const virSecurityLabelDefPtr secdef = &vm->def->seclabel; + + if (secdef->imagelabel == NULL) + return 0; + + return SELinuxFSetFilecon(fd, secdef->imagelabel); +} + virSecurityDriver virSecurityDriverSELinux = { 0, SECURITY_SELINUX_NAME, @@ -1120,4 +1172,6 @@ virSecurityDriver virSecurityDriverSELinux = { SELinuxSetSavedStateLabel, SELinuxRestoreSavedStateLabel, + + SELinuxSetFDLabel, }; diff --git a/src/security/security_stack.c b/src/security/security_stack.c index e8bb058..79b3e1f 100644 --- a/src/security/security_stack.c +++ b/src/security/security_stack.c @@ -364,6 +364,22 @@ virSecurityStackClearSocketLabel(virSecurityManagerPtr mgr, return rc; } +static int +virSecurityStackSetFDLabel(virSecurityManagerPtr mgr, + virDomainObjPtr vm, + int fd) +{ + virSecurityStackDataPtr priv = virSecurityManagerGetPrivateData(mgr); + int rc = 0; + + if (virSecurityManagerSetFDLabel(priv->secondary, vm, fd) < 0) + rc = -1; + if (virSecurityManagerSetFDLabel(priv->primary, vm, fd) < 0) + rc = -1; + + return rc; +} + virSecurityDriver virSecurityDriverStack = { sizeof(virSecurityStackData), @@ -398,4 +414,6 @@ virSecurityDriver virSecurityDriverStack = { virSecurityStackSetSavedStateLabel, virSecurityStackRestoreSavedStateLabel, + + virSecurityStackSetFDLabel, }; -- 1.7.3.4

On Tue, Jan 25, 2011 at 04:24:18AM -0500, Laine Stump wrote:
A need was found to set the SELinux context label on an open fd (a pipe, as a matter of fact). This patch adds a function to the security driver API that will set the label on an open fd to secdef.label. For all drivers other than the SELinux driver, it's a NOP. For the SElinux driver, it calls fsetfilecon().
If the return is a failure, it only returns error up to the caller if 1) the desired label is different from the existing label, 2) the destination fd is of a type that supports setting the selinux context, and 3) selinux is in enforcing mode. Otherwise it will return success. This follows the pattern of the existing function SELinuxSetFilecon().
ACK Daniel

On 01/25/2011 12:48 PM, Daniel P. Berrange wrote:
On Tue, Jan 25, 2011 at 04:24:18AM -0500, Laine Stump wrote:
A need was found to set the SELinux context label on an open fd (a pipe, as a matter of fact). This patch adds a function to the security driver API that will set the label on an open fd to secdef.label. For all drivers other than the SELinux driver, it's a NOP. For the SElinux driver, it calls fsetfilecon().
If the return is a failure, it only returns error up to the caller if 1) the desired label is different from the existing label, 2) the destination fd is of a type that supports setting the selinux context, and 3) selinux is in enforcing mode. Otherwise it will return success. This follows the pattern of the existing function SELinuxSetFilecon(). ACK
Thanks. I'll hold off on pushing this just in case the discussion on PATCH 2/3 leads to a change requirement in this one.

On 01/25/2011 04:03 PM, Laine Stump wrote:
On 01/25/2011 12:48 PM, Daniel P. Berrange wrote:
On Tue, Jan 25, 2011 at 04:24:18AM -0500, Laine Stump wrote:
A need was found to set the SELinux context label on an open fd (a pipe, as a matter of fact). This patch adds a function to the security driver API that will set the label on an open fd to secdef.label. For all drivers other than the SELinux driver, it's a NOP. For the SElinux driver, it calls fsetfilecon().
If the return is a failure, it only returns error up to the caller if 1) the desired label is different from the existing label, 2) the destination fd is of a type that supports setting the selinux context, and 3) selinux is in enforcing mode. Otherwise it will return success. This follows the pattern of the existing function SELinuxSetFilecon(). ACK
Thanks. I'll hold off on pushing this just in case the discussion on PATCH 2/3 leads to a change requirement in this one.
Now pushed.

This patch is a partial resolution to the following bug: https://bugzilla.redhat.com/show_bug.cgi?id=667756 (to complete the fix, an updated selinux-policy package is required, to add the policy that allows libvirt to set the context of a fifo, which was previously not allowed). Explanation : When an incoming migration is over a pipe (for example, if the image was compressed and is being fed through gzip, or was on a root-squash nfs server, so needed to be opened by a child process running as a different uid), qemu cannot read it unless the selinux context label for the pipe has been set properly. The solution is to check the fd used as the source of the migration just before passing it to qemu; if it's a fifo (implying that it's a pipe), we call the newly added virSecurityManagerSetFDLabel() function to set the context properly. --- src/qemu/qemu_driver.c | 18 ++++++++++++++++++ 1 files changed, 18 insertions(+), 0 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 34cc29f..985b062 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2667,6 +2667,24 @@ static int qemudStartVMDaemon(virConnectPtr conn, vm, stdin_path) < 0) goto cleanup; + if (stdin_fd != -1) { + /* if there's an fd to migrate from, and it's a pipe, put the + * proper security label on it + */ + struct stat stdin_sb; + + DEBUG0("setting security label on pipe used for migration"); + + if (fstat(stdin_fd, &stdin_sb) < 0) { + virReportSystemError(errno, + _("cannot stat fd %d"), stdin_fd); + goto cleanup; + } + if (S_ISFIFO(stdin_sb.st_mode) && + virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) + goto cleanup; + } + /* Ensure no historical cgroup for this VM is lying around bogus * settings */ DEBUG0("Ensuring no historical cgroup is lying around"); -- 1.7.3.4

On Tue, Jan 25, 2011 at 04:24:19AM -0500, Laine Stump wrote:
This patch is a partial resolution to the following bug:
https://bugzilla.redhat.com/show_bug.cgi?id=667756
(to complete the fix, an updated selinux-policy package is required, to add the policy that allows libvirt to set the context of a fifo, which was previously not allowed).
Explanation : When an incoming migration is over a pipe (for example, if the image was compressed and is being fed through gzip, or was on a root-squash nfs server, so needed to be opened by a child process running as a different uid), qemu cannot read it unless the selinux context label for the pipe has been set properly.
The solution is to check the fd used as the source of the migration just before passing it to qemu; if it's a fifo (implying that it's a pipe), we call the newly added virSecurityManagerSetFDLabel() function to set the context properly. --- src/qemu/qemu_driver.c | 18 ++++++++++++++++++ 1 files changed, 18 insertions(+), 0 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 34cc29f..985b062 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2667,6 +2667,24 @@ static int qemudStartVMDaemon(virConnectPtr conn, vm, stdin_path) < 0) goto cleanup;
+ if (stdin_fd != -1) { + /* if there's an fd to migrate from, and it's a pipe, put the + * proper security label on it + */ + struct stat stdin_sb; + + DEBUG0("setting security label on pipe used for migration"); + + if (fstat(stdin_fd, &stdin_sb) < 0) { + virReportSystemError(errno, + _("cannot stat fd %d"), stdin_fd); + goto cleanup; + } + if (S_ISFIFO(stdin_sb.st_mode) && + virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) + goto cleanup; + }
This feels like the wrong place to put this call. The callers of qemudStartVMDaemon() which opened 'stdin_fd' in the first place will already know if it is a pipe or not. If we put the virSecurityManagerSetFDLabel call in the appropriate callers, then the fstat() complexity is avoided. Daniel

On 01/25/2011 12:49 PM, Daniel P. Berrange wrote:
This patch is a partial resolution to the following bug:
https://bugzilla.redhat.com/show_bug.cgi?id=667756
(to complete the fix, an updated selinux-policy package is required, to add the policy that allows libvirt to set the context of a fifo, which was previously not allowed).
Explanation : When an incoming migration is over a pipe (for example, if the image was compressed and is being fed through gzip, or was on a root-squash nfs server, so needed to be opened by a child process running as a different uid), qemu cannot read it unless the selinux context label for the pipe has been set properly.
The solution is to check the fd used as the source of the migration just before passing it to qemu; if it's a fifo (implying that it's a pipe), we call the newly added virSecurityManagerSetFDLabel() function to set the context properly. --- src/qemu/qemu_driver.c | 18 ++++++++++++++++++ 1 files changed, 18 insertions(+), 0 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 34cc29f..985b062 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2667,6 +2667,24 @@ static int qemudStartVMDaemon(virConnectPtr conn, vm, stdin_path)< 0) goto cleanup;
+ if (stdin_fd != -1) { + /* if there's an fd to migrate from, and it's a pipe, put the + * proper security label on it + */ + struct stat stdin_sb; + + DEBUG0("setting security label on pipe used for migration"); + + if (fstat(stdin_fd,&stdin_sb)< 0) { + virReportSystemError(errno, + _("cannot stat fd %d"), stdin_fd); + goto cleanup; + } + if (S_ISFIFO(stdin_sb.st_mode)&& + virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd)< 0) + goto cleanup; + } This feels like the wrong place to put this call. The callers of qemudStartVMDaemon() which opened 'stdin_fd' in the first
On Tue, Jan 25, 2011 at 04:24:19AM -0500, Laine Stump wrote: place will already know if it is a pipe or not. If we put the virSecurityManagerSetFDLabel call in the appropriate callers, then the fstat() complexity is avoided.
That was my first intent too. However, in the case of an image on root-squashed NFS, the knowledge about whether to directly open, or open via a pipe to a child process, is made in qemudOpenAsUID(), which doesn't have access to the domainObj, so cannot call the security driver. In a broader view, qemudOpenAsUID() is really a potentially general purpose function that could be used outside of this context some day, but gumming it up with application-specific things like a domainObj would lock it into being specific to domain-related functions. More specifically (and importantly), the domainObj hasn't even been constructed until after qemudOpenAsUID() is finished and returned, since it's going to be created by the caller from the header of the file that qemudOpenAsUID() has just opened. So by the time we have the domainObj, we no longer have the knowledge that the fd is actually the read side of a pipe - we would still have to call fstat (or clutter up the calling sequence to pass back an "is_fifo" bool or something, which sounds even less right). Compared to that, putting the call to SetFDLabel() in a single place, qualified by fstat() to see if the fd was a fifo, seemed like a much less intrusive change to the code. (The other instance of a pipe being created (for compression) is less problematic, as everything we need is already there. However, since we're already doing the fstat() for the root-squash case, and since doing two FDSetLabels() would be superfluous (in the case of a compressed image stored on a root-squashed share), I figured we might as well have a single call in a common place (which, by the way, is strategically located as late as possible, so that any future additions of pipes will automatically be caught and accounted for).) However, If anyone has a suggestion for dealing with the chicken-egg problem of domainObj vs fifo that is less ugly, please speak up, and I'll be happy to implement it! :-) (NB: I have an idea in the back of my head that all files in libvirt could be opened by a VIR_OPEN() that would automatically handle "the root squash problem", possibly in some other manner than qemudOpenAsUID() (using SCM_RIGHTS maybe?), and in that case we wouldn't have the option of passing the security label down to the innards of the utility function even if we had it (unless we wanted to wire up every single VIR_OPEN() with an extra NULL arg or something).

On Tue, Jan 25, 2011 at 03:54:12PM -0500, Laine Stump wrote:
On 01/25/2011 12:49 PM, Daniel P. Berrange wrote:
This patch is a partial resolution to the following bug:
https://bugzilla.redhat.com/show_bug.cgi?id=667756
(to complete the fix, an updated selinux-policy package is required, to add the policy that allows libvirt to set the context of a fifo, which was previously not allowed).
Explanation : When an incoming migration is over a pipe (for example, if the image was compressed and is being fed through gzip, or was on a root-squash nfs server, so needed to be opened by a child process running as a different uid), qemu cannot read it unless the selinux context label for the pipe has been set properly.
The solution is to check the fd used as the source of the migration just before passing it to qemu; if it's a fifo (implying that it's a pipe), we call the newly added virSecurityManagerSetFDLabel() function to set the context properly. --- src/qemu/qemu_driver.c | 18 ++++++++++++++++++ 1 files changed, 18 insertions(+), 0 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 34cc29f..985b062 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2667,6 +2667,24 @@ static int qemudStartVMDaemon(virConnectPtr conn, vm, stdin_path)< 0) goto cleanup;
+ if (stdin_fd != -1) { + /* if there's an fd to migrate from, and it's a pipe, put the + * proper security label on it + */ + struct stat stdin_sb; + + DEBUG0("setting security label on pipe used for migration"); + + if (fstat(stdin_fd,&stdin_sb)< 0) { + virReportSystemError(errno, + _("cannot stat fd %d"), stdin_fd); + goto cleanup; + } + if (S_ISFIFO(stdin_sb.st_mode)&& + virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd)< 0) + goto cleanup; + } This feels like the wrong place to put this call. The callers of qemudStartVMDaemon() which opened 'stdin_fd' in the first
On Tue, Jan 25, 2011 at 04:24:19AM -0500, Laine Stump wrote: place will already know if it is a pipe or not. If we put the virSecurityManagerSetFDLabel call in the appropriate callers, then the fstat() complexity is avoided.
That was my first intent too. However, in the case of an image on root-squashed NFS, the knowledge about whether to directly open, or open via a pipe to a child process, is made in qemudOpenAsUID(), which doesn't have access to the domainObj, so cannot call the security driver.
In a broader view, qemudOpenAsUID() is really a potentially general purpose function that could be used outside of this context some day, but gumming it up with application-specific things like a domainObj would lock it into being specific to domain-related functions.
More specifically (and importantly), the domainObj hasn't even been constructed until after qemudOpenAsUID() is finished and returned, since it's going to be created by the caller from the header of the file that qemudOpenAsUID() has just opened. So by the time we have the domainObj, we no longer have the knowledge that the fd is actually the read side of a pipe - we would still have to call fstat (or clutter up the calling sequence to pass back an "is_fifo" bool or something, which sounds even less right).
Compared to that, putting the call to SetFDLabel() in a single place, qualified by fstat() to see if the fd was a fifo, seemed like a much less intrusive change to the code. (The other instance of a pipe being created (for compression) is less problematic, as everything we need is already there. However, since we're already doing the fstat() for the root-squash case, and since doing two FDSetLabels() would be superfluous (in the case of a compressed image stored on a root-squashed share), I figured we might as well have a single call in a common place (which, by the way, is strategically located as late as possible, so that any future additions of pipes will automatically be caught and accounted for).)
However, If anyone has a suggestion for dealing with the chicken-egg problem of domainObj vs fifo that is less ugly, please speak up, and I'll be happy to implement it! :-)
Ok, I see what you mean here. ACK to the original patch Daniel

On 01/26/2011 07:11 AM, Daniel P. Berrange wrote:
On Tue, Jan 25, 2011 at 03:54:12PM -0500, Laine Stump wrote:
On 01/25/2011 12:49 PM, Daniel P. Berrange wrote:
On Tue, Jan 25, 2011 at 04:24:19AM -0500, Laine Stump wrote:
This patch is a partial resolution to the following bug:
https://bugzilla.redhat.com/show_bug.cgi?id=667756
(to complete the fix, an updated selinux-policy package is required, to add the policy that allows libvirt to set the context of a fifo, which was previously not allowed).
Explanation : When an incoming migration is over a pipe (for example, if the image was compressed and is being fed through gzip, or was on a root-squash nfs server, so needed to be opened by a child process running as a different uid), qemu cannot read it unless the selinux context label for the pipe has been set properly.
The solution is to check the fd used as the source of the migration just before passing it to qemu; if it's a fifo (implying that it's a pipe), we call the newly added virSecurityManagerSetFDLabel() function to set the context properly. --- src/qemu/qemu_driver.c | 18 ++++++++++++++++++ 1 files changed, 18 insertions(+), 0 deletions(-)
Ok, I see what you mean here. ACK to the original patch
Thanks! I just pushed.

If a guest image is saved in compressed format, and the restore fails in some way after the intermediate process used to uncompress the image has been started, but before qemu has been started to hook up to the uncompressor, libvirt will endlessly wait for the uncompressor to finish, but it never will because it's still waiting to have something hooked up to drain its output. The solution is to manually send a SIGTERM to the compressor process before calling waitpid on it (only if the restore has failed, of course). --- src/qemu/qemu_driver.c | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 985b062..cdf7964 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5764,6 +5764,12 @@ qemudDomainSaveImageStartVM(virConnectPtr conn, VIR_VM_OP_RESTORE); if (intermediate_pid != -1) { + /* if there was an error setting up qemu, the intermediate process will + * wait forever to write to stdout, so we must manually kill it + */ + if (ret < 0) + kill(intermediate_pid, SIGTERM); + /* Wait for intermediate process to exit */ while (waitpid(intermediate_pid, &childstat, 0) == -1 && errno == EINTR) { -- 1.7.3.4

On Tue, Jan 25, 2011 at 04:24:20AM -0500, Laine Stump wrote:
If a guest image is saved in compressed format, and the restore fails in some way after the intermediate process used to uncompress the image has been started, but before qemu has been started to hook up to the uncompressor, libvirt will endlessly wait for the uncompressor to finish, but it never will because it's still waiting to have something hooked up to drain its output.
The solution is to manually send a SIGTERM to the compressor process before calling waitpid on it (only if the restore has failed, of course).
Are we leaking a file descriptor here then ? I would think it would get EPIPE or EIO or an end-of-file if QEMU didn't start up and automatically exit. That we need to kill it seems odd (though a worthy extra measure once we're verified that all FDs are closed properly). Daniel

On 01/25/2011 12:47 PM, Daniel P. Berrange wrote:
If a guest image is saved in compressed format, and the restore fails in some way after the intermediate process used to uncompress the image has been started, but before qemu has been started to hook up to the uncompressor, libvirt will endlessly wait for the uncompressor to finish, but it never will because it's still waiting to have something hooked up to drain its output.
The solution is to manually send a SIGTERM to the compressor process before calling waitpid on it (only if the restore has failed, of course). Are we leaking a file descriptor here then ? I would think it would get EPIPE or EIO or an end-of-file if QEMU didn't start up and automatically exit. That we need to kill it seems odd (though a worthy extra measure once we're verified
On Tue, Jan 25, 2011 at 04:24:20AM -0500, Laine Stump wrote: that all FDs are closed properly).
Good point. I guess the proper thing to do is first close the fd that's pointing to the pipe, then do the kill (which will probably be unnecessary, but as you say a worthy extra measure). Do you think it's necessary to do anything this complicated? kill(pid, SIGTERM); (usleep or maybe a waitpid(pid, &childstat, WNOHANG) ?) if (kill(pid, 0) == 0) kill(pid, SIGKILL); Or is the single SIGTERM sufficient?

On Tue, Jan 25, 2011 at 03:51:06PM -0500, Laine Stump wrote:
On 01/25/2011 12:47 PM, Daniel P. Berrange wrote:
If a guest image is saved in compressed format, and the restore fails in some way after the intermediate process used to uncompress the image has been started, but before qemu has been started to hook up to the uncompressor, libvirt will endlessly wait for the uncompressor to finish, but it never will because it's still waiting to have something hooked up to drain its output.
The solution is to manually send a SIGTERM to the compressor process before calling waitpid on it (only if the restore has failed, of course). Are we leaking a file descriptor here then ? I would think it would get EPIPE or EIO or an end-of-file if QEMU didn't start up and automatically exit. That we need to kill it seems odd (though a worthy extra measure once we're verified
On Tue, Jan 25, 2011 at 04:24:20AM -0500, Laine Stump wrote: that all FDs are closed properly).
Good point. I guess the proper thing to do is first close the fd that's pointing to the pipe, then do the kill (which will probably be unnecessary, but as you say a worthy extra measure).
Do you think it's necessary to do anything this complicated?
kill(pid, SIGTERM); (usleep or maybe a waitpid(pid, &childstat, WNOHANG) ?) if (kill(pid, 0) == 0) kill(pid, SIGKILL);
Or is the single SIGTERM sufficient?
I reckon closing the FD + SIGTERM is sufficient. GZip isn't sufficiently complicated that it will hang needing SIGKILL Daniel

On 01/26/2011 07:12 AM, Daniel P. Berrange wrote:
I reckon closing the FD + SIGTERM is sufficient. GZip isn't sufficiently complicated that it will hang needing SIGKILL
How about this? I do notice that these two fd's end up getting closed further down anyway (even if we've failed, we still waitpid, close the intermediatefd, then call qemudDomainSaveImageClose() to close fd. So it's really a matter of timing - is it better to close the fd's earlier and give gzip a chance to die a more natural death, or is it okay to just give it one to the head, and clean up the fd's later? --- src/qemu/qemu_driver.c | 9 +++++++++ 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b6a5cd6..23e0db0 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5770,6 +5770,15 @@ qemudDomainSaveImageStartVM(virConnectPtr conn, VIR_VM_OP_RESTORE); if (intermediate_pid != -1) { + if (ret < 0) { + /* if there was an error setting up qemu, the intermediate process will + * wait forever to write to stdout, so we must manually kill it. + */ + VIR_FORCE_CLOSE(intermediatefd); + VIR_FORCE_CLOSE(fd); + kill(intermediate_pid, SIGTERM); + } + /* Wait for intermediate process to exit */ while (waitpid(intermediate_pid, &childstat, 0) == -1 && errno == EINTR) { -- 1.7.3.4

On Wed, Jan 26, 2011 at 09:00:14AM -0500, Laine Stump wrote:
On 01/26/2011 07:12 AM, Daniel P. Berrange wrote:
I reckon closing the FD + SIGTERM is sufficient. GZip isn't sufficiently complicated that it will hang needing SIGKILL
How about this?
I do notice that these two fd's end up getting closed further down anyway (even if we've failed, we still waitpid, close the intermediatefd, then call qemudDomainSaveImageClose() to close fd. So it's really a matter of timing - is it better to close the fd's earlier and give gzip a chance to die a more natural death, or is it okay to just give it one to the head, and clean up the fd's later?
--- src/qemu/qemu_driver.c | 9 +++++++++ 1 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b6a5cd6..23e0db0 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5770,6 +5770,15 @@ qemudDomainSaveImageStartVM(virConnectPtr conn, VIR_VM_OP_RESTORE);
if (intermediate_pid != -1) { + if (ret < 0) { + /* if there was an error setting up qemu, the intermediate process will + * wait forever to write to stdout, so we must manually kill it. + */ + VIR_FORCE_CLOSE(intermediatefd); + VIR_FORCE_CLOSE(fd); + kill(intermediate_pid, SIGTERM); + } +
ACK Daniel
participants (2)
-
Daniel P. Berrange
-
Laine Stump