[libvirt] [PATCH 0/5] Yet another qemu namespace fixes

*** BLURB HERE *** Michal Privoznik (5): lxc: Move lxcContainerAvailable to virprocess lxc_container: Drop userns_supported util: Introduce virFileMoveMount qemu: Use namespaces iff available on the host kernel qemuDomainCreateDevice: Canonicalize paths src/libvirt_private.syms | 2 ++ src/lxc/lxc_container.c | 54 ++--------------------------- src/lxc/lxc_container.h | 2 -- src/lxc/lxc_driver.c | 7 ++-- src/qemu/qemu_conf.c | 6 +++- src/qemu/qemu_domain.c | 90 ++++++++++++++++++------------------------------ src/util/virfile.c | 28 +++++++++++++++ src/util/virfile.h | 3 ++ src/util/virprocess.c | 72 ++++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 10 ++++++ 10 files changed, 162 insertions(+), 112 deletions(-) -- 2.11.0

Other drivers (like qemu) would like to know if the namespaces are available therefore it makes sense to move this function to a shared module. At the same time, this function had some default namespaces that are checked with every call. It is not necessary - let callers pass just those namespaces they are interested in. With the move the function is renamed to virProcessNamespaceAvailable. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/lxc/lxc_container.c | 44 +---------------------------- src/lxc/lxc_container.h | 2 -- src/lxc/lxc_driver.c | 7 +++-- src/util/virprocess.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 10 +++++++ 6 files changed, 89 insertions(+), 47 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 9c74d35c4..d02d23b35 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2275,6 +2275,7 @@ virProcessGetPids; virProcessGetStartTime; virProcessKill; virProcessKillPainfully; +virProcessNamespaceAvailable; virProcessRunInMountNamespace; virProcessSchedPolicyTypeFromString; virProcessSchedPolicyTypeToString; diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 32c0c3a4a..e5619b168 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -27,7 +27,6 @@ #include <config.h> #include <fcntl.h> -#include <sched.h> #include <limits.h> #include <stdlib.h> #include <stdio.h> @@ -2265,7 +2264,7 @@ static int lxcContainerChild(void *data) static int userns_supported(void) { - return lxcContainerAvailable(LXC_CONTAINER_FEATURE_USER) == 0; + return virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_USER) == 0; } static int userns_required(virDomainDefPtr def) @@ -2399,47 +2398,6 @@ int lxcContainerStart(virDomainDefPtr def, return pid; } -ATTRIBUTE_NORETURN static int -lxcContainerDummyChild(void *argv ATTRIBUTE_UNUSED) -{ - _exit(0); -} - -int lxcContainerAvailable(int features) -{ - int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS| - CLONE_NEWIPC|SIGCHLD; - int cpid; - char *childStack; - char *stack; - int stacksize = getpagesize() * 4; - - if (features & LXC_CONTAINER_FEATURE_USER) - flags |= CLONE_NEWUSER; - - if (features & LXC_CONTAINER_FEATURE_NET) - flags |= CLONE_NEWNET; - - if (VIR_ALLOC_N(stack, stacksize) < 0) - return -1; - - childStack = stack + stacksize; - - cpid = clone(lxcContainerDummyChild, childStack, flags, NULL); - VIR_FREE(stack); - if (cpid < 0) { - char ebuf[1024] ATTRIBUTE_UNUSED; - VIR_DEBUG("clone call returned %s, container support is not enabled", - virStrerror(errno, ebuf, sizeof(ebuf))); - return -1; - } else if (virProcessWait(cpid, NULL, false) < 0) { - return -1; - } - - VIR_DEBUG("container support is enabled"); - return 0; -} - int lxcContainerChown(virDomainDefPtr def, const char *path) { uid_t uid; diff --git a/src/lxc/lxc_container.h b/src/lxc/lxc_container.h index 33eaab49c..fbdfc998a 100644 --- a/src/lxc/lxc_container.h +++ b/src/lxc/lxc_container.h @@ -65,8 +65,6 @@ int lxcContainerStart(virDomainDefPtr def, size_t nttyPaths, char **ttyPaths); -int lxcContainerAvailable(int features); - int lxcContainerSetupHostdevCapsMakePath(const char *dev); virArch lxcContainerGetAlt32bitArch(virArch arch); diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 04a4b8c2a..a2c1052c6 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -1575,7 +1575,7 @@ static int lxcCheckNetNsSupport(void) if (virRun(argv, &ip_rc) < 0 || ip_rc == 255) return 0; - if (lxcContainerAvailable(LXC_CONTAINER_FEATURE_NET) < 0) + if (virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_NET) < 0) return 0; return 1; @@ -1633,7 +1633,10 @@ static int lxcStateInitialize(bool privileged, } /* Check that this is a container enabled kernel */ - if (lxcContainerAvailable(0) < 0) { + if (virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_MNT | + VIR_PROCESS_NAMESPACE_PID | + VIR_PROCESS_NAMESPACE_UTS | + VIR_PROCESS_NAMESPACE_IPC) < 0) { VIR_INFO("LXC support not available in this kernel, disabling driver"); return 0; } diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 1ebe863fb..f5c7ebb96 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1183,6 +1183,78 @@ virProcessSetupPrivateMountNS(void) } #endif /* !defined(HAVE_SYS_MOUNT_H) || !defined(HAVE_UNSHARE) */ +#if defined(__linux__) +ATTRIBUTE_NORETURN static int +virProcessDummyChild(void *argv ATTRIBUTE_UNUSED) +{ + _exit(0); +} + +/** + * virProcessNamespaceAvailable: + * @ns: what namespaces to check (bitwise-OR of virProcessNamespaceFlags) + * + * Check if given list of namespaces (@ns) is available. + * If not, appropriate error message is produced. + * + * Returns: 0 on success (all the namespaces from @flags are available), + * -1 on error (with error message reported). + */ +int +virProcessNamespaceAvailable(unsigned int ns) +{ + int flags = 0; + int cpid; + char *childStack; + char *stack; + int stacksize = getpagesize() * 4; + + if (ns & VIR_PROCESS_NAMESPACE_MNT) + flags |= CLONE_NEWNS; + if (ns & VIR_PROCESS_NAMESPACE_IPC) + flags |= CLONE_NEWIPC; + if (ns & VIR_PROCESS_NAMESPACE_NET) + flags |= CLONE_NEWNET; + if (ns & VIR_PROCESS_NAMESPACE_PID) + flags |= CLONE_NEWPID; + if (ns & VIR_PROCESS_NAMESPACE_USER) + flags |= CLONE_NEWUSER; + if (ns & VIR_PROCESS_NAMESPACE_UTS) + flags |= CLONE_NEWUTS; + + /* Signal parent as soon as the child dies. RIP. */ + flags |= SIGCHLD; + + if (VIR_ALLOC_N(stack, stacksize) < 0) + return -1; + + childStack = stack + stacksize; + + cpid = clone(virProcessDummyChild, childStack, flags, NULL); + VIR_FREE(stack); + if (cpid < 0) { + char ebuf[1024] ATTRIBUTE_UNUSED; + VIR_DEBUG("clone call returned %s, container support is not enabled", + virStrerror(errno, ebuf, sizeof(ebuf))); + return -1; + } else if (virProcessWait(cpid, NULL, false) < 0) { + return -1; + } + + VIR_DEBUG("All namespaces (%x) are enabled", ns); + return 0; +} + +#else /* !defined(__linux__) */ + +int +virProcessNamespaceAvailable(unsigned int ns ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Namespaces are not supported on this platform.")); + return -1; +} +#endif /* !defined(__linux__) */ /** * virProcessExitWithStatus: diff --git a/src/util/virprocess.h b/src/util/virprocess.h index c76a1fbc5..3c5a88277 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -95,5 +95,15 @@ int virProcessSetupPrivateMountNS(void); int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, int priority); +typedef enum { + VIR_PROCESS_NAMESPACE_MNT = (1 << 1), + VIR_PROCESS_NAMESPACE_IPC = (1 << 2), + VIR_PROCESS_NAMESPACE_NET = (1 << 3), + VIR_PROCESS_NAMESPACE_PID = (1 << 4), + VIR_PROCESS_NAMESPACE_USER = (1 << 5), + VIR_PROCESS_NAMESPACE_UTS = (1 << 6), +} virProcessNamespaceFlags; + +int virProcessNamespaceAvailable(unsigned int ns); #endif /* __VIR_PROCESS_H__ */ -- 2.11.0

On Wed, Jan 11, 2017 at 05:43:12PM +0100, Michal Privoznik wrote:
Other drivers (like qemu) would like to know if the namespaces are available therefore it makes sense to move this function to a shared module.
At the same time, this function had some default namespaces that are checked with every call. It is not necessary - let callers pass just those namespaces they are interested in.
With the move the function is renamed to virProcessNamespaceAvailable.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/lxc/lxc_container.c | 44 +---------------------------- src/lxc/lxc_container.h | 2 -- src/lxc/lxc_driver.c | 7 +++-- src/util/virprocess.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 10 +++++++ 6 files changed, 89 insertions(+), 47 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 9c74d35c4..d02d23b35 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2275,6 +2275,7 @@ virProcessGetPids; virProcessGetStartTime; virProcessKill; virProcessKillPainfully; +virProcessNamespaceAvailable; virProcessRunInMountNamespace; virProcessSchedPolicyTypeFromString; virProcessSchedPolicyTypeToString; diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 32c0c3a4a..e5619b168 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -27,7 +27,6 @@ #include <config.h>
#include <fcntl.h> -#include <sched.h> #include <limits.h> #include <stdlib.h> #include <stdio.h> @@ -2265,7 +2264,7 @@ static int lxcContainerChild(void *data)
static int userns_supported(void) { - return lxcContainerAvailable(LXC_CONTAINER_FEATURE_USER) == 0; + return virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_USER) == 0; }
static int userns_required(virDomainDefPtr def) @@ -2399,47 +2398,6 @@ int lxcContainerStart(virDomainDefPtr def, return pid; }
-ATTRIBUTE_NORETURN static int -lxcContainerDummyChild(void *argv ATTRIBUTE_UNUSED) -{ - _exit(0); -} - -int lxcContainerAvailable(int features) -{ - int flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS| - CLONE_NEWIPC|SIGCHLD; - int cpid; - char *childStack; - char *stack; - int stacksize = getpagesize() * 4; - - if (features & LXC_CONTAINER_FEATURE_USER) - flags |= CLONE_NEWUSER; - - if (features & LXC_CONTAINER_FEATURE_NET) - flags |= CLONE_NEWNET;
These two constants need to be dropped from lxc_container.h now too. ACK if that's fixed. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://entangle-photo.org -o- http://search.cpan.org/~danberr/ :|

This is unnecessary wrapper around virProcessNamespaceAvailable(). Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/lxc/lxc_container.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index e5619b168..601b9b035 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -2262,11 +2262,6 @@ static int lxcContainerChild(void *data) return ret; } -static int userns_supported(void) -{ - return virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_USER) == 0; -} - static int userns_required(virDomainDefPtr def) { return def->idmap.uidmap && def->idmap.gidmap; @@ -2346,15 +2341,14 @@ int lxcContainerStart(virDomainDefPtr def, cflags = CLONE_NEWPID|CLONE_NEWNS|SIGCHLD; if (userns_required(def)) { - if (userns_supported()) { - VIR_DEBUG("Enable user namespace"); - cflags |= CLONE_NEWUSER; - } else { + if (virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_USER) < 0) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("Kernel doesn't support user namespace")); VIR_FREE(stack); return -1; } + VIR_DEBUG("Enable user namespace"); + cflags |= CLONE_NEWUSER; } if (!nsInheritFDs || nsInheritFDs[VIR_LXC_DOMAIN_NAMESPACE_SHARENET] == -1) { if (lxcNeedNetworkNamespace(def)) { -- 2.11.0

On Wed, Jan 11, 2017 at 05:43:13PM +0100, Michal Privoznik wrote:
This is unnecessary wrapper around virProcessNamespaceAvailable().
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/lxc/lxc_container.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-)
ACK Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://entangle-photo.org -o- http://search.cpan.org/~danberr/ :|

This is a simple wrapper over mount(). However, not every system out there is capable of moving a mount point. Therefore, instead of having to deal with this fact in all the places of our code we can have a simple wrapper and deal with this fact at just one place. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/qemu/qemu_domain.c | 22 +++------------------- src/util/virfile.c | 28 ++++++++++++++++++++++++++++ src/util/virfile.h | 3 +++ 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d02d23b35..70ed87b95 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1605,6 +1605,7 @@ virFileMakeParentPath; virFileMakePath; virFileMakePathWithMode; virFileMatchesNameSuffix; +virFileMoveMount; virFileNBDDeviceAssociate; virFileOpenAs; virFileOpenTty; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 473d0c1a2..8602f01c7 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -7332,7 +7332,6 @@ qemuDomainBuildNamespace(virQEMUDriverPtr driver, virDomainObjPtr vm) { virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); - const unsigned long mount_flags = MS_MOVE; char *devPath = NULL; char **devMountsPath = NULL, **devMountsSavePath = NULL; size_t ndevMountsPath = 0, i; @@ -7376,13 +7375,8 @@ qemuDomainBuildNamespace(virQEMUDriverPtr driver, goto cleanup; } - if (mount(devMountsPath[i], devMountsSavePath[i], - NULL, mount_flags, NULL) < 0) { - virReportSystemError(errno, - _("Unable to move %s mount"), - devMountsPath[i]); + if (virFileMoveMount(devMountsPath[i], devMountsSavePath[i]) < 0) goto cleanup; - } } if (qemuDomainSetupAllDisks(driver, vm, devPath) < 0) @@ -7403,12 +7397,8 @@ qemuDomainBuildNamespace(virQEMUDriverPtr driver, if (qemuDomainSetupAllRNGs(driver, vm, devPath) < 0) goto cleanup; - if (mount(devPath, "/dev", NULL, mount_flags, NULL) < 0) { - virReportSystemError(errno, - _("Failed to mount %s on /dev"), - devPath); + if (virFileMoveMount(devPath, "/dev") < 0) goto cleanup; - } for (i = 0; i < ndevMountsPath; i++) { if (devMountsSavePath[i] == devPath) @@ -7420,14 +7410,8 @@ qemuDomainBuildNamespace(virQEMUDriverPtr driver, goto cleanup; } - if (mount(devMountsSavePath[i], devMountsPath[i], - NULL, mount_flags, NULL) < 0) { - virReportSystemError(errno, - _("Failed to mount %s on %s"), - devMountsSavePath[i], - devMountsPath[i]); + if (virFileMoveMount(devMountsSavePath[i], devMountsPath[i]) < 0) goto cleanup; - } } ret = 0; diff --git a/src/util/virfile.c b/src/util/virfile.c index 99157be16..bf8099e34 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -3610,6 +3610,24 @@ virFileBindMountDevice(const char *src, return 0; } + +int +virFileMoveMount(const char *src, + const char *dst) +{ + const unsigned long mount_flags = MS_MOVE; + + if (mount(src, dst, NULL, mount_flags, NULL) < 0) { + virReportSystemError(errno, + _("Unable to move %s mount to %s"), + src, dst); + return -1; + } + + return 0; +} + + #else /* !defined(__linux__) || !defined(HAVE_SYS_MOUNT_H) */ int @@ -3630,6 +3648,16 @@ virFileBindMountDevice(const char *src ATTRIBUTE_UNUSED, _("mount is not supported on this platform.")); return -1; } + + +int +virFileMoveMount(const char *src ATTRIBUTE_UNUSED, + const char *dst ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("mount move is not supported on this platform.")); + return -1; +} #endif /* !defined(__linux__) || !defined(HAVE_SYS_MOUNT_H) */ diff --git a/src/util/virfile.h b/src/util/virfile.h index 571e5bdc8..0343acd5b 100644 --- a/src/util/virfile.h +++ b/src/util/virfile.h @@ -318,6 +318,9 @@ int virFileSetupDev(const char *path, int virFileBindMountDevice(const char *src, const char *dst); +int virFileMoveMount(const char *src, + const char *dst); + int virFileGetACLs(const char *file, void **acl); -- 2.11.0

On Wed, Jan 11, 2017 at 05:43:14PM +0100, Michal Privoznik wrote:
This is a simple wrapper over mount(). However, not every system out there is capable of moving a mount point. Therefore, instead of having to deal with this fact in all the places of our code we can have a simple wrapper and deal with this fact at just one place.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/qemu/qemu_domain.c | 22 +++------------------- src/util/virfile.c | 28 ++++++++++++++++++++++++++++ src/util/virfile.h | 3 +++ 4 files changed, 35 insertions(+), 19 deletions(-)
ACK Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://entangle-photo.org -o- http://search.cpan.org/~danberr/ :|

On Wed, Jan 11, Michal Privoznik wrote:
This is a simple wrapper over mount(). However, not every system out there is capable of moving a mount point. Therefore, instead of having to deal with this fact in all the places of our code we can have a simple wrapper and deal with this fact at just one place.
+++ b/src/util/virfile.c
+ const unsigned long mount_flags = MS_MOVE;
This fails to compile on systems without MS_MOVE, like openSUSE 11.4. configure checks all sorts of things, perhaps it should detect the presence of MS_MOVE as well and do the appropriate action. [ 306s] util/virfile.c:3618:39: error: 'MS_MOVE' undeclared (first use in this function) Olaf

On 01/30/2017 10:14 PM, Olaf Hering wrote:
On Wed, Jan 11, Michal Privoznik wrote:
This is a simple wrapper over mount(). However, not every system out there is capable of moving a mount point. Therefore, instead of having to deal with this fact in all the places of our code we can have a simple wrapper and deal with this fact at just one place.
+++ b/src/util/virfile.c
+ const unsigned long mount_flags = MS_MOVE;
This fails to compile on systems without MS_MOVE, like openSUSE 11.4. configure checks all sorts of things, perhaps it should detect the presence of MS_MOVE as well and do the appropriate action.
[ 306s] util/virfile.c:3618:39: error: 'MS_MOVE' undeclared (first use in this function)
Olaf
Ah, have you tried with the latest git HEAD? BTW: looking at wiki, isn't openSUSE 11.4 out of life? It has kernel 2.6.34 which is quite old. But looking into kernel's git log the symbol was introduced in the very first commit (import from CVS, I did not bother to search that can of worms). Anyway, this is 2.6.12 that we are talking about. I wonder whether SUSE is doing some magic over the linux header files that removes the symbol? Does the following command find anything? $ grep -r MS_MOVE /usr/include/ Michal

On Tue, Jan 31, Michal Privoznik wrote:
This fails to compile on systems without MS_MOVE, like openSUSE 11.4. $ grep -r MS_MOVE /usr/include/
At that point glibc did not provide MS_MOVE. /usr/include/sys/mount.h: MS_MOVE = 8192, /usr/include/sys/mount.h:#define MS_MOVE MS_MOVE /usr/include/linux/fs.h:#define MS_MOVE 8192 /WD20_11.4/usr/include/linux/fs.h:#define MS_MOVE 8192 /WD20_13.1/usr/include/sys/mount.h: MS_MOVE = 8192, /WD20_13.1/usr/include/sys/mount.h:#define MS_MOVE MS_MOVE /WD20_13.1/usr/include/linux/fs.h:#define MS_MOVE 8192 So what should be done? Olaf

On 01/31/2017 11:33 AM, Olaf Hering wrote:
On Tue, Jan 31, Michal Privoznik wrote:
This fails to compile on systems without MS_MOVE, like openSUSE 11.4. $ grep -r MS_MOVE /usr/include/
At that point glibc did not provide MS_MOVE.
/usr/include/sys/mount.h: MS_MOVE = 8192, /usr/include/sys/mount.h:#define MS_MOVE MS_MOVE /usr/include/linux/fs.h:#define MS_MOVE 8192 /WD20_11.4/usr/include/linux/fs.h:#define MS_MOVE 8192 /WD20_13.1/usr/include/sys/mount.h: MS_MOVE = 8192, /WD20_13.1/usr/include/sys/mount.h:#define MS_MOVE MS_MOVE /WD20_13.1/usr/include/linux/fs.h:#define MS_MOVE 8192
So what should be done?
so sys/mount.h is present and does provide the symbol. And yet, compiler does not see it. Anything suspicious on sys/mount.h that would make the compiler not see it (e.g. #ifdef). Meanwhile, does this help? diff --git i/src/util/virfile.c w/src/util/virfile.c index bf8099e34..a900bd405 100644 --- i/src/util/virfile.c +++ w/src/util/virfile.c @@ -34,6 +34,9 @@ #include <sys/wait.h> #if defined(HAVE_SYS_MOUNT_H) # include <sys/mount.h> +# if defined(__linux__) +# include <linux/fs.h> +# endif #endif #include <unistd.h> #include <dirent.h> Michal

On Tue, Jan 31, Michal Privoznik wrote:
On 01/31/2017 11:33 AM, Olaf Hering wrote:
/WD20_11.4/usr/include/linux/fs.h:#define MS_MOVE 8192 so sys/mount.h is present and does provide the symbol. And yet, compiler
No, 11.4 had it only in linux/fs.h. If libvirt provides a compat layer for old userland a #ifndef MS_MOVE ; #define MS_MOVE 8192 may do the trick. Olaf

On 01/31/2017 01:29 PM, Olaf Hering wrote:
On Tue, Jan 31, Michal Privoznik wrote:
On 01/31/2017 11:33 AM, Olaf Hering wrote:
/WD20_11.4/usr/include/linux/fs.h:#define MS_MOVE 8192 so sys/mount.h is present and does provide the symbol. And yet, compiler
No, 11.4 had it only in linux/fs.h.
That's why I thought my patch was going to help. Did it?
If libvirt provides a compat layer for old userland a #ifndef MS_MOVE ; #define MS_MOVE 8192 may do the trick.
No, we should not do this. We should include a header file that defines it. Michal

On Tue, Jan 31, Michal Privoznik wrote:
That's why I thought my patch was going to help. Did it?
I have not tried yet.
If libvirt provides a compat layer for old userland a #ifndef MS_MOVE ; #define MS_MOVE 8192 may do the trick. No, we should not do this. We should include a header file that defines it.
Usually linux/* and asm/* is not safe to include unconditionally. Make a copy of required interfaces, thats the common consense AFAIK. Olaf

So far the namespaces were turned on by default unconditionally. For all non-Linux platforms we provided stub functions that just ignored whatever namespaces setting there was in qemu.conf and returned 0 to indicate success. Moreover, we didn't really check if namespaces are available on the host kernel. This is suboptimal as we might have ignored user setting. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_conf.c | 6 +++++- src/qemu/qemu_domain.c | 35 ++++++++++------------------------- 2 files changed, 15 insertions(+), 26 deletions(-) diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 86170fb7a..6613d59bc 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -317,8 +317,12 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged) if (!(cfg->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST))) goto error; - if (virBitmapSetBit(cfg->namespaces, QEMU_DOMAIN_NS_MOUNT) < 0) +#if defined(__linux__) + if (privileged && + virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_MNT) == 0 && + virBitmapSetBit(cfg->namespaces, QEMU_DOMAIN_NS_MOUNT) < 0) goto error; +#endif /* defined(__linux__) */ #ifdef DEFAULT_LOADER_NVRAM if (virFirmwareParseList(DEFAULT_LOADER_NVRAM, diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 8602f01c7..6e6cb844a 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -6879,7 +6879,6 @@ qemuDomainGetHostdevPath(virDomainHostdevDefPtr dev, } -#if defined(__linux__) /** * qemuDomainGetPreservedMounts: * @@ -7432,12 +7431,20 @@ qemuDomainCreateNamespace(virQEMUDriverPtr driver, virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); int ret = -1; - if (!virBitmapIsBitSet(cfg->namespaces, QEMU_DOMAIN_NS_MOUNT) || - !virQEMUDriverIsPrivileged(driver)) { + if (!virBitmapIsBitSet(cfg->namespaces, QEMU_DOMAIN_NS_MOUNT)) { ret = 0; goto cleanup; } + if (!virQEMUDriverIsPrivileged(driver)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("cannot use namespaces in session mode")); + goto cleanup; + } + + if (virProcessNamespaceAvailable(VIR_PROCESS_NAMESPACE_MNT) < 0) + goto cleanup; + if (qemuDomainEnableNamespace(vm, QEMU_DOMAIN_NS_MOUNT) < 0) goto cleanup; @@ -7447,28 +7454,6 @@ qemuDomainCreateNamespace(virQEMUDriverPtr driver, return ret; } -#else /* !defined(__linux__) */ - -int -qemuDomainBuildNamespace(virQEMUDriverPtr driver ATTRIBUTE_UNUSED, - virDomainObjPtr vm ATTRIBUTE_UNUSED) -{ - /* Namespaces are Linux specific. On other platforms just - * carry on with the old behaviour. */ - return 0; -} - - -int -qemuDomainCreateNamespace(virQEMUDriverPtr driver ATTRIBUTE_UNUSED, - virDomainObjPtr vm ATTRIBUTE_UNUSED) -{ - /* Namespaces are Linux specific. On other platforms just - * carry on with the old behaviour. */ - return 0; -} -#endif /* !defined(__linux__) */ - struct qemuDomainAttachDeviceMknodData { virQEMUDriverPtr driver; -- 2.11.0

On Wed, Jan 11, 2017 at 05:43:15PM +0100, Michal Privoznik wrote:
So far the namespaces were turned on by default unconditionally. For all non-Linux platforms we provided stub functions that just ignored whatever namespaces setting there was in qemu.conf and returned 0 to indicate success. Moreover, we didn't really check if namespaces are available on the host kernel.
This is suboptimal as we might have ignored user setting.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_conf.c | 6 +++++- src/qemu/qemu_domain.c | 35 ++++++++++------------------------- 2 files changed, 15 insertions(+), 26 deletions(-)
ACK Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://entangle-photo.org -o- http://search.cpan.org/~danberr/ :|

So far the decision whether /dev/* entry is created in the qemu namespace is really simple: does the path starts with "/dev/"? This can be easily fooled by providing path like the following (for any considered device like disk, rng, chardev, ..): /dev/../var/lib/libvirt/images/disk.qcow2 Therefore, before making the decision the path should be canonicalized. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_domain.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 6e6cb844a..1399dee0d 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -6955,28 +6955,38 @@ qemuDomainCreateDevice(const char *device, bool allow_noent) { char *devicePath = NULL; + char *canonDevicePath = NULL; struct stat sb; int ret = -1; - if (!STRPREFIX(device, DEVPREFIX)) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("invalid device: %s"), - device); + if (virFileResolveAllLinks(device, &canonDevicePath) < 0) { + if (errno == ENOENT && allow_noent) { + /* Ignore non-existent device. */ + ret = 0; + goto cleanup; + } + + virReportError(errno, _("Unable to canonicalize %s"), device); + goto cleanup; + } + + if (!STRPREFIX(canonDevicePath, DEVPREFIX)) { + ret = 0; goto cleanup; } if (virAsprintf(&devicePath, "%s/%s", - path, device + strlen(DEVPREFIX)) < 0) + path, canonDevicePath + strlen(DEVPREFIX)) < 0) goto cleanup; - if (stat(device, &sb) < 0) { + if (stat(canonDevicePath, &sb) < 0) { if (errno == ENOENT && allow_noent) { /* Ignore non-existent device. */ ret = 0; goto cleanup; } - virReportSystemError(errno, _("Unable to stat %s"), device); + virReportSystemError(errno, _("Unable to stat %s"), canonDevicePath); goto cleanup; } @@ -7005,7 +7015,7 @@ qemuDomainCreateDevice(const char *device, goto cleanup; } - if (virFileCopyACLs(device, devicePath) < 0 && + if (virFileCopyACLs(canonDevicePath, devicePath) < 0 && errno != ENOTSUP) { virReportSystemError(errno, _("Failed to copy ACLs on device %s"), @@ -7015,6 +7025,7 @@ qemuDomainCreateDevice(const char *device, ret = 0; cleanup: + VIR_FREE(canonDevicePath); VIR_FREE(devicePath); return ret; } @@ -7096,8 +7107,7 @@ qemuDomainSetupDisk(virQEMUDriverPtr driver ATTRIBUTE_UNUSED, int ret = -1; for (next = disk->src; next; next = next->backingStore) { - if (!next->path || !virStorageSourceIsLocalStorage(next) || - !STRPREFIX(next->path, DEVPREFIX)) { + if (!next->path || !virStorageSourceIsLocalStorage(next)) { /* Not creating device. Just continue. */ continue; } @@ -7717,8 +7727,7 @@ qemuDomainNamespaceSetupDisk(virQEMUDriverPtr driver, return 0; for (next = disk->src; next; next = next->backingStore) { - if (!next->path || !virStorageSourceIsBlockLocal(disk->src) || - !STRPREFIX(next->path, DEVPREFIX)) { + if (!next->path || !virStorageSourceIsBlockLocal(disk->src)) { /* Not creating device. Just continue. */ continue; } -- 2.11.0

On Wed, Jan 11, 2017 at 05:43:16PM +0100, Michal Privoznik wrote:
So far the decision whether /dev/* entry is created in the qemu namespace is really simple: does the path starts with "/dev/"? This can be easily fooled by providing path like the following (for any considered device like disk, rng, chardev, ..):
/dev/../var/lib/libvirt/images/disk.qcow2
Did you find someone/thing that was actually doing that ? ACK Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://entangle-photo.org -o- http://search.cpan.org/~danberr/ :|

On 01/11/2017 06:04 PM, Daniel P. Berrange wrote:
On Wed, Jan 11, 2017 at 05:43:16PM +0100, Michal Privoznik wrote:
So far the decision whether /dev/* entry is created in the qemu namespace is really simple: does the path starts with "/dev/"? This can be easily fooled by providing path like the following (for any considered device like disk, rng, chardev, ..):
/dev/../var/lib/libvirt/images/disk.qcow2
Did you find someone/thing that was actually doing that ?
No, but Martin asked me about that when talking about namespaces and I thought of trying that out. The domain startup did not fail, but only because of 3aae99fe71 which made mknod() not error out on EEXIST. Michal

On Wed, Jan 11, 2017 at 18:07:19 +0100, Michal Privoznik wrote:
On 01/11/2017 06:04 PM, Daniel P. Berrange wrote:
On Wed, Jan 11, 2017 at 05:43:16PM +0100, Michal Privoznik wrote:
So far the decision whether /dev/* entry is created in the qemu namespace is really simple: does the path starts with "/dev/"? This can be easily fooled by providing path like the following (for any considered device like disk, rng, chardev, ..):
/dev/../var/lib/libvirt/images/disk.qcow2
Did you find someone/thing that was actually doing that ?
No, but Martin asked me about that when talking about namespaces and I thought of trying that out. The domain startup did not fail, but only because of 3aae99fe71 which made mknod() not error out on EEXIST.
While this specific case may be rare, /some/path/uuid1/uuid2/uuid3 paths which (through several chained symlinks) actually end up being /dev/something are pretty common :-) Jirka
participants (4)
-
Daniel P. Berrange
-
Jiri Denemark
-
Michal Privoznik
-
Olaf Hering