[libvirt] [PATCH 1/2] Implementation deficiency in virInitctlSetRunLevel v4

Implement virProcessRunInMountNamespace, which runs callback of type virProcessNamespaceCallback in a container namespace. Hope it'll nail it this time. --- src/libvirt_private.syms | 1 + src/util/virprocess.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 6 +++++ 3 files changed, 70 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 2dbb8f8..e210fd0 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1646,6 +1646,7 @@ virProcessGetNamespaces; virProcessGetStartTime; virProcessKill; virProcessKillPainfully; +virProcessRunInMountNamespace; virProcessSetAffinity; virProcessSetMaxFiles; virProcessSetMaxMemLock; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 9fc3207..7bb494e 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -31,6 +31,7 @@ # include <sys/resource.h> #endif #include <sched.h> +#include <stdlib.h> #ifdef __FreeBSD__ # include <sys/param.h> @@ -847,3 +848,65 @@ int virProcessGetStartTime(pid_t pid, return 0; } #endif + +#ifdef HAVE_SETNS +int virProcessRunInMountNamespace(pid_t pid, + virProcessNamespaceCallback cb, + void *opaque) +{ + char* path = NULL; + int ret = -1; + int cpid = -1; + int status = -1; + int fd = -1; + + if (virAsprintf(&path, "/proc/%llu/ns/mnt", + (unsigned long long)pid) < 0) { + goto cleanup; + } + + if ((fd = open(path, O_RDONLY)) < 0) { + virReportSystemError(errno, "%s", + _("Kernel does not provide mount namespace")); + goto cleanup; + } + + switch (cpid = fork()) { + case 0: + if (setns(fd, 0) == -1) { + _exit(-1); + } + + ret = cb(pid, opaque); + _exit(ret); + break; + case -1: + virReportSystemError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Fork failed")); + goto cleanup; + default: + if (virProcessWait(cpid, &status) < 0 || status < 0) { + virReportSystemError(errno, + _("Callback failed with status %i"), + status); + ret = 1; + } else { + ret = 0; + } + } + +cleanup: + VIR_FREE(path); + VIR_FORCE_CLOSE(fd); + return ret; +} +#else +int virProcessRunInMountNamespace(pid_t pid ATTRIBUTE_UNUSED, + virProcessNamespaceCallback cb ATTRIBUTE_UNUSED, + void *opaque ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Mount namespaces are not available on this platform")); + return -1; +} +#endif diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 9f77bc5..205abf7 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -60,4 +60,10 @@ int virProcessSetNamespaces(size_t nfdlist, int virProcessSetMaxMemLock(pid_t pid, unsigned long long bytes); int virProcessSetMaxProcesses(pid_t pid, unsigned int procs); int virProcessSetMaxFiles(pid_t pid, unsigned int files); + +typedef int (*virProcessNamespaceCallback)(pid_t pid, void *opaque); + +int virProcessRunInMountNamespace(pid_t pid, + virProcessNamespaceCallback cb, + void *opaque); #endif /* __VIR_PROCESS_H__ */ -- 1.7.10.4

On Fri, Dec 20, 2013 at 08:24:41PM +0400, Reco wrote:
Implement virProcessRunInMountNamespace, which runs callback of type virProcessNamespaceCallback in a container namespace.
Hope it'll nail it this time.
--- src/libvirt_private.syms | 1 + src/util/virprocess.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 6 +++++ 3 files changed, 70 insertions(+)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 2dbb8f8..e210fd0 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1646,6 +1646,7 @@ virProcessGetNamespaces; virProcessGetStartTime; virProcessKill; virProcessKillPainfully; +virProcessRunInMountNamespace; virProcessSetAffinity; virProcessSetMaxFiles; virProcessSetMaxMemLock; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 9fc3207..7bb494e 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -31,6 +31,7 @@ # include <sys/resource.h> #endif #include <sched.h> +#include <stdlib.h>
#ifdef __FreeBSD__ # include <sys/param.h> @@ -847,3 +848,65 @@ int virProcessGetStartTime(pid_t pid, return 0; } #endif + +#ifdef HAVE_SETNS +int virProcessRunInMountNamespace(pid_t pid, + virProcessNamespaceCallback cb, + void *opaque) +{ + char* path = NULL; + int ret = -1; + int cpid = -1; + int status = -1; + int fd = -1; + + if (virAsprintf(&path, "/proc/%llu/ns/mnt", + (unsigned long long)pid) < 0) { + goto cleanup; + } + + if ((fd = open(path, O_RDONLY)) < 0) { + virReportSystemError(errno, "%s", + _("Kernel does not provide mount namespace")); + goto cleanup; + } + + switch (cpid = fork()) { + case 0: + if (setns(fd, 0) == -1) { + _exit(-1); + } + + ret = cb(pid, opaque); + _exit(ret); + break; + case -1: + virReportSystemError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Fork failed")); + goto cleanup; + default: + if (virProcessWait(cpid, &status) < 0 || status < 0) { + virReportSystemError(errno, + _("Callback failed with status %i"), + status); + ret = 1; + } else { + ret = 0; + } + } + +cleanup: + VIR_FREE(path); + VIR_FORCE_CLOSE(fd); + return ret; +} +#else +int virProcessRunInMountNamespace(pid_t pid ATTRIBUTE_UNUSED, + virProcessNamespaceCallback cb ATTRIBUTE_UNUSED, + void *opaque ATTRIBUTE_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("Mount namespaces are not available on this platform")); + return -1; +} +#endif diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 9f77bc5..205abf7 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -60,4 +60,10 @@ int virProcessSetNamespaces(size_t nfdlist, int virProcessSetMaxMemLock(pid_t pid, unsigned long long bytes); int virProcessSetMaxProcesses(pid_t pid, unsigned int procs); int virProcessSetMaxFiles(pid_t pid, unsigned int files); + +typedef int (*virProcessNamespaceCallback)(pid_t pid, void *opaque); + +int virProcessRunInMountNamespace(pid_t pid, + virProcessNamespaceCallback cb, + void *opaque); #endif /* __VIR_PROCESS_H__ */
ACK Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 12/20/2013 09:34 AM, Daniel P. Berrange wrote:
On Fri, Dec 20, 2013 at 08:24:41PM +0400, Reco wrote:
Implement virProcessRunInMountNamespace, which runs callback of type virProcessNamespaceCallback in a container namespace.
Hope it'll nail it this time.
Fails 'make syntax-check': prohibit_fork_wrappers src/util/virprocess.c:874: switch (cpid = fork()) { maint.mk: use virCommand for child processes make: *** [sc_prohibit_fork_wrappers] Error 1
+ + switch (cpid = fork()) { + case 0: + if (setns(fd, 0) == -1) { + _exit(-1);
exit(-1) is wrong; the user will see $? of 255 (not -1), and it is not our typical exit status for failure (where we normally want to be < 128 so that it is not confused with exit due to signal). Besides, I hate magic numbers; better would be _exit(EXIT_FAILURE).
+ default: + if (virProcessWait(cpid, &status) < 0 || status < 0) {
No need to pass &status to virProcessWait() if you are going to insist on 0 status anyways; better to pass NULL and let virProcessWait do the validation on your behalf.
+ virReportSystemError(errno, + _("Callback failed with status %i"), + status);
This overwrites the (better) error message that virProcessWait() already generated.
+ ret = 1;
This returns a number > 0 on failure, even though our normal conventions is to return a negative number on failure.
+++ b/src/util/virprocess.h @@ -60,4 +60,10 @@ int virProcessSetNamespaces(size_t nfdlist, int virProcessSetMaxMemLock(pid_t pid, unsigned long long bytes); int virProcessSetMaxProcesses(pid_t pid, unsigned int procs); int virProcessSetMaxFiles(pid_t pid, unsigned int files); + +typedef int (*virProcessNamespaceCallback)(pid_t pid, void *opaque);
Worth documenting that the return value of this callback is passed on to _exit() (and thus that errors should be EXIT_FAILURE rather than our typical -1 on failure). I'll repost a full v5 series with my proposed fixes to these issues in a separate thread, and wait for a review (since this fixes a CVE, it needs a second set of eyes). -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 12/20/2013 11:17 AM, Eric Blake wrote:
On 12/20/2013 09:34 AM, Daniel P. Berrange wrote:
On Fri, Dec 20, 2013 at 08:24:41PM +0400, Reco wrote:
Implement virProcessRunInMountNamespace, which runs callback of type virProcessNamespaceCallback in a container namespace.
Hope it'll nail it this time.
Fails 'make syntax-check':
prohibit_fork_wrappers src/util/virprocess.c:874: switch (cpid = fork()) { maint.mk: use virCommand for child processes make: *** [sc_prohibit_fork_wrappers] Error 1
+ + switch (cpid = fork()) {
Yuck. Rewriting this to use virFork() has exposed some nastiness in our existing library: 'virsh lxc-enter-namespace' and 'virt-login-shell' both use virFork() incorrectly, which is a sign that the interface itself is to blame. The fact that a child process must explicitly call _exit() if virFork() returns < 0 but set *pid to 0 is just nasty - it violates the normal expectation that a negative return has no more work to do. I still plan on posting a revised version of this patch, but it will be part of a bit bigger series that first tries to make virFork and virProcessWait friendlier. I spent some time learning about namespaces in the process - I can see why 'virsh lxc-enter-namespace' must fork twice (although setns() can join some namespaces on a per-thread basis, it cannot do so for the 'user namespace', so one fork is required to get to a single-threaded state since virsh is multithreaded; the second fork is required because setting the 'pid namespace' doesn't actually change the pid of the current process, it only designates that child processes will be in the new namespace). But why is 'virt-login-shell' double-forking? It is already single-threaded, and doesn't do anything after fork except wait on the child process, so a single fork after setting the 'pid namespace' ought to be sufficient.
+ case 0: + if (setns(fd, 0) == -1) { + _exit(-1);
exit(-1) is wrong; the user will see $? of 255 (not -1), and it is not our typical exit status for failure (where we normally want to be < 128 so that it is not confused with exit due to signal). Besides, I hate magic numbers; better would be _exit(EXIT_FAILURE).
Or maybe _exit(127), which is more idiomatic for failures when fork() succeeds but exec() is not reached (see posix_spawn() for example).
+ default: + if (virProcessWait(cpid, &status) < 0 || status < 0) {
No need to pass &status to virProcessWait() if you are going to insist on 0 status anyways; better to pass NULL and let virProcessWait do the validation on your behalf.
On the other hand, we may NEED to pass status on to the user, as I noticed in patch 2 that your callback function wants to distinguish between fatal errors and the simpler case of no initctl support; collapsing all non-zero status into failure loses our chance to report multiple bits of information.
I'll repost a full v5 series with my proposed fixes to these issues in a separate thread, and wait for a review (since this fixes a CVE, it needs a second set of eyes).
That, and we still need the fix for virDomainDeviceAttach using /dev only within the guest namespace. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 12/20/2013 09:24 AM, Reco wrote:
Implement virProcessRunInMountNamespace, which runs callback of type virProcessNamespaceCallback in a container namespace.
Hope it'll nail it this time.
This comment doesn't fit in the commit message; it's better to put review comments...
---
...here, after the '---' separator, so that 'git am' will strip the information that was useful to reviewers, but not to overall history. For the same reason, your patch should be titled: [PATCHv4 1/2] ... and not [PATCH 1/2] ... v4 since v4 means nothing in libvirt.git (where we don't see the earlier three versions), but only on the mailing list. Also, when sending a series, it's better to stick both 1/2 and 2/2 as in-reply-to a 0/2 cover letter. You can do all that with: git send-email -2 --cover-letter --subject-prefix=PATCHv4 At any rate, since Dan has ack'ed it, and it fixes a CVE (where we're still waiting for the number to be assigned, but the flaw is real), I'll go ahead and push this soon. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

Hi. On Fri, 20 Dec 2013 10:48:39 -0700 Eric Blake <eblake@redhat.com> wrote:
On 12/20/2013 09:24 AM, Reco wrote:
Implement virProcessRunInMountNamespace, which runs callback of type virProcessNamespaceCallback in a container namespace.
Hope it'll nail it this time.
This comment doesn't fit in the commit message; it's better to put review comments...
---
...here, after the '---' separator, so that 'git am' will strip the information that was useful to reviewers, but not to overall history. For the same reason, your patch should be titled:
[PATCHv4 1/2] ...
and not
[PATCH 1/2] ... v4
since v4 means nothing in libvirt.git (where we don't see the earlier three versions), but only on the mailing list.
Also, when sending a series, it's better to stick both 1/2 and 2/2 as in-reply-to a 0/2 cover letter.
You can do all that with: git send-email -2 --cover-letter --subject-prefix=PATCHv4
At any rate, since Dan has ack'ed it, and it fixes a CVE (where we're still waiting for the number to be assigned, but the flaw is real), I'll go ahead and push this soon.
Thank you for the advices, Eric, sorry I didn't followed'em to the letter. It wasn't intentional, I just don't have that much experience with git. It's good to know that these small patches benefited the Libvirt project. Reco

On 12/20/2013 10:53 AM, Reco wrote:
At any rate, since Dan has ack'ed it, and it fixes a CVE (where we're still waiting for the number to be assigned, but the flaw is real), I'll go ahead and push this soon.
Thank you for the advices, Eric, sorry I didn't followed'em to the letter. It wasn't intentional, I just don't have that much experience with git.
That's okay - you've already shown great initiative by reporting _and fixing_ a security bug. Getting all the details right comes with time and experience, but we all have to start somewhere. If you read HACKING, but found things confusing or lacking, let us know - we'd like to improve it for the next first-time contributor. If you didn't read HACKING, then that might explain why it took a few tries to come up to speed on our conventions.
It's good to know that these small patches benefited the Libvirt project.
Reco
You still haven't answered my query from an earlier version: we prefer to list a legal name in the authorship of git commits (after all, copyleft licenses work _because_ of copyright law, but the law prefers working with full names rather than nicknames). Is there something that I should list you as rather than just "Reco"? -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

You still haven't answered my query from an earlier version: we prefer to list a legal name in the authorship of git commits (after all, copyleft licenses work _because_ of copyright law, but the law prefers working with full names rather than nicknames). Is there something that I should list you as rather than just "Reco"?
As far as I'm concerned, this bug was fixed by (in alphabet order): Daniel P. Berrange <berrange@redhat.com> Eric Blake <eblake@redhat.com> Michal Privoznik <mprivozn@redhat.com> /me merely pushed buttons on the keyboard and spammed this maillist. Hopefully this eliminates current and future legal issues. And 'Reco' is the alias as good as any. Reco

On 12/20/2013 11:40 AM, Reco wrote:
You still haven't answered my query from an earlier version: we prefer to list a legal name in the authorship of git commits (after all, copyleft licenses work _because_ of copyright law, but the law prefers working with full names rather than nicknames). Is there something that I should list you as rather than just "Reco"?
As far as I'm concerned, this bug was fixed by (in alphabet order):
Daniel P. Berrange <berrange@redhat.com> Eric Blake <eblake@redhat.com> Michal Privoznik <mprivozn@redhat.com>
/me merely pushed buttons on the keyboard and spammed this maillist.
Hopefully this eliminates current and future legal issues.
And 'Reco' is the alias as good as any.
Fair enough. Then since I'm rewriting a good chunk of the patch anyways, I'll reassign authorship to myself and leave a line of credit to you - giving credit is much looser than claiming authorship. v5 coming up shortly, and I'll wait for your response on that thread to ensure that what I did was okay. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org
participants (3)
-
Daniel P. Berrange
-
Eric Blake
-
Reco