Normally when a process' uid is changed to non-0, all the capabilities
bits are cleared, even those explicitly set with calls to
capng_update()/capng_apply() made immediately before setuid. And
*after* the process' uid has been changed, it no longer has the
necessary privileges to add capabilities back to the process.
In order to set a non-0 uid while still maintaining any capabilities
bits, it is necessary to either call capng_change_id() (which
unfortunately doesn't currently call initgroups to setup auxiliary
group membership), or to perform the small amount of calisthenics
contained in the new utility function virSetUIDGIDWithCaps().
Another very important difference between the capabilities
setting/clearing in virSetUIDGIDWithCaps() and virCommand's
virSetCapabilities() (which it will replace in the next patch) is that
the new function properly clears the capabilities bounding set, so it
will not be possible for a child process to set any new
capabilities.
A short description of what is done by virSetUIDGIDWithCaps():
1) clear all capabilities then set all those desired by the caller (in
capBits) plus CAP_SETGID, CAP_SETUID, and CAP_SETPCAP (which is needed
to change the capabilities bounding set).
2) call prctl(), telling it that we want to maintain current
capabilities across an upcoming setuid().
3) switch to the new uid/gid
4) again call prctl(), telling it we will no longer want capabilities
maintained if this process does another setuid().
5) clear the capabilities that we added to allow us to
setuid/setgid/change the bounding set (unless they were also requested
by the caller via the virCommand API).
Because the modification/maintaining of capabilities is intermingled
with setting the uid, this is necessarily done in a single function,
rather than having two independent functions.
Note that, due to the way that effective capabilities are computed (at
time of execve) for a process that has uid != 0, the *file*
capabilities of the binary being executed must also have the desired
capabilities bit(s) set (see "man 7 capabilities"). This can be done
with the "filecap" command. (e.g. "filecap /usr/bin/qemu-kvm
sys_rawio").
---
Change from V1:
* properly cast when comparing gid/uid, and only short circuit for -1 (not 0)
* fix // style comments
* add ATTRIBUTE_UNUSED where appropriate for capBits argument.
src/libvirt_private.syms | 1 +
src/util/virutil.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++
src/util/virutil.h | 1 +
3 files changed, 113 insertions(+)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index dcdcb67..d8d5877 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1312,6 +1312,7 @@ virSetDeviceUnprivSGIO;
virSetInherit;
virSetNonBlock;
virSetUIDGID;
+virSetUIDGIDWithCaps;
virSkipSpaces;
virSkipSpacesAndBackslash;
virSkipSpacesBackwards;
diff --git a/src/util/virutil.c b/src/util/virutil.c
index 0d7db00..28fcc2f 100644
--- a/src/util/virutil.c
+++ b/src/util/virutil.c
@@ -60,6 +60,7 @@
#endif
#if WITH_CAPNG
# include <cap-ng.h>
+# include <sys/prctl.h>
#endif
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
# include <mntent.h>
@@ -2990,6 +2991,116 @@ virGetGroupName(gid_t gid ATTRIBUTE_UNUSED)
}
#endif /* HAVE_GETPWUID_R */
+#if WITH_CAPNG
+/* Set the real and effective uid and gid to the given values, while
+ * maintaining the capabilities indicated by bits in @capBits. return
+ * 0 on success, -1 on failure (the original system error remains in
+ * errno).
+ */
+int
+virSetUIDGIDWithCaps(uid_t uid, gid_t gid, unsigned long long capBits)
+{
+ int ii, capng_ret, ret = -1;
+ bool need_setgid = false, need_setuid = false;
+ bool need_prctl = false, need_setpcap = false;
+
+ /* First drop all caps except those in capBits + the extra ones we
+ * need to change uid/gid and change the capabilities bounding
+ * set.
+ */
+
+ capng_clear(CAPNG_SELECT_BOTH);
+
+ for (ii = 0; ii <= CAP_LAST_CAP; ii++) {
+ if (capBits & (1ULL << ii)) {
+ capng_update(CAPNG_ADD,
+ CAPNG_EFFECTIVE|CAPNG_INHERITABLE|
+ CAPNG_PERMITTED|CAPNG_BOUNDING_SET,
+ ii);
+ }
+ }
+
+ if (gid != (gid_t)-1 &&
+ !capng_have_capability(CAPNG_EFFECTIVE, CAP_SETGID)) {
+ need_setgid = true;
+ capng_update(CAPNG_ADD, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETGID);
+ }
+ if (uid != (uid_t)-1 &&
+ !capng_have_capability(CAPNG_EFFECTIVE, CAP_SETUID)) {
+ need_setuid = true;
+ capng_update(CAPNG_ADD, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETUID);
+ }
+# ifdef PR_CAPBSET_DROP
+ /* If newer kernel, we need also need setpcap to change the bounding set */
+ if ((capBits || need_setgid || need_setuid) &&
+ !capng_have_capability(CAPNG_EFFECTIVE, CAP_SETPCAP)) {
+ need_setpcap = true;
+ }
+ if (need_setpcap)
+ capng_update(CAPNG_ADD, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETPCAP);
+# endif
+
+ need_prctl = capBits || need_setgid || need_setuid || need_setpcap;
+
+ /* Tell system we want to keep caps across uid change */
+ if (need_prctl && prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0)) {
+ virReportSystemError(errno, "%s",
+ _("prctl failed to set KEEPCAPS"));
+ goto cleanup;
+ }
+
+ /* Change to the temp capabilities */
+ if ((capng_ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("cannot apply process capabilities %d"), capng_ret);
+ goto cleanup;
+ }
+
+ if (virSetUIDGID(uid, gid) < 0)
+ goto cleanup;
+
+ /* Tell it we are done keeping capabilities */
+ if (need_prctl && prctl(PR_SET_KEEPCAPS, 0, 0, 0, 0)) {
+ virReportSystemError(errno, "%s",
+ _("prctl failed to reset KEEPCAPS"));
+ goto cleanup;
+ }
+
+ /* Drop the caps that allow setuid/gid (unless they were requested) */
+ if (need_setgid)
+ capng_update(CAPNG_DROP, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETGID);
+ if (need_setuid)
+ capng_update(CAPNG_DROP, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETUID);
+ /* Throw away CAP_SETPCAP so no more changes */
+ if (need_setpcap)
+ capng_update(CAPNG_DROP, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_SETPCAP);
+
+ if (need_prctl && ((capng_ret = capng_apply(CAPNG_SELECT_BOTH)) < 0)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("cannot apply process capabilities %d"), capng_ret);
+ ret = -1;
+ goto cleanup;
+ }
+
+ ret = 0;
+cleanup:
+ return ret;
+}
+
+#else
+/*
+ * On platforms without libcapng, the capabilities setting is treated
+ * as a NOP.
+ */
+
+int
+virSetUIDGIDWithCaps(uid_t uid, gid_t gid,
+ unsigned long long capBits ATTRIBUTE_UNUSED)
+{
+ return virSetUIDGID(uid, gid);
+}
+#endif
+
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
/* search /proc/mounts for mount point of *type; return pointer to
diff --git a/src/util/virutil.h b/src/util/virutil.h
index 4201aa1..2dc3403 100644
--- a/src/util/virutil.h
+++ b/src/util/virutil.h
@@ -54,6 +54,7 @@ int virPipeReadUntilEOF(int outfd, int errfd,
char **outbuf, char **errbuf);
int virSetUIDGID(uid_t uid, gid_t gid);
+int virSetUIDGIDWithCaps(uid_t uid, gid_t gid, unsigned long long capBits);
int virFileReadLimFD(int fd, int maxlen, char **buf) ATTRIBUTE_RETURN_CHECK;
--
1.8.1