Add a <capabilities> element to the <features> section of the LXC domain
configuration. It can contain child elements named after the Linux
capabilities, for example:
<mknod state="on"/>: keep the CAP_MKNOD capability
<sys_chroot state="off"/>: drop the CAP_SYS_CHROOT capability
Users can use this mechanism to drop more capabilities than the default
set, or to keep capabilities that are dropped by default.
---
docs/schemas/domaincommon.rng | 196 ++++++++++++++++++++++++
src/conf/domain_conf.c | 93 ++++++++++-
src/conf/domain_conf.h | 47 ++++++
src/libvirt_private.syms | 1 +
src/lxc/lxc_cgroup.c | 5 +
src/lxc/lxc_container.c | 90 +++++++++--
tests/domainschemadata/domain-caps-features.xml | 28 ++++
7 files changed, 442 insertions(+), 18 deletions(-)
create mode 100644 tests/domainschemadata/domain-caps-features.xml
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 6cc922c..297d0ae 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -3744,6 +3744,9 @@
<empty/>
</element>
</optional>
+ <optional>
+ <ref name="capabilities"/>
+ </optional>
</interleave>
</element>
</optional>
@@ -4303,6 +4306,199 @@
</element>
</define>
+ <!-- Optional capabilities features -->
+ <define name="capabilities">
+ <element name="capabilities">
+ <interleave>
+ <optional>
+ <element name="audit_control">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="audit_write">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="block_suspend">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="chown">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="dac_override">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="dac_read_search">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="fowner">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="fsetid">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="ipc_lock">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="ipc_owner">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="kill">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="lease">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="linux_immutable">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="mac_admin">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="mac_override">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="mknod">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="net_admin">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="net_bind_service">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="net_broadcast">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="net_raw">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="setgid">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="setfcap">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="setpcap">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="setuid">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="sys_admin">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="sys_boot">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="sys_chroot">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="sys_module">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="sys_nice">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="sys_pacct">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="sys_ptrace">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="sys_rawio">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="sys_resource">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="sys_time">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="sys_tty_config">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="syslog">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ <optional>
+ <element name="wake_alarm">
+ <ref name="featurestate"/>
+ </element>
+ </optional>
+ </interleave>
+ </element>
+ </define>
+
<define name="featurestate">
<attribute name="state">
<choice>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index ff2d447..5de4bd8 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -147,7 +147,8 @@ VIR_ENUM_IMPL(virDomainFeature, VIR_DOMAIN_FEATURE_LAST,
"viridian",
"privnet",
"hyperv",
- "pvspinlock")
+ "pvspinlock",
+ "capabilities")
VIR_ENUM_IMPL(virDomainFeatureState, VIR_DOMAIN_FEATURE_STATE_LAST,
"default",
@@ -159,6 +160,45 @@ VIR_ENUM_IMPL(virDomainHyperv, VIR_DOMAIN_HYPERV_LAST,
"vapic",
"spinlocks")
+VIR_ENUM_IMPL(virDomainCapsFeature, VIR_DOMAIN_CAPS_FEATURE_LAST,
+ "audit_control",
+ "audit_write",
+ "block_suspend",
+ "chown",
+ "dac_override",
+ "dac_read_search",
+ "fowner",
+ "fsetid",
+ "ipc_lock",
+ "ipc_owner",
+ "kill",
+ "lease",
+ "linux_immutable",
+ "mac_admin",
+ "mac_override",
+ "mknod",
+ "net_admin",
+ "net_bind_service",
+ "net_broadcast",
+ "net_raw",
+ "setgid",
+ "setfcap",
+ "setpcap",
+ "setuid",
+ "sys_admin",
+ "sys_boot",
+ "sys_chroot",
+ "sys_module",
+ "sys_nice",
+ "sys_pacct",
+ "sys_ptrace",
+ "sys_rawio",
+ "sys_resource",
+ "sys_time",
+ "sys_tty_config",
+ "syslog",
+ "wake_alarm")
+
VIR_ENUM_IMPL(virDomainLifecycle, VIR_DOMAIN_LIFECYCLE_LAST,
"destroy",
"restart",
@@ -11874,6 +11914,7 @@ virDomainDefParseXML(xmlDocPtr xml,
case VIR_DOMAIN_FEATURE_VIRIDIAN:
case VIR_DOMAIN_FEATURE_PRIVNET:
case VIR_DOMAIN_FEATURE_HYPERV:
+ case VIR_DOMAIN_FEATURE_CAPABILITIES:
def->features[val] = VIR_DOMAIN_FEATURE_STATE_ON;
break;
@@ -11985,6 +12026,39 @@ virDomainDefParseXML(xmlDocPtr xml,
ctxt->node = node;
}
+    if (def->features[VIR_DOMAIN_FEATURE_CAPABILITIES] == VIR_DOMAIN_FEATURE_STATE_ON) {
+        /* Every child element of <capabilities> is named after one capability */
+        if ((n = virXPathNodeSet("./features/capabilities/*", ctxt, &nodes)) < 0)
+            goto error;
+
+        for (i = 0; i < n; i++) {
+            int val = virDomainCapsFeatureTypeFromString((const char *)nodes[i]->name);
+            if (val < 0) {
+                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                               _("unexpected capability feature '%s'"), nodes[i]->name);
+                goto error;
+            }
+
+            node = ctxt->node;
+            ctxt->node = nodes[i];
+
+            /* When no state string is found the capability defaults to "on" */
+            if ((tmp = virXPathString("string(./@state)", ctxt))) {
+                if ((def->caps_features[val] = virDomainFeatureStateTypeFromString(tmp)) == -1) {
+                    virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                                   _("unknown state attribute '%s' of feature capability '%s'"),
+                                   tmp, virDomainCapsFeatureTypeToString(val));
+                    goto error;
+                }
+                VIR_FREE(tmp);
+            } else {
+                def->caps_features[val] = VIR_DOMAIN_FEATURE_STATE_ON;
+            }
+            ctxt->node = node;
+        }
+        VIR_FREE(nodes);
+    }
+
if (virDomainEventActionParseXML(ctxt, "on_reboot",
"string(./on_reboot[1])",
&def->onReboot,
@@ -17694,6 +17768,23 @@ virDomainDefFormatInternal(virDomainDefPtr def,
virBufferAddLit(buf, "</hyperv>\n");
break;
+            case VIR_DOMAIN_FEATURE_CAPABILITIES:
+                if (def->features[i] != VIR_DOMAIN_FEATURE_STATE_ON)
+                    break;
+
+                /* Only capabilities with a non-default state are written out */
+                virBufferAddLit(buf, "<capabilities>\n");
+                virBufferAdjustIndent(buf, 2);
+                for (j = 0; j < VIR_DOMAIN_CAPS_FEATURE_LAST; j++) {
+                    if (def->caps_features[j] != VIR_DOMAIN_FEATURE_STATE_DEFAULT)
+                        virBufferAsprintf(buf, "<%s state='%s'/>\n",
+                                          virDomainCapsFeatureTypeToString(j),
+                                          virDomainFeatureStateTypeToString(def->caps_features[j]));
+                }
+                virBufferAdjustIndent(buf, -2);
+                virBufferAddLit(buf, "</capabilities>\n");
+                break;
+
case VIR_DOMAIN_FEATURE_LAST:
break;
}
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index a6ac95a..70044d6 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1525,6 +1525,7 @@ typedef enum {
VIR_DOMAIN_FEATURE_PRIVNET,
VIR_DOMAIN_FEATURE_HYPERV,
VIR_DOMAIN_FEATURE_PVSPINLOCK,
+ VIR_DOMAIN_FEATURE_CAPABILITIES,
VIR_DOMAIN_FEATURE_LAST
} virDomainFeature;
@@ -1545,6 +1546,48 @@ typedef enum {
VIR_DOMAIN_HYPERV_LAST
} virDomainHyperv;
+/* The capabilities are ordered alphabetically to help check for new ones */
+typedef enum {
+ VIR_DOMAIN_CAPS_FEATURE_AUDIT_CONTROL = 0,
+ VIR_DOMAIN_CAPS_FEATURE_AUDIT_WRITE,
+ VIR_DOMAIN_CAPS_FEATURE_BLOCK_SUSPEND,
+ VIR_DOMAIN_CAPS_FEATURE_CHOWN,
+ VIR_DOMAIN_CAPS_FEATURE_DAC_OVERRIDE,
+ VIR_DOMAIN_CAPS_FEATURE_DAC_READ_SEARCH,
+ VIR_DOMAIN_CAPS_FEATURE_FOWNER,
+ VIR_DOMAIN_CAPS_FEATURE_FSETID,
+ VIR_DOMAIN_CAPS_FEATURE_IPC_LOCK,
+ VIR_DOMAIN_CAPS_FEATURE_IPC_OWNER,
+ VIR_DOMAIN_CAPS_FEATURE_KILL,
+ VIR_DOMAIN_CAPS_FEATURE_LEASE,
+ VIR_DOMAIN_CAPS_FEATURE_LINUX_IMMUTABLE,
+ VIR_DOMAIN_CAPS_FEATURE_MAC_ADMIN,
+ VIR_DOMAIN_CAPS_FEATURE_MAC_OVERRIDE,
+ VIR_DOMAIN_CAPS_FEATURE_MKNOD,
+ VIR_DOMAIN_CAPS_FEATURE_NET_ADMIN,
+ VIR_DOMAIN_CAPS_FEATURE_NET_BIND_SERVICE,
+ VIR_DOMAIN_CAPS_FEATURE_NET_BROADCAST,
+ VIR_DOMAIN_CAPS_FEATURE_NET_RAW,
+ VIR_DOMAIN_CAPS_FEATURE_SETGID,
+ VIR_DOMAIN_CAPS_FEATURE_SETFCAP,
+ VIR_DOMAIN_CAPS_FEATURE_SETPCAP,
+ VIR_DOMAIN_CAPS_FEATURE_SETUID,
+ VIR_DOMAIN_CAPS_FEATURE_SYS_ADMIN,
+ VIR_DOMAIN_CAPS_FEATURE_SYS_BOOT,
+ VIR_DOMAIN_CAPS_FEATURE_SYS_CHROOT,
+ VIR_DOMAIN_CAPS_FEATURE_SYS_MODULE,
+ VIR_DOMAIN_CAPS_FEATURE_SYS_NICE,
+ VIR_DOMAIN_CAPS_FEATURE_SYS_PACCT,
+ VIR_DOMAIN_CAPS_FEATURE_SYS_PTRACE,
+ VIR_DOMAIN_CAPS_FEATURE_SYS_RAWIO,
+ VIR_DOMAIN_CAPS_FEATURE_SYS_RESOURCE,
+ VIR_DOMAIN_CAPS_FEATURE_SYS_TIME,
+ VIR_DOMAIN_CAPS_FEATURE_SYS_TTY_CONFIG,
+ VIR_DOMAIN_CAPS_FEATURE_SYSLOG,
+ VIR_DOMAIN_CAPS_FEATURE_WAKE_ALARM,
+ VIR_DOMAIN_CAPS_FEATURE_LAST
+} virDomainCapsFeature;
+
typedef enum {
VIR_DOMAIN_LIFECYCLE_DESTROY,
VIR_DOMAIN_LIFECYCLE_RESTART,
@@ -1914,6 +1957,9 @@ struct _virDomainDef {
int hyperv_features[VIR_DOMAIN_HYPERV_LAST];
unsigned int hyperv_spinlocks;
+ /* These options are of type virDomainFeatureState: ON = keep, OFF = drop */
+ int caps_features[VIR_DOMAIN_CAPS_FEATURE_LAST];
+
virDomainClockDef clock;
size_t ngraphics;
@@ -2534,6 +2580,7 @@ VIR_ENUM_DECL(virDomainBoot)
VIR_ENUM_DECL(virDomainBootMenu)
VIR_ENUM_DECL(virDomainFeature)
VIR_ENUM_DECL(virDomainFeatureState)
+VIR_ENUM_DECL(virDomainCapsFeature)
VIR_ENUM_DECL(virDomainLifecycle)
VIR_ENUM_DECL(virDomainLifecycleCrash)
VIR_ENUM_DECL(virDomainPMState)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 122c572..a411766 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -133,6 +133,7 @@ virDomainBlockedReasonTypeFromString;
virDomainBlockedReasonTypeToString;
virDomainBootMenuTypeFromString;
virDomainBootMenuTypeToString;
+virDomainCapsFeatureTypeToString;
virDomainChrConsoleTargetTypeFromString;
virDomainChrConsoleTargetTypeToString;
virDomainChrDefForeach;
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 8dfdc60..71a0d61 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -357,6 +357,11 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
{'c', LXC_DEV_MAJ_FUSE, LXC_DEV_MIN_FUSE},
{0, 0, 0}};
+ /* No white list if CAP_MKNOD has to be kept */
+ int capMknod = def->caps_features[VIR_DOMAIN_CAPS_FEATURE_MKNOD];
+ if (capMknod == VIR_DOMAIN_FEATURE_STATE_ON)
+ return 0;
+
if (virCgroupDenyAllDevices(cgroup) < 0)
goto cleanup;
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index fd8ab16..de65ac8 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -1732,25 +1732,80 @@ static int lxcContainerResolveSymlinks(virDomainDefPtr vmDef)
* host system, since they are not currently "containerized"
*/
#if WITH_CAPNG
-static int lxcContainerDropCapabilities(bool keepReboot)
+static int lxcContainerDropCapabilities(virDomainDefPtr def,
+ bool keepReboot)
{
- int ret;
+ int ret, i;
+
+ /* Maps each virDomainCapsFeature value (used as the index) to its CAP_* constant */
+ static unsigned int capsMapping[] = {CAP_AUDIT_CONTROL,
+ CAP_AUDIT_WRITE,
+ CAP_BLOCK_SUSPEND,
+ CAP_CHOWN,
+ CAP_DAC_OVERRIDE,
+ CAP_DAC_READ_SEARCH,
+ CAP_FOWNER,
+ CAP_FSETID,
+ CAP_IPC_LOCK,
+ CAP_IPC_OWNER,
+ CAP_KILL,
+ CAP_LEASE,
+ CAP_LINUX_IMMUTABLE,
+ CAP_MAC_ADMIN,
+ CAP_MAC_OVERRIDE,
+ CAP_MKNOD,
+ CAP_NET_ADMIN,
+ CAP_NET_BIND_SERVICE,
+ CAP_NET_BROADCAST,
+ CAP_NET_RAW,
+ CAP_SETGID,
+ CAP_SETFCAP,
+ CAP_SETPCAP,
+ CAP_SETUID,
+ CAP_SYS_ADMIN,
+ CAP_SYS_BOOT,
+ CAP_SYS_CHROOT,
+ CAP_SYS_MODULE,
+ CAP_SYS_NICE,
+ CAP_SYS_PACCT,
+ CAP_SYS_PTRACE,
+ CAP_SYS_RAWIO,
+ CAP_SYS_RESOURCE,
+ CAP_SYS_TIME,
+ CAP_SYS_TTY_CONFIG,
+ CAP_SYSLOG,
+ CAP_WAKE_ALARM};
capng_get_caps_process();
- if ((ret = capng_updatev(CAPNG_DROP,
- CAPNG_EFFECTIVE | CAPNG_PERMITTED |
- CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
- CAP_SYS_MODULE, /* No kernel module loading */
- CAP_SYS_TIME, /* No changing the clock */
- CAP_MKNOD, /* No creating device nodes */
- CAP_AUDIT_CONTROL, /* No messing with auditing status */
- CAP_MAC_ADMIN, /* No messing with LSM config */
- keepReboot ? -1 : CAP_SYS_BOOT, /* No use of reboot */
- -1)) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Failed to remove capabilities: %d"), ret);
- return -1;
+ for (i = 0; i < VIR_DOMAIN_CAPS_FEATURE_LAST; i++) {
+ bool toDrop = false;
+ int state = def->caps_features[i];
+
+ switch ((virDomainCapsFeature) i) {
+ case VIR_DOMAIN_CAPS_FEATURE_SYS_BOOT: /* No use of reboot */
+ toDrop = !keepReboot && (state != VIR_DOMAIN_FEATURE_STATE_ON);
+ break;
+ case VIR_DOMAIN_CAPS_FEATURE_SYS_MODULE: /* No kernel module loading */
+ case VIR_DOMAIN_CAPS_FEATURE_SYS_TIME: /* No changing the clock */
+ case VIR_DOMAIN_CAPS_FEATURE_MKNOD: /* No creating device nodes */
+ case VIR_DOMAIN_CAPS_FEATURE_AUDIT_CONTROL: /* No messing with auditing status */
+ case VIR_DOMAIN_CAPS_FEATURE_MAC_ADMIN: /* No messing with LSM config */
+ toDrop = (state != VIR_DOMAIN_FEATURE_STATE_ON);
+ break;
+ default: /* User specified capabilities to drop */
+ toDrop = (state == VIR_DOMAIN_FEATURE_STATE_OFF);
+ }
+
+ if (toDrop && (ret = capng_update(CAPNG_DROP,
+ CAPNG_EFFECTIVE | CAPNG_PERMITTED |
+ CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
+ capsMapping[i])) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Failed to remove capability %s: %d"),
+ virDomainCapsFeatureTypeToString(i), ret);
+ return -1;
+ }
}
if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) {
@@ -1768,7 +1823,8 @@ static int lxcContainerDropCapabilities(bool keepReboot)
return 0;
}
#else
-static int lxcContainerDropCapabilities(bool keepReboot ATTRIBUTE_UNUSED)
+static int lxcContainerDropCapabilities(virDomainDefPtr def ATTRIBUTE_UNUSED,
+ bool keepReboot ATTRIBUTE_UNUSED)
{
VIR_WARN("libcap-ng support not compiled in, unable to clear
capabilities");
return 0;
@@ -1874,7 +1930,7 @@ static int lxcContainerChild(void *data)
}
/* drop a set of root capabilities */
- if (lxcContainerDropCapabilities(!!hasReboot) < 0)
+ if (lxcContainerDropCapabilities(vmDef, !!hasReboot) < 0)
goto cleanup;
if (lxcContainerSendContinue(argv->handshakefd) < 0) {
diff --git a/tests/domainschemadata/domain-caps-features.xml b/tests/domainschemadata/domain-caps-features.xml
new file mode 100644
index 0000000..c62c767
--- /dev/null
+++ b/tests/domainschemadata/domain-caps-features.xml
@@ -0,0 +1,28 @@
+<domain type='lxc'>
+ <name>demo</name>
+ <uuid>8369f1ac-7e46-e869-4ca5-759d51478066</uuid>
+ <os>
+ <type>exe</type>
+ <init>/sh</init>
+ </os>
+ <features>
+ <capabilities>
+ <mknod state="on"/>
+ </capabilities>
+ </features>
+ <resource>
+ <partition>/virtualmachines</partition>
+ </resource>
+ <memory unit='KiB'>500000</memory>
+ <devices>
+ <filesystem type='mount'>
+ <source dir='/root/container'/>
+ <target dir='/'/>
+ </filesystem>
+ <filesystem type='mount'>
+ <source dir='/home'/>
+ <target dir='/home'/>
+ </filesystem>
+ <console type='pty'/>
+ </devices>
+</domain>
--
1.8.4.5