From: "Daniel P. Berrange" <berrange(a)redhat.com>
Currently LXC guests can be given arbitrary pre-mounted
filesystems. For some use cases, however, it is more appropriate
to provide block devices which the container can mount itself.
This first implementation only allows for <disk type='block'>,
in other words, exposing a host disk device to a container. Since
LXC does not have device namespace virtualization, we are cheating
a little bit: if the XML specifies /dev/sdc4 to be given to
the container as /dev/sda1, then when we mknod /dev/sda1
in the container's /dev, we actually use the major:minor
number of /dev/sdc4, not that of /dev/sda1.
Signed-off-by: Daniel P. Berrange <berrange(a)redhat.com>
---
src/lxc/lxc_cgroup.c | 18 +++++++++
src/lxc/lxc_container.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 122 insertions(+)
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 767ef26..0636869 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -332,6 +332,24 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
}
}
+ for (i = 0 ; i < def->ndisks ; i++) {
+ if (def->disks[i]->type != VIR_DOMAIN_DISK_TYPE_BLOCK)
+ continue;
+
+ rc = virCgroupAllowDevicePath(cgroup,
+ def->disks[i]->src,
+ (def->disks[i]->readonly ?
+ VIR_CGROUP_DEVICE_READ :
+ VIR_CGROUP_DEVICE_RW) |
+ VIR_CGROUP_DEVICE_MKNOD);
+ if (rc != 0) {
+ virReportSystemError(-rc,
+ _("Unable to allow device %s for domain %s"),
+ def->disks[i]->src, def->name);
+ goto cleanup;
+ }
+ }
+
for (i = 0 ; i < def->nfss ; i++) {
if (def->fss[i]->type != VIR_DOMAIN_FS_TYPE_BLOCK)
continue;
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 3014564..be26de9 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -1211,6 +1211,106 @@ static int lxcContainerMountAllFS(virDomainDefPtr vmDef,
}
+/**
+ * lxcContainerSetupDisk:
+ * @vmDef: domain definition handed to the security driver
+ * @def: disk to expose; must be type 'block' and have a source path
+ * @dstprefix: prefix prepended to the source path before it is stat()ed
+ * @securityDriver: security manager used to label the new device node
+ *
+ * Creates /dev/<def->dst> with the device type and major:minor of
+ * <dstprefix>/<def->src>, then labels the newly created node.
+ * Returns 0 on success, -1 on failure.
+ */
+static int lxcContainerSetupDisk(virDomainDefPtr vmDef,
+                                 virDomainDiskDefPtr def,
+                                 const char *dstprefix,
+                                 virSecurityManagerPtr securityDriver)
+{
+    char *origsrc = def->src;
+    char *hostpath = NULL;
+    char *devpath = NULL;
+    struct stat st;
+    mode_t devmode;
+    int ret = -1;
+
+    if (def->type != VIR_DOMAIN_DISK_TYPE_BLOCK) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("Can't setup disk for non-block device"));
+        goto cleanup;
+    }
+    if (!def->src) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("Can't setup disk without media"));
+        goto cleanup;
+    }
+
+    if (virAsprintf(&hostpath, "%s/%s", dstprefix, def->src) < 0 ||
+        virAsprintf(&devpath, "/dev/%s", def->dst) < 0) {
+        virReportOOMError();
+        goto cleanup;
+    }
+
+    if (stat(hostpath, &st) < 0) {
+        virReportSystemError(errno, _("Unable to access %s"), def->src);
+        goto cleanup;
+    }
+
+    if (!(S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Disk source %s must be a character/block device"),
+                       def->src);
+        goto cleanup;
+    }
+
+    /* Mode 0700, with the same device type bit as the source */
+    devmode = 0700 | (S_ISCHR(st.st_mode) ? S_IFCHR : S_IFBLK);
+
+    /* The node name may not match what its major:minor normally
+     * implies, because the numbers come from the source device.
+     * There is no alternative at present, so we have to hope that
+     * containerized applications tolerate the mismatch.
+     */
+    VIR_DEBUG("Creating dev %s (%d,%d) from %s",
+              devpath, major(st.st_rdev), minor(st.st_rdev), hostpath);
+    if (mknod(devpath, devmode, st.st_rdev) < 0) {
+        virReportSystemError(errno, _("Unable to create device %s"), devpath);
+        goto cleanup;
+    }
+
+    /* Labelling normally operates on def->src, but the new node is what
+     * needs the label: point src at it here; cleanup restores it. */
+    def->src = devpath;
+    if (virSecurityManagerSetImageLabel(securityDriver, vmDef, def) < 0)
+        goto cleanup;
+
+    ret = 0;
+
+cleanup:
+    def->src = origsrc;
+    VIR_FREE(hostpath);
+    VIR_FREE(devpath);
+    return ret;
+}
+
+/* Set up each disk in vmDef in order; 0 on success, -1 on first failure. */
+static int lxcContainerSetupAllDisks(virDomainDefPtr vmDef,
+                                     const char *dstprefix,
+                                     virSecurityManagerPtr securityDriver)
+{
+    size_t idx = 0;
+    VIR_DEBUG("Setting up disks %s", dstprefix);
+    while (idx < vmDef->ndisks) {
+        if (lxcContainerSetupDisk(vmDef, vmDef->disks[idx],
+                                  dstprefix, securityDriver) < 0)
+            return -1;
+        idx++;
+    }
+    VIR_DEBUG("Setup all disks");
+    return 0;
+}
+
+
static int lxcContainerGetSubtree(const char *prefix,
char ***mountsret,
size_t *nmountsret)
@@ -1606,6 +1706,10 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
if (lxcContainerMountAllFS(vmDef, "/.oldroot", true, sec_mount_options)
< 0)
goto cleanup;
+ /* Sets up any extra disks from guest config */
+ if (lxcContainerSetupAllDisks(vmDef, "/.oldroot", securityDriver) < 0)
+ goto cleanup;
+
/* Gets rid of all remaining mounts from host OS, including /.oldroot itself */
if (lxcContainerUnmountSubtree("/.oldroot", true) < 0)
goto cleanup;
--
1.8.0.1