sVirt provides SELinux MAC isolation for Qemu guest processes and their
corresponding resources (image files). sVirt provides this support
by labeling guests and resources with security labels that are stored
in file system extended attributes. Some file systems, such as NFS, do
not support the extended attribute security namespace, which is needed
for image file isolation when using the sVirt SELinux security driver
in libvirt.
The proposed solution entails a combination of Qemu, libvirt, and
SELinux patches that work together to isolate multiple guests' images
when they're stored in the same NFS mount. This results in an
environment where sVirt isolation and NFS image file isolation can both
be provided.
Currently, Qemu opens an image file in addition to performing the
necessary read and write operations. The proposed solution will move
the open out of Qemu and into libvirt. Once libvirt opens an image
file for the guest, it will pass the file descriptor to Qemu via a
new fd: protocol.
If the image file resides in an NFS mount, the following SELinux policy
changes will provide image isolation:
- A new SELinux boolean is created (e.g. virt_read_write_nfs) to
allow Qemu (svirt_t) to have only SELinux read and write
permissions on nfs_t files
- Qemu (svirt_t) also gets SELinux use permissions on libvirt
(virtd_t) file descriptors
Following is a sample invocation of Qemu using the fd: protocol on
the command line:
qemu -drive file=fd:4,format=qcow2
The fd: protocol is also supported by the drive_add monitor command.
This requires that the specified file descriptor first be passed to
the monitor with a prior getfd monitor command.
This patch also supports the following features for the fd: protocol:
- -snapshot command line option
- savevm monitor command
This patch does not contain support for the following features, all
of which are planned to be supported in the future:
- Copy-on-write backing files
- snapshot_blkdev monitor command
- -cdrom command line option
- -drive command line option with media=cdrom
- change monitor command
v2:
- Add drive_add monitor command support
- Fence off unsupported features that re-open image file
v3:
- Fence off cdrom and change monitor command support
v4:
- Remove checks that fenced off features for fd: protocol
- Enable -snapshot and savevm support
Signed-off-by: Corey Bryant <coreyb(a)linux.vnet.ibm.com>
---
block.c | 9 ++---
block/raw-posix.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++----
migration-fd.c | 2 +-
monitor.c | 39 ++++++++++++--------
monitor.h | 3 +-
net.c | 2 +-
qemu-options.hx | 8 +++--
qemu-tool.c | 5 +++
8 files changed, 131 insertions(+), 36 deletions(-)
diff --git a/block.c b/block.c
index 785c88e..e9e5613 100644
--- a/block.c
+++ b/block.c
@@ -555,7 +555,6 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
if (flags & BDRV_O_SNAPSHOT) {
BlockDriverState *bs1;
int64_t total_size;
- int is_protocol = 0;
BlockDriver *bdrv_qcow2;
QEMUOptionParameter *options;
char tmp_filename[PATH_MAX];
@@ -573,19 +572,17 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
}
total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
- if (bs1->drv && bs1->drv->protocol_name)
- is_protocol = 1;
-
bdrv_delete(bs1);
get_tmp_filename(tmp_filename, sizeof(tmp_filename));
/* Real path is meaningless for protocols */
- if (is_protocol)
+ if (path_has_protocol(filename)) {
snprintf(backing_filename, sizeof(backing_filename),
"%s", filename);
- else if (!realpath(filename, backing_filename))
+ } else if (!realpath(filename, backing_filename)) {
return -errno;
+ }
bdrv_qcow2 = bdrv_find_format("qcow2");
options = parse_option_parameters("", bdrv_qcow2->create_options,
NULL);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index c5c9944..42ae2f4 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -28,6 +28,7 @@
#include "block_int.h"
#include "module.h"
#include "block/raw-posix-aio.h"
+#include "monitor.h"
#ifdef CONFIG_COCOA
#include <paths.h>
@@ -185,7 +186,8 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
int bdrv_flags, int open_flags)
{
BDRVRawState *s = bs->opaque;
- int fd, ret;
+ int fd = -1;
+ int ret;
ret = raw_normalize_devicepath(&filename);
if (ret != 0) {
@@ -207,13 +209,21 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
if (!(bdrv_flags & BDRV_O_CACHE_WB))
s->open_flags |= O_DSYNC;
- s->fd = -1;
- fd = qemu_open(filename, s->open_flags, 0644);
- if (fd < 0) {
- ret = -errno;
- if (ret == -EROFS)
- ret = -EACCES;
- return ret;
+ if (s->fd == -1) {
+ fd = qemu_open(filename, s->open_flags, 0644);
+ if (fd < 0) {
+ ret = -errno;
+ if (ret == -EROFS) {
+ ret = -EACCES;
+ }
+ return ret;
+ }
+ } else {
+ fd = dup(s->fd);
+ if (fd < 0) {
+ ret = -errno;
+ return ret;
+ }
}
s->fd = fd;
s->aligned_buf = NULL;
@@ -271,6 +281,7 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
+ s->fd = -1;
s->type = FTYPE_FILE;
return raw_open_common(bs, filename, flags, 0);
}
@@ -904,6 +915,74 @@ static BlockDriver bdrv_file = {
.create_options = raw_create_options,
};
+static int raw_open_fd(BlockDriverState *bs, const char *filename, int flags)
+{
+ BDRVRawState *s = bs->opaque;
+ const char *fd_str;
+ int fd;
+
+ /* extract the file descriptor - fail if it's not fd: */
+ if (!strstart(filename, "fd:", &fd_str)) {
+ return -EINVAL;
+ }
+
+ if (!qemu_isdigit(fd_str[0])) {
+ /* get fd from monitor, but don't remove from monitor's fd list */
+ fd = qemu_get_fd(fd_str, 0);
+ if (fd == -1) {
+ return -EBADF;
+ }
+ } else {
+ char *endptr = NULL;
+
+ fd = strtol(fd_str, &endptr, 10);
+ if (*endptr || (fd == 0 && fd_str == endptr)) {
+ return -EBADF;
+ }
+ }
+
+ s->fd = fd;
+ s->type = FTYPE_FILE;
+
+ return raw_open_common(bs, filename, flags, 0);
+}
+
+static void raw_close_fd(BlockDriverState *bs)
+{
+ const char *fd_str;
+
+ /* if file descriptor is an fdname, remove it from monitor's fd list */
+ if (strstart(bs->filename, "fd:", &fd_str)) {
+ if (!qemu_isdigit(fd_str[0])) {
+ qemu_get_fd(fd_str, 1);
+ }
+ }
+
+ raw_close(bs);
+}
+
+static BlockDriver bdrv_file_fd = {
+ .format_name = "file",
+ .protocol_name = "fd",
+ .instance_size = sizeof(BDRVRawState),
+ .bdrv_probe = NULL, /* no probe for protocols */
+ .bdrv_file_open = raw_open_fd,
+ .bdrv_read = raw_read,
+ .bdrv_write = raw_write,
+ .bdrv_close = raw_close_fd,
+ .bdrv_flush = raw_flush,
+ .bdrv_discard = raw_discard,
+
+ .bdrv_aio_readv = raw_aio_readv,
+ .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_aio_flush = raw_aio_flush,
+
+ .bdrv_truncate = raw_truncate,
+ .bdrv_getlength = raw_getlength,
+
+ .create_options = raw_create_options,
+};
+
/***********************************************/
/* host device */
@@ -1012,6 +1091,7 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
}
#endif
+ s->fd = -1;
s->type = FTYPE_FILE;
#if defined(__linux__)
{
@@ -1184,6 +1264,7 @@ static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
BDRVRawState *s = bs->opaque;
int ret;
+ s->fd = -1;
s->type = FTYPE_FD;
/* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
@@ -1302,6 +1383,7 @@ static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
+ s->fd = -1;
s->type = FTYPE_CD;
/* open will not fail even if no CD is inserted, so add O_NONBLOCK */
@@ -1527,6 +1609,7 @@ static void bdrv_file_init(void)
* Register all the drivers. Note that order is important, the driver
* registered last will get probed first.
*/
+ bdrv_register(&bdrv_file_fd);
bdrv_register(&bdrv_file);
bdrv_register(&bdrv_host_device);
#ifdef __linux__
diff --git a/migration-fd.c b/migration-fd.c
index aee690a..b1ba625 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -61,7 +61,7 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
s = g_malloc0(sizeof(*s));
- s->fd = monitor_get_fd(mon, fdname);
+ s->fd = monitor_get_fd(mon, fdname, 1);
if (s->fd == -1) {
DPRINTF("fd_migration: invalid file descriptor identifier\n");
goto err_after_alloc;
diff --git a/monitor.c b/monitor.c
index 39791dc..d03e762 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1198,21 +1198,21 @@ static int add_graphics_client(Monitor *mon, const QDict *qdict, QObject **ret_d
qerror_report(QERR_DEVICE_NOT_ACTIVE, "spice");
return -1;
}
- qerror_report(QERR_ADD_CLIENT_FAILED);
- return -1;
+ qerror_report(QERR_ADD_CLIENT_FAILED);
+ return -1;
#ifdef CONFIG_VNC
} else if (strcmp(protocol, "vnc") == 0) {
- int fd = monitor_get_fd(mon, fdname);
- vnc_display_add_client(NULL, fd, skipauth);
- return 0;
+ int fd = monitor_get_fd(mon, fdname, 1);
+ vnc_display_add_client(NULL, fd, skipauth);
+ return 0;
#endif
} else if ((s = qemu_chr_find(protocol)) != NULL) {
- int fd = monitor_get_fd(mon, fdname);
- if (qemu_chr_add_client(s, fd) < 0) {
- qerror_report(QERR_ADD_CLIENT_FAILED);
- return -1;
- }
- return 0;
+ int fd = monitor_get_fd(mon, fdname, 1);
+ if (qemu_chr_add_client(s, fd) < 0) {
+ qerror_report(QERR_ADD_CLIENT_FAILED);
+ return -1;
+ }
+ return 0;
}
qerror_report(QERR_INVALID_PARAMETER, "protocol");
@@ -2833,7 +2833,7 @@ static void do_loadvm(Monitor *mon, const QDict *qdict)
}
}
-int monitor_get_fd(Monitor *mon, const char *fdname)
+int monitor_get_fd(Monitor *mon, const char *fdname, unsigned char remove)
{
mon_fd_t *monfd;
@@ -2846,10 +2846,12 @@ int monitor_get_fd(Monitor *mon, const char *fdname)
fd = monfd->fd;
- /* caller takes ownership of fd */
- QLIST_REMOVE(monfd, next);
- g_free(monfd->name);
- g_free(monfd);
+ if (remove) {
+ /* caller takes ownership of fd */
+ QLIST_REMOVE(monfd, next);
+ g_free(monfd->name);
+ g_free(monfd);
+ }
return fd;
}
@@ -2857,6 +2859,11 @@ int monitor_get_fd(Monitor *mon, const char *fdname)
return -1;
}
+int qemu_get_fd(const char *fdname, unsigned char remove)
+{
+ return cur_mon ? monitor_get_fd(cur_mon, fdname, remove) : -1;
+}
+
static const mon_cmd_t mon_cmds[] = {
#include "hmp-commands.h"
{ NULL, NULL, },
diff --git a/monitor.h b/monitor.h
index 4f2d328..f18de19 100644
--- a/monitor.h
+++ b/monitor.h
@@ -50,7 +50,8 @@ int monitor_read_bdrv_key_start(Monitor *mon, BlockDriverState *bs,
BlockDriverCompletionFunc *completion_cb,
void *opaque);
-int monitor_get_fd(Monitor *mon, const char *fdname);
+int monitor_get_fd(Monitor *mon, const char *fdname, unsigned char remove);
+int qemu_get_fd(const char *fdname, unsigned char remove);
void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
GCC_FMT_ATTR(2, 0);
diff --git a/net.c b/net.c
index d05930c..3482598 100644
--- a/net.c
+++ b/net.c
@@ -727,7 +727,7 @@ int net_handle_fd_param(Monitor *mon, const char *param)
if (!qemu_isdigit(param[0]) && mon) {
- fd = monitor_get_fd(mon, param);
+ fd = monitor_get_fd(mon, param, 1);
if (fd == -1) {
error_report("No file descriptor named %s found", param);
return -1;
diff --git a/qemu-options.hx b/qemu-options.hx
index d86815d..869320b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -131,7 +131,7 @@ using @file{/dev/cdrom} as filename (@pxref{host_drives}).
ETEXI
DEF("drive", HAS_ARG, QEMU_OPTION_drive,
- "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
+ "-drive [file=[fd:]file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
" [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
" [,cache=writethrough|writeback|none|unsafe][,format=f]\n"
" [,serial=s][,addr=A][,id=name][,aio=threads|native]\n"
@@ -144,10 +144,12 @@ STEXI
Define a new drive. Valid options are:
@table @option
-@item file=@var{file}
+@item file=[fd:]@var{file}
This option defines which disk image (@pxref{disk_images}) to use with
this drive. If the filename contains comma, you must double it
-(for instance, "file=my,,file" to use file "my,file").
+(for instance, "file=my,,file" to use file "my,file"). @option{fd:}@var{file}
+specifies the file descriptor of an already open disk image.
+@option{format=}@var{format} is required by @option{fd:}@var{file}.
@item if=@var{interface}
This option defines on which type on interface the drive is connected.
Available types are: ide, scsi, sd, mtd, floppy, pflash, virtio.
diff --git a/qemu-tool.c b/qemu-tool.c
index eb89fe0..a2a0129 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -96,3 +96,8 @@ int64_t qemu_get_clock_ns(QEMUClock *clock)
{
return 0;
}
+
+int qemu_get_fd(const char *fdname, unsigned char remove)
+{
+ return -1;
+}
--
1.7.3.4