On 01/29/2013 09:52 AM, Stefan Berger wrote:
Add support for file descriptor sets by converting some of the
command line parameters to use /dev/fdset/%d if -add-fd is found
to be supported by QEMU. For those devices libvirt now open()s
the device to obtain the file descriptor and 'transfers' the
fd using virCommand.
For the following fragments of domain XML
<disk type='file' device='disk'>
<driver name='qemu' type='raw'/>
<source file='/var/lib/libvirt/images/fc14-x86_64.img'/>
<target dev='hda' bus='ide'/>
<address type='drive' controller='0' bus='0'
target='0' unit='0'/>
</disk>
<serial type='dev'>
<source path='/dev/ttyS0'/>
<target port='0'/>
</serial>
<serial type='pipe'>
<source path='/tmp/testpipe'/>
<target port='1'/>
</serial>
now create the following parts of command line:
Rearranging things just a bit:
-add-fd set=1,fd=30,opaque=/dev/ttyS0 -chardev
tty,id=charserial0,path=/dev/fdset/1
-add-fd set=2,fd=32,opaque=/tmp/testpipe -chardev
pipe,id=charserial1,path=/dev/fdset/2
These two conversions looks sane; although it might help to list the old
style next to the new style in the commit message:
old: -chardev tty,id=charserial0,path=/dev/ttyS0
new: -add-fd set=1,fd=30,opaque=/dev/ttyS0 -chardev
tty,id=charserial0,path=/dev/fdset/1
-add-fd set=0,fd=23,opaque=/var/lib/libvirt/images/fc14-x86_64.img -drive
file=/dev/fdset/0,if=none,id=drive-ide0-0-0,format=raw
This conversion feels insufficient for qemu 1.3. As written, your patch
can only ever tie one fd to a set (that is, each qemu fdset contains
exactly one passed fd). But my understanding of the qemu 1.3 code is
that for -drive arguments, qemu_open() is called twice: once with
O_RDONLY to probe the file format, and again with O_RDWR to actually use
the file; furthermore, qemu_open() refuses to use an O_RDWR fd for
O_RDONLY operations (in the name of safety). Arguably, this is a flaw
in the qemu design (qemu shouldn't need to open() a file twice to use
it, and I argued at the time that qemu implemented fdset that an O_RDWR
fd should serve just fine for an O_RDONLY request); but since it exists,
we must live with it. That means that for THIS line, the proper
translation really has to be:
old: -drive
file=/var/lib/libvirt/images/fc14-x86_64.img,if=none,id=drive-ide0-0-0,format=raw
new: -add-fd
set=0,fd=23,opaque=RDONLY:/var/lib/libvirt/images/fc14-x86_64.img
-add-fd set=0,fd=24,opaque=RDWR:/var/lib/libvirt/images/fc14-x86_64.img
-drive file=/dev/fdset/0,if=none,id=drive-ide0-0-0,format=raw
where we really are passing two fds, 23 as O_RDONLY, and 24 as O_RDWR,
within the single fdset 0.
---
src/qemu/qemu_command.c | 224 +++++++++++++++++++++++++++++++++++++----------
src/qemu/qemu_command.h | 13 ++
src/qemu/qemu_driver.c | 6 +
src/qemu/qemu_hotplug.c | 9 +
src/qemu/qemu_process.c | 3
tests/qemuxml2argvtest.c | 8 +
tests/qemuxmlnstest.c | 7 +
We need to add a new test: a new tests/qemuxml2argvdata .args file, with
the matching .xml file copied from an existing test, along with a line
in qemuxml2argvtest.c to run the new test using the new capability right
next to the existing test run without the capability. Doing this will
make it more obvious that we are generating appropriate command lines
with or without the capability detected. I guess it gets tricky, though
- the test cannot actually open /dev/ttyS0, but must limit itself to
either opening files under control of the testsuite, or factoring the
qemu_command.c code so that we can generate the command line that would
exist if an fd were open but without actually opening the fd.
7 files changed, 215 insertions(+), 55 deletions(-)
Index: libvirt/src/qemu/qemu_command.c
===================================================================
--- libvirt.orig/src/qemu/qemu_command.c
+++ libvirt/src/qemu/qemu_command.c
@@ -46,6 +46,7 @@
#include "base64.h"
#include "device_conf.h"
#include "virstoragefile.h"
+#include "virintset.h"
#include <sys/stat.h>
#include <fcntl.h>
@@ -133,6 +134,65 @@ VIR_ENUM_IMPL(qemuDomainFSDriver, VIR_DO
/**
+ * qemuVirCommandGetFDSet:
+ * @cmd: the command to modify
+ * @fd: fd to reassign to the child
Missing documentation for @opaque; but at the level of libvirt, we know
what it is, and might as well call it 'name' instead of 'opaque' (it is
only qemu that is treating it as opaque).
+ *
+ * Get the parameters for the the QEMU -add-fd command line option
+ * for the given file descriptor. The file descriptor must previously
+ * have been 'transferred' in a virCommandTransfer() call.
+ * This function for example returns "set=10,fd=20".
+ */
+static char *
+qemuVirCommandGetFDSet(virIntSetPtr fdset, int fd,
+ const char *opaque)
+{
+ char *result = NULL;
+ int idx = virIntSetIndexOf(fdset, fd);
+
+ if (idx >= 0) {
+ if (virAsprintf(&result, "set=%d,fd=%d%s%s", idx, fd,
+ (opaque) ? ",opaque=" : "",
+ (opaque) ? opaque : "") < 0) {
Careful - opaque might contain commas, in which case you need to pass it
through virBufferEscape(buf, ',', ",", "%s", opaque) instead
of using it
directly.
+ virReportOOMError();
+ }
+ } else {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("file descriptor %d not inn fdset"), fd);
s/inn/in/
+ }
+
+ return result;
+}
+
+/**
+ * qemuVirCommandGetDevSet:
+ * @cmd: the command to modify
+ * @fd: fd to reassign to the child
+ *
+ * Get the parameters for the the QEMU path= parameter where a file
+ * descriptor is accessed via a file descriptor set, for example
+ * /dev/fdset/10. The file descriptor must previously have been
+ * 'transferred' in a virCommandTransfer() call.
+ */
+static char *
+qemuVirCommandGetDevSet(virIntSetPtr fdset, int fd)
+{
+ char *result = NULL;
+ int idx = virIntSetIndexOf(fdset, fd);
+
+ if (idx >= 0) {
+ if (virAsprintf(&result, "/dev/fdset/%d", idx) < 0) {
+ virReportOOMError();
+ }
+ } else {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("file descriptor %d not in fdset"), fd);
+ }
+ return result;
+}
+
Do these two functions need 'vir' in their name, or could we get by with
qemuCommandGetFDSet and qemuCommandGetDevSet?
+
+/**
* qemuPhysIfaceConnect:
* @def: the definition of the VM (needed by 802.1Qbh and audit)
* @driver: pointer to the driver instance
@@ -2223,11 +2283,65 @@ no_memory:
goto cleanup;
}
+static char *
+qemuCreatePathForFilePath(const char *fmt, const char *path,
+ virCommandPtr cmd, virIntSetPtr fdset,
+ qemuCapsPtr caps)
+{
+ char *res = NULL;
+ char *devset = NULL;
+ char *fdsetstr = NULL;
+
+ /*
+ * cmd == NULL hints at hotplugging; use old way of doing things
+ */
+ if (!cmd || !qemuCapsGet(caps, QEMU_CAPS_ADD_FD)) {
+ if (virAsprintf(&res, fmt, path) < 0) {
+ virReportOOMError();
+ return NULL;
+ }
See my thoughts below about printing into an existing virBuffer instead
of allocating a new string with virAsprintf.
+ } else {
+ int fd = open(path, O_RDWR);
Won't work to call raw open(); you MUST go through qemuOpenFile() (which
works around situations such as libvirtd running as root but needing to
open a file on a root-squash NFS share); also, this would be the point
where we apply security manager labeling to the opened fd. In fact, the
caller probably ought to be able supply additional flags to open, such
as deciding whether O_CREAT, O_TRUNC, etc. makes sense on a per-caller
basis.
+ if (fd < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Could not open %s"), path);
When a system call fails, we should report it with
virReportSystemError(errno, _("..."))
+ return NULL;
+ }
+ virCommandTransferFD(cmd, fd);
+ if (virIntSetAdd(fdset, fd) == -ENOMEM) {
I would check for < 0, not just == -ENOMEM, to catch all possible errors
(although ENOMEM is the only current error).
+ virReportOOMError();
+ return NULL;
+ }
+ devset = qemuVirCommandGetDevSet(fdset, fd);
+ if (!devset)
+ return NULL;
+ if (virAsprintf(&res, fmt, devset) < 0)
+ goto error;
Missed a virReportOOMError().
+
+ fdsetstr = qemuVirCommandGetFDSet(fdset, fd, path);
+ if (!fdsetstr)
+ goto error;
+ virCommandAddArgList(cmd, "-add-fd", fdsetstr, NULL);
+ }
+
Below here...
+ VIR_FREE(devset);
+ VIR_FREE(fdsetstr);
+
+ return res;
+
+error:
+ VIR_FREE(devset);
+ VIR_FREE(fdset);
+ return NULL;
...it is simpler to write:
cleanup:
VIR_FREE(devset);
VIR_FREE(fdsetstr);
return res;
since all error paths already ensured res was NULL; then you aren't
duplicating the cleanup paths.
+}
+
char *
qemuBuildDriveStr(virConnectPtr conn ATTRIBUTE_UNUSED,
virDomainDiskDefPtr disk,
bool bootable,
- qemuCapsPtr caps)
+ qemuCapsPtr caps,
+ virCommandPtr cmd,
+ virIntSetPtr fdset)
{
virBuffer opt = VIR_BUFFER_INITIALIZER;
const char *bus = virDomainDiskQEMUBusTypeToString(disk->bus);
@@ -2235,6 +2349,7 @@ qemuBuildDriveStr(virConnectPtr conn ATT
virDomainDiskGeometryTransTypeToString(disk->geometry.trans);
int idx = virDiskNameToIndex(disk->dst);
int busid = -1, unitid = -1;
+ char *pathfdset = NULL;
if (idx < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
@@ -2389,7 +2504,12 @@ qemuBuildDriveStr(virConnectPtr conn ATT
"block type disk"));
goto error;
}
- virBufferEscape(&opt, ',', ",", "file=%s,",
disk->src);
+ pathfdset = qemuCreatePathForFilePath("file=%s", disk->src,
+ cmd, fdset, caps);
+ if (!pathfdset)
+ goto error;
+ virBufferEscape(&opt, ',', ",", "%s,",
pathfdset);
Hmm, rather than malloc'ing pathfdset only to then copy it into the
malloc'd opt buffer, why not just print the path directly into the opt
buffer to begin with? In other words, instead of having
qemuCreatePathForFilePath return a char*, instead, have it take a
virBufferPtr argument (the existing &opt), and merely append the right
contents (path with comma escaping for old qemu, and /dev/fdset/%n for
new qemu).
+ VIR_FREE(pathfdset);
}
}
if (qemuCapsGet(caps, QEMU_CAPS_DEVICE))
@@ -2886,11 +3006,14 @@ error:
char *qemuBuildFSStr(virDomainFSDefPtr fs,
- qemuCapsPtr caps ATTRIBUTE_UNUSED)
+ qemuCapsPtr caps ATTRIBUTE_UNUSED,
+ virCommandPtr cmd,
+ virIntSetPtr fdset)
{
virBuffer opt = VIR_BUFFER_INITIALIZER;
const char *driver = qemuDomainFSDriverTypeToString(fs->fsdriver);
const char *wrpolicy = virDomainFSWrpolicyTypeToString(fs->wrpolicy);
+ char *pathfdset = NULL;
if (fs->type != VIR_DOMAIN_FS_TYPE_MOUNT) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
@@ -2935,7 +3058,10 @@ char *qemuBuildFSStr(virDomainFSDefPtr f
}
virBufferAsprintf(&opt, ",id=%s%s", QEMU_FSDEV_HOST_PREFIX,
fs->info.alias);
- virBufferAsprintf(&opt, ",path=%s", fs->src);
+ pathfdset = qemuCreatePathForFilePath("path=%s", fs->src, cmd, fdset,
caps);
+ if (!pathfdset)
+ goto error;
+ virBufferAsprintf(&opt, ",%s", pathfdset);
Hmm - does fd passing really work for filesystem pass through? After
all, that implies that qemu will be opening multiple files beneath the
directory being passed as the source name. It may be that fdset and fs
passthrough are incompatible.
if (fs->readonly) {
if (qemuCapsGet(caps, QEMU_CAPS_FSDEV_READONLY)) {
@@ -2952,10 +3078,12 @@ char *qemuBuildFSStr(virDomainFSDefPtr f
virReportOOMError();
goto error;
}
+ VIR_FREE(pathfdset);
return virBufferContentAndReset(&opt);
error:
+ VIR_FREE(pathfdset);
virBufferFreeAndReset(&opt);
return NULL;
}
@@ -3884,15 +4012,16 @@ qemuBuildUSBHostdevUsbDevStr(virDomainHo
}
-
/* This function outputs a -chardev command line option which describes only the
* host side of the character device */
static char *
qemuBuildChrChardevStr(virDomainChrSourceDefPtr dev, const char *alias,
- qemuCapsPtr caps)
+ qemuCapsPtr caps, virCommandPtr cmd,
+ virIntSetPtr fdset)
{
virBuffer buf = VIR_BUFFER_INITIALIZER;
bool telnet;
+ char *pathfdset = NULL;
switch (dev->type) {
case VIR_DOMAIN_CHR_TYPE_NULL:
@@ -3908,19 +4037,31 @@ qemuBuildChrChardevStr(virDomainChrSourc
break;
case VIR_DOMAIN_CHR_TYPE_DEV:
- virBufferAsprintf(&buf, "%s,id=char%s,path=%s",
+ pathfdset = qemuCreatePathForFilePath("path=%s",
dev->data.file.path,
+ cmd, fdset, caps);
+ if (!pathfdset)
+ goto error;
+ virBufferAsprintf(&buf, "%s,id=char%s,%s",
STRPREFIX(alias, "parallel") ? "parport" :
"tty",
- alias, dev->data.file.path);
+ alias, pathfdset);
Again, printing directly into the existing &buf, instead of allocating a
new string only to copy pathfdset into the existing &buf, would be
nicer. Something like:
virBufferAsprintf(&buf, "%s,id=char%s,",
STRPREFIX(alias, "parallel") ? "parport" :
"tty",
alias);
if (qemuCreatePathForFile(&buf, "path=%s", dev->data.file.path,
cmd, fdset, caps) < 0)
goto error;
I'm also working on some patches to expose the QMP commands via
src/qemu/qemu_monitor.h, which will work in tandem with this series,
once we figure out in what direction we are heading.
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library
http://libvirt.org