[libvirt] [RFC PATCH] qemu: Add suport for pci-assign.configfd option

Note: The configfd option for qemu is still pending upstream, so this patch will depend on that being accepted. This patch also depends on the the s/tapfd/vmfd/ rename patch posted to the list. This allows libvirt to open the PCI device sysfs config file prior to dropping privileges so qemu can access the full config space. Without this, a de-privileged qemu can only access the first 64 bytes of config space. Signed-off-by: Alex Williamson <alex.williamson@redhat.com> --- src/qemu/qemu_conf.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_conf.h | 8 ++++- src/qemu/qemu_driver.c | 2 + 3 files changed, 91 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index d7bc798..a41012c 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1345,6 +1345,48 @@ fail: return -1; } +void qemudParsePCIDeviceStrs(const char *qemu, unsigned long long *flags) +{ + const char *const qemuarg[] = { qemu, "-device pci-assign,?", NULL }; + const char *const qemuenv[] = { "LC_ALL=C", NULL }; + pid_t child; + int status; + int newstderr = -1; + + if (virExec(qemuarg, qemuenv, NULL, + &child, -1, NULL, &newstderr, VIR_EXEC_CLEAR_CAPS) < 0) + return; + + char *pciassign = NULL; + enum { MAX_PCI_OUTPUT_SIZE = 1024*4 }; + int len = virFileReadLimFD(newstderr, MAX_PCI_OUTPUT_SIZE, &pciassign); + if (len < 0) { + virReportSystemError(errno, + _("Unable to read %s pci-assign device output"), + qemu); + goto cleanup; + } + + if (strstr(pciassign, "pci-assign.configfd")) + *flags |= QEMUD_CMD_FLAG_PCI_CONFIGFD; + +cleanup: + VIR_FREE(pciassign); + close(newstderr); +rewait: + if (waitpid(child, &status, 0) != child) { + if (errno == EINTR) + goto rewait; + + VIR_ERROR(_("Unexpected exit status from qemu %d pid %lu"), + WEXITSTATUS(status), (unsigned long)child); + } + if (WEXITSTATUS(status) != 0) { + VIR_WARN("Unexpected exit status '%d', qemu probably failed", + WEXITSTATUS(status)); + } +} + int qemudExtractVersionInfo(const char *qemu, unsigned int *retversion, unsigned long long *retflags) { @@ -1378,6 +1420,9 @@ int qemudExtractVersionInfo(const char *qemu, &version, &is_kvm, &kvm_version) == -1) goto cleanup2; + if (flags & QEMUD_CMD_FLAG_DEVICE) + qemudParsePCIDeviceStrs(qemu, &flags); + if (retversion) *retversion = version; if (retflags) @@ -2887,8 +2932,29 @@ error: } +int +qemudOpenPCIConfig(virDomainHostdevDefPtr dev) +{ + char *path = NULL; + int configfd = -1; + + /* XXX don't hard code segment */ + if (virAsprintf(&path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/config", + dev->source.subsys.u.pci.bus, dev->source.subsys.u.pci.slot, + dev->source.subsys.u.pci.function) < 0) { + virReportOOMError(); + return -1; + } + + configfd = open(path, O_RDWR, 0); + + VIR_FREE(path); + + return configfd; +} + char * -qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev) +qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev, int configfd) { virBuffer buf = VIR_BUFFER_INITIALIZER; @@ -2898,6 +2964,8 @@ qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev) dev->source.subsys.u.pci.slot, dev->source.subsys.u.pci.function); virBufferVSprintf(&buf, ",id=%s", dev->info.alias); + if (configfd >= 0) + virBufferVSprintf(&buf, ",configfd=%d", configfd); if (qemuBuildDeviceAddressStr(&buf, &dev->info) < 0) goto error; @@ -4600,8 +4668,21 @@ int qemudBuildCommandLine(virConnectPtr conn, if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) { + int configfd = -1; + if (qemuCmdFlags & QEMUD_CMD_FLAG_PCI_CONFIGFD) { + configfd = qemudOpenPCIConfig(hostdev); + + if (configfd >= 0) { + if (VIR_REALLOC_N(*vmfds, (*nvmfds)+1) < 0) { + close(configfd); + goto no_memory; + } + + (*vmfds)[(*nvmfds)++] = configfd; + } + } ADD_ARG_LIT("-device"); - if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev))) + if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev, configfd))) goto error; ADD_ARG(devstr); } else if (qemuCmdFlags & QEMUD_CMD_FLAG_PCIDEVICE) { diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index a101e47..64fab60 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -88,6 +88,7 @@ enum qemud_cmd_flags { QEMUD_CMD_FLAG_NO_HPET = (1LL << 33), /* -no-hpet flag is supported */ QEMUD_CMD_FLAG_NO_KVM_PIT = (1LL << 34), /* -no-kvm-pit-reinjection supported */ QEMUD_CMD_FLAG_TDF = (1LL << 35), /* -tdf flag (user-mode pit catchup) */ + QEMUD_CMD_FLAG_PCI_CONFIGFD = (1LL << 36), /* pci-assign.configfd */ }; /* Main driver state */ @@ -190,6 +191,9 @@ int qemudParseHelpStr (const char *qemu, unsigned int *is_kvm, unsigned int *kvm_version); +void qemudParsePCIDeviceStrs (const char *qemu, + unsigned long long *qemuCmdFlags); + int qemudBuildCommandLine (virConnectPtr conn, struct qemud_driver *driver, virDomainDefPtr def, @@ -242,7 +246,9 @@ char * qemuBuildSoundDevStr(virDomainSoundDefPtr sound); /* Legacy, pre device support */ char * qemuBuildPCIHostdevPCIDevStr(virDomainHostdevDefPtr dev); /* Current, best practice */ -char * qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev); +char * qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev, int configfd); + +int qemudOpenPCIConfig(virDomainHostdevDefPtr dev); /* Current, best practice */ char * qemuBuildChrChardevStr(virDomainChrDefPtr dev); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index dd5bd24..219a973 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7567,7 +7567,7 @@ static int qemudDomainAttachHostPciDevice(struct qemud_driver *driver, if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &hostdev->info) < 0) goto error; - if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev))) + if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev, -1))) goto error; }

* Alex Williamson (alex.williamson@redhat.com) wrote:
@@ -1378,6 +1420,9 @@ int qemudExtractVersionInfo(const char *qemu, &version, &is_kvm, &kvm_version) == -1) goto cleanup2;
+ if (flags & QEMUD_CMD_FLAG_DEVICE) + qemudParsePCIDeviceStrs(qemu, &flags); +
looks like this is called early enough for both cmdline and monitor interface
if (retversion) *retversion = version; if (retflags) @@ -2887,8 +2932,29 @@ error: }
+int +qemudOpenPCIConfig(virDomainHostdevDefPtr dev) +{ + char *path = NULL; + int configfd = -1; + + /* XXX don't hard code segment */
You don't need to, just...
+ if (virAsprintf(&path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/config", / s/0000/%04x/ --------------------------------------
+ dev->source.subsys.u.pci.bus, dev->source.subsys.u.pci.slot,
and add dev->source.subsys.u.pci.domain
+ dev->source.subsys.u.pci.function) < 0) { + virReportOOMError(); + return -1; + } + + configfd = open(path, O_RDWR, 0);
Should probably report an open failure to aid debugging.
+ VIR_FREE(path); + + return configfd; +} + char * -qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev) +qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev, int configfd) { virBuffer buf = VIR_BUFFER_INITIALIZER;
@@ -2898,6 +2964,8 @@ qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev) dev->source.subsys.u.pci.slot, dev->source.subsys.u.pci.function); virBufferVSprintf(&buf, ",id=%s", dev->info.alias); + if (configfd >= 0) + virBufferVSprintf(&buf, ",configfd=%d", configfd); if (qemuBuildDeviceAddressStr(&buf, &dev->info) < 0) goto error;
@@ -4600,8 +4668,21 @@ int qemudBuildCommandLine(virConnectPtr conn, if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) { + int configfd = -1; + if (qemuCmdFlags & QEMUD_CMD_FLAG_PCI_CONFIGFD) { + configfd = qemudOpenPCIConfig(hostdev); + + if (configfd >= 0) { + if (VIR_REALLOC_N(*vmfds, (*nvmfds)+1) < 0) { + close(configfd); + goto no_memory; + } + + (*vmfds)[(*nvmfds)++] = configfd; + } + } ADD_ARG_LIT("-device"); - if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev))) + if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev, configfd))) goto error; ADD_ARG(devstr); } else if (qemuCmdFlags & QEMUD_CMD_FLAG_PCIDEVICE) { diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index a101e47..64fab60 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -88,6 +88,7 @@ enum qemud_cmd_flags { QEMUD_CMD_FLAG_NO_HPET = (1LL << 33), /* -no-hpet flag is supported */ QEMUD_CMD_FLAG_NO_KVM_PIT = (1LL << 34), /* -no-kvm-pit-reinjection supported */ QEMUD_CMD_FLAG_TDF = (1LL << 35), /* -tdf flag (user-mode pit catchup) */ + QEMUD_CMD_FLAG_PCI_CONFIGFD = (1LL << 36), /* pci-assign.configfd */ };
/* Main driver state */ @@ -190,6 +191,9 @@ int qemudParseHelpStr (const char *qemu, unsigned int *is_kvm, unsigned int *kvm_version);
+void qemudParsePCIDeviceStrs (const char *qemu, + unsigned long long *qemuCmdFlags); + int qemudBuildCommandLine (virConnectPtr conn, struct qemud_driver *driver, virDomainDefPtr def, @@ -242,7 +246,9 @@ char * qemuBuildSoundDevStr(virDomainSoundDefPtr sound); /* Legacy, pre device support */ char * qemuBuildPCIHostdevPCIDevStr(virDomainHostdevDefPtr dev); /* Current, best practice */ -char * qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev); +char * qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev, int configfd); + +int qemudOpenPCIConfig(virDomainHostdevDefPtr dev);
/* Current, best practice */ char * qemuBuildChrChardevStr(virDomainChrDefPtr dev); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index dd5bd24..219a973 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7567,7 +7567,7 @@ static int qemudDomainAttachHostPciDevice(struct qemud_driver *driver, if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &hostdev->info) < 0) goto error;
- if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev))) + if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev, -1)))
Looks like this is dynamically assigning host PCI device to running guest, we should still be able to pass fd here. That will complicate the qemu side to be able to accept the fd. thanks, -chris

On Wed, 2010-05-19 at 22:34 -0700, Chris Wright wrote:
* Alex Williamson (alex.williamson@redhat.com) wrote:
@@ -1378,6 +1420,9 @@ int qemudExtractVersionInfo(const char *qemu, &version, &is_kvm, &kvm_version) == -1) goto cleanup2;
+ if (flags & QEMUD_CMD_FLAG_DEVICE) + qemudParsePCIDeviceStrs(qemu, &flags); +
looks like this is called early enough for both cmdline and monitor interface
Yes, it should be.
+ /* XXX don't hard code segment */
You don't need to, just...
+ if (virAsprintf(&path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/config", / s/0000/%04x/ --------------------------------------
+ dev->source.subsys.u.pci.bus, dev->source.subsys.u.pci.slot,
and add dev->source.subsys.u.pci.domain
Oh good, I meant to look, but obviously forgot. Fixed.
+ dev->source.subsys.u.pci.function) < 0) { + virReportOOMError(); + return -1; + } + + configfd = open(path, O_RDWR, 0);
Should probably report an open failure to aid debugging.
Done.
--- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7567,7 +7567,7 @@ static int qemudDomainAttachHostPciDevice(struct qemud_driver *driver, if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &hostdev->info) < 0) goto error;
- if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev))) + if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev, -1)))
Looks like this is dynamically assigning host PCI device to running guest, we should still be able to pass fd here. That will complicate the qemu side to be able to accept the fd.
We could certainly open the file and pass an fd, but I don't know how to let qemu make use of it after it's been spawned, so I reverted to existing behavior here. Thanks, Alex

* Alex Williamson (alex.williamson@redhat.com) wrote:
On Wed, 2010-05-19 at 22:34 -0700, Chris Wright wrote:
* Alex Williamson (alex.williamson@redhat.com) wrote:
@@ -1378,6 +1420,9 @@ int qemudExtractVersionInfo(const char *qemu, &version, &is_kvm, &kvm_version) == -1) goto cleanup2;
+ if (flags & QEMUD_CMD_FLAG_DEVICE) + qemudParsePCIDeviceStrs(qemu, &flags); +
looks like this is called early enough for both cmdline and monitor interface
Yes, it should be.
+ /* XXX don't hard code segment */
You don't need to, just...
+ if (virAsprintf(&path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/config", / s/0000/%04x/ --------------------------------------
+ dev->source.subsys.u.pci.bus, dev->source.subsys.u.pci.slot,
and add dev->source.subsys.u.pci.domain
Oh good, I meant to look, but obviously forgot. Fixed.
+ dev->source.subsys.u.pci.function) < 0) { + virReportOOMError(); + return -1; + } + + configfd = open(path, O_RDWR, 0);
Should probably report an open failure to aid debugging.
Done.
--- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7567,7 +7567,7 @@ static int qemudDomainAttachHostPciDevice(struct qemud_driver *driver, if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &hostdev->info) < 0) goto error;
- if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev))) + if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev, -1)))
Looks like this is dynamically assigning host PCI device to running guest, we should still be able to pass fd here. That will complicate the qemu side to be able to accept the fd.
We could certainly open the file and pass an fd, but I don't know how to let qemu make use of it after it's been spawned, so I reverted to existing behavior here. Thanks,
that needs to call to MonitorWriteWithFD or whatever the heck its called. and qemu can recv the fd w/ do_getfd path. however this will take more work on the qemu side thanks, -chris

This allows libvirt to open the PCI device sysfs config file prior to dropping privileges so qemu can access the full config space. Without this, a de-privileged qemu can only access the first 64 bytes of config space. Signed-off-by: Alex Williamson <alex.williamson@redhat.com> --- v2: - fix qemuargs, two separate args (thanks Chris) - don't hardcode pci segment - error message of open() fail src/qemu/qemu_conf.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_conf.h | 8 ++++ src/qemu/qemu_driver.c | 2 + 3 files changed, 95 insertions(+), 4 deletions(-) diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index d7bc798..24c360e 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1345,6 +1345,48 @@ fail: return -1; } +void qemudParsePCIDeviceStrs(const char *qemu, unsigned long long *flags) +{ + const char *const qemuarg[] = { qemu, "-device", "pci-assign,?", NULL }; + const char *const qemuenv[] = { "LC_ALL=C", NULL }; + pid_t child; + int status; + int newstderr = -1; + + if (virExec(qemuarg, qemuenv, NULL, + &child, -1, NULL, &newstderr, VIR_EXEC_CLEAR_CAPS) < 0) + return; + + char *pciassign = NULL; + enum { MAX_PCI_OUTPUT_SIZE = 1024*4 }; + int len = virFileReadLimFD(newstderr, MAX_PCI_OUTPUT_SIZE, &pciassign); + if (len < 0) { + virReportSystemError(errno, + _("Unable to read %s pci-assign device output"), + qemu); + goto cleanup; + } + + if (strstr(pciassign, "pci-assign.configfd")) + *flags |= QEMUD_CMD_FLAG_PCI_CONFIGFD; + +cleanup: + VIR_FREE(pciassign); + close(newstderr); +rewait: + if (waitpid(child, &status, 0) != child) { + if (errno == EINTR) + goto rewait; + + VIR_ERROR(_("Unexpected exit status from qemu %d pid %lu"), + WEXITSTATUS(status), (unsigned long)child); + } + if (WEXITSTATUS(status) != 0) { + VIR_WARN("Unexpected exit status '%d', qemu probably failed", + WEXITSTATUS(status)); + } +} + int qemudExtractVersionInfo(const char *qemu, unsigned int *retversion, unsigned long long *retflags) { @@ -1378,6 +1420,9 @@ int qemudExtractVersionInfo(const char *qemu, &version, &is_kvm, &kvm_version) == -1) goto cleanup2; + if (flags & QEMUD_CMD_FLAG_DEVICE) + qemudParsePCIDeviceStrs(qemu, &flags); + if (retversion) *retversion = version; if (retflags) @@ -2887,8 +2932,33 @@ error: } +int +qemudOpenPCIConfig(virDomainHostdevDefPtr dev) +{ + char *path = NULL; + int configfd = -1; + + if (virAsprintf(&path, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/config", + dev->source.subsys.u.pci.domain, + dev->source.subsys.u.pci.bus, + dev->source.subsys.u.pci.slot, + dev->source.subsys.u.pci.function) < 0) { + virReportOOMError(); + return -1; + } + + configfd = open(path, O_RDWR, 0); + + if (configfd < 0) + virReportSystemError(errno, _("Failed opening %s"), path); + + VIR_FREE(path); + + return configfd; +} + char * -qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev) +qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev, int configfd) { virBuffer buf = VIR_BUFFER_INITIALIZER; @@ -2898,6 +2968,8 @@ qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev) dev->source.subsys.u.pci.slot, dev->source.subsys.u.pci.function); virBufferVSprintf(&buf, ",id=%s", dev->info.alias); + if (configfd >= 0) + virBufferVSprintf(&buf, ",configfd=%d", configfd); if (qemuBuildDeviceAddressStr(&buf, &dev->info) < 0) goto error; @@ -4600,8 +4672,21 @@ int qemudBuildCommandLine(virConnectPtr conn, if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) { + int configfd = -1; + if (qemuCmdFlags & QEMUD_CMD_FLAG_PCI_CONFIGFD) { + configfd = qemudOpenPCIConfig(hostdev); + + if (configfd >= 0) { + if (VIR_REALLOC_N(*vmfds, (*nvmfds)+1) < 0) { + close(configfd); + goto no_memory; + } + + (*vmfds)[(*nvmfds)++] = configfd; + } + } ADD_ARG_LIT("-device"); - if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev))) + if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev, configfd))) goto error; ADD_ARG(devstr); } else if (qemuCmdFlags & QEMUD_CMD_FLAG_PCIDEVICE) { diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index a101e47..64fab60 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -88,6 +88,7 @@ enum qemud_cmd_flags { QEMUD_CMD_FLAG_NO_HPET = (1LL << 33), /* -no-hpet flag is supported */ QEMUD_CMD_FLAG_NO_KVM_PIT = (1LL << 34), /* -no-kvm-pit-reinjection supported */ QEMUD_CMD_FLAG_TDF = (1LL << 35), /* -tdf flag (user-mode pit catchup) */ + QEMUD_CMD_FLAG_PCI_CONFIGFD = (1LL << 36), /* pci-assign.configfd */ }; /* Main driver state */ @@ -190,6 +191,9 @@ int qemudParseHelpStr (const char *qemu, unsigned int *is_kvm, unsigned int *kvm_version); +void qemudParsePCIDeviceStrs (const char *qemu, + unsigned long long *qemuCmdFlags); + int qemudBuildCommandLine (virConnectPtr conn, struct qemud_driver *driver, virDomainDefPtr def, @@ -242,7 +246,9 @@ char * qemuBuildSoundDevStr(virDomainSoundDefPtr sound); /* Legacy, pre device support */ char * qemuBuildPCIHostdevPCIDevStr(virDomainHostdevDefPtr dev); /* Current, best practice */ -char * qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev); +char * qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev, int configfd); + +int qemudOpenPCIConfig(virDomainHostdevDefPtr dev); /* Current, best practice */ char * qemuBuildChrChardevStr(virDomainChrDefPtr dev); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index dd5bd24..219a973 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7567,7 +7567,7 @@ static int qemudDomainAttachHostPciDevice(struct qemud_driver *driver, if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &hostdev->info) < 0) goto error; - if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev))) + if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev, -1))) goto error; }

On Thu, May 20, 2010 at 10:02:10AM -0400, Alex Williamson wrote:
This allows libvirt to open the PCI device sysfs config file prior to dropping privileges so qemu can access the full config space. Without this, a de-privileged qemu can only access the first 64 bytes of config space.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com> ---
v2: - fix qemuargs, two separate args (thanks Chris) - don't hardcode pci segment - error message of open() fail
src/qemu/qemu_conf.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_conf.h | 8 ++++ src/qemu/qemu_driver.c | 2 + 3 files changed, 95 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index d7bc798..24c360e 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -1345,6 +1345,48 @@ fail: return -1; }
+void qemudParsePCIDeviceStrs(const char *qemu, unsigned long long *flags)
Minor point, that this function can be declared static.
+{ + const char *const qemuarg[] = { qemu, "-device", "pci-assign,?", NULL }; + const char *const qemuenv[] = { "LC_ALL=C", NULL }; + pid_t child; + int status; + int newstderr = -1; + + if (virExec(qemuarg, qemuenv, NULL, + &child, -1, NULL, &newstderr, VIR_EXEC_CLEAR_CAPS) < 0) + return; + + char *pciassign = NULL; + enum { MAX_PCI_OUTPUT_SIZE = 1024*4 }; + int len = virFileReadLimFD(newstderr, MAX_PCI_OUTPUT_SIZE, &pciassign); + if (len < 0) { + virReportSystemError(errno, + _("Unable to read %s pci-assign device output"), + qemu); + goto cleanup; + } + + if (strstr(pciassign, "pci-assign.configfd")) + *flags |= QEMUD_CMD_FLAG_PCI_CONFIGFD; + +cleanup: + VIR_FREE(pciassign); + close(newstderr); +rewait: + if (waitpid(child, &status, 0) != child) { + if (errno == EINTR) + goto rewait; + + VIR_ERROR(_("Unexpected exit status from qemu %d pid %lu"), + WEXITSTATUS(status), (unsigned long)child); + } + if (WEXITSTATUS(status) != 0) { + VIR_WARN("Unexpected exit status '%d', qemu probably failed", + WEXITSTATUS(status)); + } +} + int qemudExtractVersionInfo(const char *qemu, unsigned int *retversion, unsigned long long *retflags) { @@ -1378,6 +1420,9 @@ int qemudExtractVersionInfo(const char *qemu, &version, &is_kvm, &kvm_version) == -1) goto cleanup2;
+ if (flags & QEMUD_CMD_FLAG_DEVICE) + qemudParsePCIDeviceStrs(qemu, &flags); + if (retversion) *retversion = version; if (retflags) @@ -2887,8 +2932,33 @@ error: }
+int +qemudOpenPCIConfig(virDomainHostdevDefPtr dev) +{ + char *path = NULL; + int configfd = -1; + + if (virAsprintf(&path, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/config", + dev->source.subsys.u.pci.domain, + dev->source.subsys.u.pci.bus, + dev->source.subsys.u.pci.slot, + dev->source.subsys.u.pci.function) < 0) { + virReportOOMError(); + return -1; + } + + configfd = open(path, O_RDWR, 0); + + if (configfd < 0) + virReportSystemError(errno, _("Failed opening %s"), path); + + VIR_FREE(path); + + return configfd; +} + char * -qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev) +qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev, int configfd) { virBuffer buf = VIR_BUFFER_INITIALIZER;
@@ -2898,6 +2968,8 @@ qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev) dev->source.subsys.u.pci.slot, dev->source.subsys.u.pci.function); virBufferVSprintf(&buf, ",id=%s", dev->info.alias); + if (configfd >= 0) + virBufferVSprintf(&buf, ",configfd=%d", configfd);
For hotplug to work, it will actually be neccessary to make this take a const char * string and use %s. For CLI args the string will just be the FD number formatted normally, for hotplug the string will be a symbolic FD name.
@@ -4600,8 +4672,21 @@ int qemudBuildCommandLine(virConnectPtr conn, if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) { + int configfd = -1;
const char configfdstr[20];
+ if (qemuCmdFlags & QEMUD_CMD_FLAG_PCI_CONFIGFD) { + configfd = qemudOpenPCIConfig(hostdev); + + if (configfd >= 0) {
snprintf(configfdstr, sizeof(configfdstr), "%d", configfd);
+ if (VIR_REALLOC_N(*vmfds, (*nvmfds)+1) < 0) { + close(configfd); + goto no_memory; + } + + (*vmfds)[(*nvmfds)++] = configfd; + } + } ADD_ARG_LIT("-device"); - if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev))) + if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev, configfd)))
And pass the configfdstr here.
goto error; ADD_ARG(devstr); } else if (qemuCmdFlags & QEMUD_CMD_FLAG_PCIDEVICE) { diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index a101e47..64fab60 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -88,6 +88,7 @@ enum qemud_cmd_flags { QEMUD_CMD_FLAG_NO_HPET = (1LL << 33), /* -no-hpet flag is supported */ QEMUD_CMD_FLAG_NO_KVM_PIT = (1LL << 34), /* -no-kvm-pit-reinjection supported */ QEMUD_CMD_FLAG_TDF = (1LL << 35), /* -tdf flag (user-mode pit catchup) */ + QEMUD_CMD_FLAG_PCI_CONFIGFD = (1LL << 36), /* pci-assign.configfd */ };
/* Main driver state */ @@ -190,6 +191,9 @@ int qemudParseHelpStr (const char *qemu, unsigned int *is_kvm, unsigned int *kvm_version);
+void qemudParsePCIDeviceStrs (const char *qemu, + unsigned long long *qemuCmdFlags); + int qemudBuildCommandLine (virConnectPtr conn, struct qemud_driver *driver, virDomainDefPtr def, @@ -242,7 +246,9 @@ char * qemuBuildSoundDevStr(virDomainSoundDefPtr sound); /* Legacy, pre device support */ char * qemuBuildPCIHostdevPCIDevStr(virDomainHostdevDefPtr dev); /* Current, best practice */ -char * qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev); +char * qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev, int configfd); + +int qemudOpenPCIConfig(virDomainHostdevDefPtr dev);
/* Current, best practice */ char * qemuBuildChrChardevStr(virDomainChrDefPtr dev); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index dd5bd24..219a973 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7567,7 +7567,7 @@ static int qemudDomainAttachHostPciDevice(struct qemud_driver *driver, if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &hostdev->info) < 0) goto error;
- if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev))) + if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev, -1))) goto error; }
This is the only thing still needing work. On the libvirt side there is a function qemuMonitorSendFileHandle() that can be used to give QEMU the pre-opened file handle. You the 'fdname' parameter is what's passed into the qemuBuildPCIHostdevDevStr() earlier as the named file handle. Regards, Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
participants (3)
-
Alex Williamson
-
Chris Wright
-
Daniel P. Berrange