This allows libvirt to open the PCI device sysfs config file prior
to dropping privileges so qemu can access the full config space.
Without this, a de-privileged qemu can only access the first 64
bytes of config space.
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
---
Note: this is still dependent on the configfd option being accepted
into qemu/kvm.
v2:
- fix qemuargs, two separate args (thanks Chris)
- don't hardcode pci segment
- error message of open() fail
v3:
- add hotplug support
- incorporate Daniel's comments
src/qemu/qemu_conf.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++-
src/qemu/qemu_conf.h | 6 ++-
src/qemu/qemu_driver.c | 28 +++++++++++++-
3 files changed, 128 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index 2755545..926ce48 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -1350,6 +1350,48 @@ fail:
return -1;
}
+static void qemudParsePCIDeviceStrs(const char *qemu, unsigned long long *flags)
+{
+ const char *const qemuarg[] = { qemu, "-device", "pci-assign,?",
NULL };
+ const char *const qemuenv[] = { "LC_ALL=C", NULL };
+ pid_t child;
+ int status;
+ int newstderr = -1;
+
+ if (virExec(qemuarg, qemuenv, NULL,
+ &child, -1, NULL, &newstderr, VIR_EXEC_CLEAR_CAPS) < 0)
+ return;
+
+ char *pciassign = NULL;
+ enum { MAX_PCI_OUTPUT_SIZE = 1024*4 };
+ int len = virFileReadLimFD(newstderr, MAX_PCI_OUTPUT_SIZE, &pciassign);
+ if (len < 0) {
+ virReportSystemError(errno,
+ _("Unable to read %s pci-assign device output"),
+ qemu);
+ goto cleanup;
+ }
+
+ if (strstr(pciassign, "pci-assign.configfd"))
+ *flags |= QEMUD_CMD_FLAG_PCI_CONFIGFD;
+
+cleanup:
+ VIR_FREE(pciassign);
+ close(newstderr);
+rewait:
+ if (waitpid(child, &status, 0) != child) {
+ if (errno == EINTR)
+ goto rewait;
+
+ VIR_ERROR(_("Unexpected exit status from qemu %d pid %lu"),
+ WEXITSTATUS(status), (unsigned long)child);
+ }
+ if (WEXITSTATUS(status) != 0) {
+ VIR_WARN("Unexpected exit status '%d', qemu probably failed",
+ WEXITSTATUS(status));
+ }
+}
+
int qemudExtractVersionInfo(const char *qemu,
unsigned int *retversion,
unsigned long long *retflags) {
@@ -1383,6 +1425,9 @@ int qemudExtractVersionInfo(const char *qemu,
&version, &is_kvm, &kvm_version) == -1)
goto cleanup2;
+ if (flags & QEMUD_CMD_FLAG_DEVICE)
+ qemudParsePCIDeviceStrs(qemu, &flags);
+
if (retversion)
*retversion = version;
if (retflags)
@@ -2892,8 +2937,33 @@ error:
}
+int
+qemudOpenPCIConfig(virDomainHostdevDefPtr dev)
+{
+ char *path = NULL;
+ int configfd = -1;
+
+ if (virAsprintf(&path,
"/sys/bus/pci/devices/%04x:%02x:%02x.%01x/config",
+ dev->source.subsys.u.pci.domain,
+ dev->source.subsys.u.pci.bus,
+ dev->source.subsys.u.pci.slot,
+ dev->source.subsys.u.pci.function) < 0) {
+ virReportOOMError();
+ return -1;
+ }
+
+ configfd = open(path, O_RDWR, 0);
+
+ if (configfd < 0)
+ virReportSystemError(errno, _("Failed opening %s"), path);
+
+ VIR_FREE(path);
+
+ return configfd;
+}
+
char *
-qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev)
+qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev, const char *configfd)
{
virBuffer buf = VIR_BUFFER_INITIALIZER;
@@ -2903,6 +2973,8 @@ qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev)
dev->source.subsys.u.pci.slot,
dev->source.subsys.u.pci.function);
virBufferVSprintf(&buf, ",id=%s", dev->info.alias);
+ if (configfd && *configfd)
+ virBufferVSprintf(&buf, ",configfd=%s", configfd);
if (qemuBuildDeviceAddressStr(&buf, &dev->info) < 0)
goto error;
@@ -4605,8 +4677,30 @@ int qemudBuildCommandLine(virConnectPtr conn,
if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) {
if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) {
+ char *configfd_name = NULL;
+ if (qemuCmdFlags & QEMUD_CMD_FLAG_PCI_CONFIGFD) {
+ int configfd = qemudOpenPCIConfig(hostdev);
+
+ if (configfd >= 0) {
+ if (virAsprintf(&configfd_name, "%d", configfd)
< 0) {
+ close(configfd);
+ virReportOOMError();
+ goto no_memory;
+ }
+
+ if (VIR_REALLOC_N(*vmfds, (*nvmfds)+1) < 0) {
+ VIR_FREE(configfd_name);
+ close(configfd);
+ goto no_memory;
+ }
+
+ (*vmfds)[(*nvmfds)++] = configfd;
+ }
+ }
ADD_ARG_LIT("-device");
- if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev)))
+ devstr = qemuBuildPCIHostdevDevStr(hostdev, configfd_name);
+ VIR_FREE(configfd_name);
+ if (!devstr)
goto error;
ADD_ARG(devstr);
} else if (qemuCmdFlags & QEMUD_CMD_FLAG_PCIDEVICE) {
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index 8fd8d79..d5ce4ce 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -89,6 +89,7 @@ enum qemud_cmd_flags {
QEMUD_CMD_FLAG_NO_HPET = (1LL << 33), /* -no-hpet flag is supported */
QEMUD_CMD_FLAG_NO_KVM_PIT = (1LL << 34), /* -no-kvm-pit-reinjection
supported */
QEMUD_CMD_FLAG_TDF = (1LL << 35), /* -tdf flag (user-mode pit
catchup) */
+ QEMUD_CMD_FLAG_PCI_CONFIGFD = (1LL << 36), /* pci-assign.configfd */
};
/* Main driver state */
@@ -245,7 +246,10 @@ char * qemuBuildSoundDevStr(virDomainSoundDefPtr sound);
/* Legacy, pre device support */
char * qemuBuildPCIHostdevPCIDevStr(virDomainHostdevDefPtr dev);
/* Current, best practice */
-char * qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev);
+char * qemuBuildPCIHostdevDevStr(virDomainHostdevDefPtr dev,
+ const char *configfd);
+
+int qemudOpenPCIConfig(virDomainHostdevDefPtr dev);
/* Current, best practice */
char * qemuBuildChrChardevStr(virDomainChrDefPtr dev);
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index afdc718..52f2374 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -7655,6 +7655,8 @@ static int qemudDomainAttachHostPciDevice(struct qemud_driver
*driver,
pciDevice *pci;
int ret;
char *devstr = NULL;
+ int configfd = -1;
+ char *configfd_name = NULL;
if (VIR_REALLOC_N(vm->def->hostdevs, vm->def->nhostdevs+1) < 0) {
virReportOOMError();
@@ -7685,8 +7687,26 @@ static int qemudDomainAttachHostPciDevice(struct qemud_driver
*driver,
goto error;
if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &hostdev->info) <
0)
goto error;
+ if (qemuCmdFlags & QEMUD_CMD_FLAG_PCI_CONFIGFD) {
+ configfd = qemudOpenPCIConfig(hostdev);
+ if (configfd >= 0) {
+ if (virAsprintf(&configfd_name, "fd-%s",
+ hostdev->info.alias) < 0) {
+ virReportOOMError();
+ goto error;
+ }
+
+ qemuDomainObjEnterMonitorWithDriver(driver, vm);
+ if (qemuMonitorSendFileHandle(priv->mon, configfd_name,
+ configfd) < 0) {
+ qemuDomainObjExitMonitorWithDriver(driver, vm);
+ goto error;
+ }
+ qemuDomainObjExitMonitorWithDriver(driver, vm);
+ }
+ }
- if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev)))
+ if (!(devstr = qemuBuildPCIHostdevDevStr(hostdev, configfd_name)))
goto error;
qemuDomainObjEnterMonitorWithDriver(driver, vm);
@@ -7710,6 +7730,9 @@ static int qemudDomainAttachHostPciDevice(struct qemud_driver
*driver,
vm->def->hostdevs[vm->def->nhostdevs++] = hostdev;
VIR_FREE(devstr);
+ VIR_FREE(configfd_name);
+ if (configfd >= 0)
+ close(configfd);
return 0;
@@ -7721,6 +7744,9 @@ error:
VIR_FREE(devstr);
pciDeviceListDel(driver->activePciHostdevs, pci);
+ VIR_FREE(configfd_name);
+ if (configfd >= 0)
+ close(configfd);
return -1;
}