This patch obtains the IOMMU group for host devices either from the XML
configuration of the VFIO host bridge controller or from sysfs.
Like other architectures, the devices in the iommu group need to be
detached manually before the guest is created.
On pseries, an iommu group can be shared by multiple host devices, and
those devices share the same spapr-pci-vfio-host-bridge controller. A new
controller is added for every new iommu group. Every spapr-pci-vfio-host-bridge
on the command line creates a new pci domain in the guest. For example:
-device spapr-pci-vfio-host-bridge,iommu=1,id=SOMEDOMAIN,index=1
"SOMEDOMAIN" is the id of the new pci domain inside the guest.
spapr-pci-vfio-host-bridge is actually a PCI host bridge with VFIO support.
It can host pci-bridges, and has features and behaviour similar to the
default emulated pci host bridge. The controllers are assigned a new domain
number for every new iommu group.
The sample controller tags would look like below:
<controller type='spapr-vfio-pci' index='0' model='pci-root'
iommuGroupNum='3' domain='1'/>
<controller type='spapr-vfio-pci' index='1' model='pci-bridge'
iommuGroupNum='3' domain='1'>
<address type='pci' domain='0x0001' bus='0x00'
slot='0x02' function='0x0'/>
</controller>
<controller type='spapr-vfio-pci' index='0' model='pci-root'
iommuGroupNum='13' domain='2'/>
Signed-off-by: Shivaprasad G Bhat <sbhat(a)linux.vnet.ibm.com>
Signed-off-by: Pradipta Kumar Banerjee <bpradip(a)in.ibm.com>
---
src/bhyve/bhyve_domain.c | 2 -
src/conf/domain_conf.c | 149 ++++++++++++++++++++++++++++++++++++++++++++--
src/conf/domain_conf.h | 19 ++++++
src/libvirt_private.syms | 1
src/qemu/qemu_command.c | 4 +
src/qemu/qemu_domain.c | 12 ++--
src/qemu/qemu_driver.c | 6 ++
7 files changed, 178 insertions(+), 15 deletions(-)
diff --git a/src/bhyve/bhyve_domain.c b/src/bhyve/bhyve_domain.c
index ecb1758..96d30ab 100644
--- a/src/bhyve/bhyve_domain.c
+++ b/src/bhyve/bhyve_domain.c
@@ -63,7 +63,7 @@ bhyveDomainDefPostParse(virDomainDefPtr def,
void *opaque ATTRIBUTE_UNUSED)
{
/* Add an implicit PCI root controller */
- if (virDomainDefMaybeAddController(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 0,
+ if (virDomainDefMaybeAddController(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 0, 0,
VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) < 0)
return -1;
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 42c0223..66f7809 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -34,6 +34,7 @@
#include "datatypes.h"
#include "domain_conf.h"
#include "snapshot_conf.h"
+#include "virpci.h"
#include "viralloc.h"
#include "verify.h"
#include "virxml.h"
@@ -327,7 +328,8 @@ VIR_ENUM_IMPL(virDomainController, VIR_DOMAIN_CONTROLLER_TYPE_LAST,
"virtio-serial",
"ccid",
"usb",
- "pci")
+ "pci",
+ "spapr-vfio-pci")
VIR_ENUM_IMPL(virDomainControllerModelPCI, VIR_DOMAIN_CONTROLLER_MODEL_PCI_LAST,
"pci-root",
@@ -2926,6 +2928,8 @@ virDomainDefRejectDuplicateControllers(virDomainDefPtr def)
/* multiple USB controllers with the same index are allowed */
max_idx[VIR_DOMAIN_CONTROLLER_TYPE_USB] = -1;
+ /* The idx can be same across different pci domains */
+ max_idx[VIR_DOMAIN_CONTROLLER_TYPE_SPAPR_PCI_VFIO] = -1;
for (i = 0; i < VIR_DOMAIN_CONTROLLER_TYPE_LAST; i++) {
if (max_idx[i] >= 0 && !(bitmaps[i] = virBitmapNew(max_idx[i] + 1)))
@@ -6412,6 +6416,8 @@ virDomainControllerModelTypeFromString(const virDomainControllerDef
*def,
return virDomainControllerModelUSBTypeFromString(model);
else if (def->type == VIR_DOMAIN_CONTROLLER_TYPE_PCI)
return virDomainControllerModelPCITypeFromString(model);
+ else if (def->type == VIR_DOMAIN_CONTROLLER_TYPE_SPAPR_PCI_VFIO)
+ return virDomainControllerModelPCITypeFromString(model);
return -1;
}
@@ -6584,7 +6590,48 @@ virDomainControllerDefParseXML(xmlNodePtr node,
def->opts.pciopts.pcihole64size = VIR_DIV_UP(bytes, 1024);
}
}
+ break;
+ case VIR_DOMAIN_CONTROLLER_TYPE_SPAPR_PCI_VFIO: {
+ char *iommuStr = NULL;
+ char *domainStr = NULL;
+
+ def->domain = -1;
+ def->opts.spaprvfio.iommuGroupNum = -1;
+ if (def->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) {
+ if (def->idx != 0) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("pci-root and pcie-root controllers "
+ "should have index 0"));
+ goto error;
+ }
+ }
+ domainStr = virXMLPropString(node, "domain");
+ if (domainStr) {
+ int r = virStrToLong_i(domainStr, NULL, 10,
+ &def->domain);
+ if (r != 0 || def->domain <= 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Invalid domain number: %s"), domainStr);
+ VIR_FREE(domainStr);
+ goto error;
+ }
+ }
+ VIR_FREE(domainStr);
+ iommuStr = virXMLPropString(node, "iommuGroupNum");
+ if (iommuStr) {
+ int r = virStrToLong_i(iommuStr, NULL, 10,
+ &def->opts.spaprvfio.iommuGroupNum);
+ if (r != 0 || def->opts.spaprvfio.iommuGroupNum < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Invalid iommu group number: %s"), iommuStr);
+ VIR_FREE(iommuStr);
+ goto error;
+ }
+ }
+ VIR_FREE(iommuStr);
+ break;
+ }
default:
break;
}
@@ -11885,6 +11932,7 @@ virDomainVcpuPinDefParseXML(xmlNodePtr node,
int
virDomainDefMaybeAddController(virDomainDefPtr def,
int type,
+ int domain,
int idx,
int model)
{
@@ -11893,6 +11941,7 @@ virDomainDefMaybeAddController(virDomainDefPtr def,
for (i = 0; i < def->ncontrollers; i++) {
if (def->controllers[i]->type == type &&
+ def->controllers[i]->domain == domain &&
def->controllers[i]->idx == idx)
return 0;
}
@@ -11901,6 +11950,7 @@ virDomainDefMaybeAddController(virDomainDefPtr def,
return -1;
cont->type = type;
+ cont->domain = domain;
cont->idx = idx;
cont->model = model;
@@ -11908,6 +11958,8 @@ virDomainDefMaybeAddController(virDomainDefPtr def,
cont->opts.vioserial.ports = -1;
cont->opts.vioserial.vectors = -1;
}
+ if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_SPAPR_PCI_VFIO)
+ cont->opts.spaprvfio.iommuGroupNum = -1;
if (VIR_APPEND_ELEMENT(def->controllers, def->ncontrollers, cont) < 0) {
VIR_FREE(cont);
@@ -12026,6 +12078,79 @@ virDomainResourceDefParse(xmlNodePtr node,
return NULL;
}
+/**
+ * virDomainDefMaybeAddHostdevSpaprPCIVfiocontrollers:
+ * @def: domain definition to inspect and update
+ *
+ * For ppc64 guests whose machine type starts with "pseries", record an
+ * IOMMU group number on every PCI hostdev matched by IS_PCI_VFIO_HOSTDEV()
+ * and make sure a spapr-vfio-pci pci-root controller exists for each
+ * such group.
+ *
+ * A hostdev first inherits the group of the spapr-vfio-pci pci-root
+ * controller whose domain equals the domain of the hostdev's guest PCI
+ * address.  If that leaves the group at -1, the group is looked up with
+ * virPCIDeviceAddressGetIOMMUGroupNum(); when no spapr-vfio-pci pci-root
+ * controller carries that group yet, a new one is added in a domain one
+ * above the highest controller domain seen so far.
+ *
+ * Returns 0 on success (also when the guest is not ppc64/pseries),
+ * -1 if the group lookup or the controller addition fails.
+ */
+int
+virDomainDefMaybeAddHostdevSpaprPCIVfiocontrollers(virDomainDefPtr def)
+{
+    size_t h, c;
+    int highestDomain = 0;
+
+    /* Nothing to do unless this is a ppc64 pseries guest. */
+    if (def->os.arch != VIR_ARCH_PPC64)
+        return 0;
+    if (!def->os.machine || !STRPREFIX(def->os.machine, "pseries"))
+        return 0;
+
+    /* Start from a clean slate: no hostdev has a known group yet. */
+    for (h = 0; h < def->nhostdevs; h++) {
+        virDomainHostdevDefPtr dev = def->hostdevs[h];
+
+        if (IS_PCI_VFIO_HOSTDEV(dev))
+            dev->source.subsys.u.pci.iommu = -1;
+    }
+
+    /* The hostdevs belonging to the same iommu group are all part of the
+     * same domain, so they take the group of that domain's pci-root
+     * controller.  The highest domain is tracked across every
+     * controller, whatever its type.
+     */
+    for (c = 0; c < def->ncontrollers; c++) {
+        virDomainControllerDefPtr cont = def->controllers[c];
+
+        if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_SPAPR_PCI_VFIO &&
+            cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) {
+            for (h = 0; h < def->nhostdevs; h++) {
+                virDomainHostdevDefPtr dev = def->hostdevs[h];
+
+                if (!IS_PCI_VFIO_HOSTDEV(dev))
+                    continue;
+                if (dev->info->addr.pci.domain == cont->domain)
+                    dev->source.subsys.u.pci.iommu =
+                        cont->opts.spaprvfio.iommuGroupNum;
+            }
+        }
+
+        if (highestDomain < cont->domain)
+            highestDomain = cont->domain;
+    }
+
+    /* If the spapr-vfio controller doesn't exist for the hostdev,
+     * add a controller for that iommu group.
+     */
+    for (h = 0; h < def->nhostdevs; h++) {
+        virDomainHostdevDefPtr dev = def->hostdevs[h];
+        virPCIDeviceAddressPtr hostAddr;
+        bool haveRoot = false;
+        int group;
+
+        if (!IS_PCI_VFIO_HOSTDEV(dev))
+            continue;
+        if (dev->source.subsys.u.pci.iommu != -1)
+            continue;
+
+        hostAddr = (virPCIDeviceAddressPtr)&dev->source.subsys.u.pci.addr;
+        group = virPCIDeviceAddressGetIOMMUGroupNum(hostAddr);
+        if (group < 0)
+            return -1;
+        dev->source.subsys.u.pci.iommu = group;
+
+        /* Controllers appended by earlier iterations are visible here,
+         * so hostdevs sharing a group end up sharing one controller.
+         */
+        for (c = 0; c < def->ncontrollers && !haveRoot; c++) {
+            virDomainControllerDefPtr cont = def->controllers[c];
+
+            haveRoot =
+                cont->type == VIR_DOMAIN_CONTROLLER_TYPE_SPAPR_PCI_VFIO &&
+                cont->model == VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT &&
+                group == cont->opts.spaprvfio.iommuGroupNum;
+        }
+        if (haveRoot)
+            continue;
+
+        if (virDomainDefMaybeAddController(def,
+                                           VIR_DOMAIN_CONTROLLER_TYPE_SPAPR_PCI_VFIO,
+                                           ++highestDomain, 0,
+                                           VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) < 0)
+            return -1;
+
+        /* The controller just added is the last array element. */
+        def->controllers[def->ncontrollers - 1]->opts.spaprvfio.iommuGroupNum =
+            group;
+    }
+
+    return 0;
+}
+
+
static int
virDomainDefMaybeAddHostdevSCSIcontroller(virDomainDefPtr def)
{
@@ -12047,7 +12172,7 @@ virDomainDefMaybeAddHostdevSCSIcontroller(virDomainDefPtr def)
return 0;
for (i = 0; i <= maxController; i++) {
- if (virDomainDefMaybeAddController(def, VIR_DOMAIN_CONTROLLER_TYPE_SCSI, i, -1)
< 0)
+ if (virDomainDefMaybeAddController(def, VIR_DOMAIN_CONTROLLER_TYPE_SCSI, 0, i,
-1) < 0)
return -1;
}
@@ -15525,7 +15650,7 @@ virDomainDefAddDiskControllersForType(virDomainDefPtr def,
return 0;
for (i = 0; i <= maxController; i++) {
- if (virDomainDefMaybeAddController(def, controllerType, i, -1) < 0)
+ if (virDomainDefMaybeAddController(def, controllerType, 0, i, -1) < 0)
return -1;
}
@@ -15548,7 +15673,7 @@ virDomainDefMaybeAddVirtioSerialController(virDomainDefPtr def)
idx = channel->info.addr.vioserial.controller;
if (virDomainDefMaybeAddController(def,
- VIR_DOMAIN_CONTROLLER_TYPE_VIRTIO_SERIAL, idx, -1) < 0)
+ VIR_DOMAIN_CONTROLLER_TYPE_VIRTIO_SERIAL, 0, idx, -1) < 0)
return -1;
}
}
@@ -15563,7 +15688,7 @@ virDomainDefMaybeAddVirtioSerialController(virDomainDefPtr def)
idx = console->info.addr.vioserial.controller;
if (virDomainDefMaybeAddController(def,
- VIR_DOMAIN_CONTROLLER_TYPE_VIRTIO_SERIAL, idx, -1) < 0)
+ VIR_DOMAIN_CONTROLLER_TYPE_VIRTIO_SERIAL, 0, idx, -1) < 0)
return -1;
}
}
@@ -15603,7 +15728,7 @@ virDomainDefMaybeAddSmartcardController(virDomainDefPtr def)
if (virDomainDefMaybeAddController(def,
VIR_DOMAIN_CONTROLLER_TYPE_CCID,
- idx, -1) < 0)
+ 0, idx, -1) < 0)
return -1;
}
@@ -15648,6 +15773,9 @@ virDomainDefAddImplicitControllers(virDomainDefPtr def)
if (virDomainDefMaybeAddHostdevSCSIcontroller(def) < 0)
return -1;
+ if (virDomainDefMaybeAddHostdevSpaprPCIVfiocontrollers(def) < 0)
+ return -1;
+
return 0;
}
@@ -16412,6 +16540,8 @@ virDomainControllerModelTypeToString(virDomainControllerDefPtr
def,
return virDomainControllerModelUSBTypeToString(model);
else if (def->type == VIR_DOMAIN_CONTROLLER_TYPE_PCI)
return virDomainControllerModelPCITypeToString(model);
+ else if (def->type == VIR_DOMAIN_CONTROLLER_TYPE_SPAPR_PCI_VFIO)
+ return virDomainControllerModelPCITypeToString(model);
return NULL;
}
@@ -16465,7 +16595,12 @@ virDomainControllerDefFormat(virBufferPtr buf,
if (def->opts.pciopts.pcihole64)
pcihole64 = true;
break;
-
+ case VIR_DOMAIN_CONTROLLER_TYPE_SPAPR_PCI_VFIO:
+ virBufferAsprintf(buf, " iommuGroupNum='%d'",
+ def->opts.spaprvfio.iommuGroupNum);
+ virBufferAsprintf(buf, " domain='%d'",
+ def->domain);
+ break;
default:
break;
}
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 9908d88..8671fcd 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -419,6 +419,7 @@ typedef virDomainHostdevSubsysPCI *virDomainHostdevSubsysPCIPtr;
struct _virDomainHostdevSubsysPCI {
virDevicePCIAddress addr; /* host address */
int backend; /* enum virDomainHostdevSubsysPCIBackendType */
+ int iommu;
};
typedef struct _virDomainHostdevSubsysSCSIHost virDomainHostdevSubsysSCSIHost;
@@ -685,6 +686,7 @@ typedef enum {
VIR_DOMAIN_CONTROLLER_TYPE_CCID,
VIR_DOMAIN_CONTROLLER_TYPE_USB,
VIR_DOMAIN_CONTROLLER_TYPE_PCI,
+ VIR_DOMAIN_CONTROLLER_TYPE_SPAPR_PCI_VFIO,
VIR_DOMAIN_CONTROLLER_TYPE_LAST
} virDomainControllerType;
@@ -742,6 +744,12 @@ struct _virDomainPCIControllerOpts {
unsigned long pcihole64size;
};
+/* Options specific to spapr-vfio-pci controllers */
+typedef struct _virDomainSPAPRVfioControllerOpts virDomainSPAPRVfioControllerOpts;
+typedef virDomainSPAPRVfioControllerOpts *virDomainSPAPRVfioControllerOptsPtr;
+struct _virDomainSPAPRVfioControllerOpts {
+    int iommuGroupNum; /* IOMMU group number; -1 when not set */
+};
+
/* Stores the virtual disk controller configuration */
struct _virDomainControllerDef {
int type;
@@ -750,9 +758,11 @@ struct _virDomainControllerDef {
unsigned int queues;
unsigned int cmd_per_lun;
unsigned int max_sectors;
+ int domain;
union {
virDomainVirtioSerialOpts vioserial;
virDomainPCIControllerOpts pciopts;
+ virDomainSPAPRVfioControllerOpts spaprvfio;
} opts;
virDomainDeviceInfo info;
};
@@ -2808,12 +2818,15 @@ void virDomainListFree(virDomainPtr *list);
int
virDomainDefMaybeAddController(virDomainDefPtr def,
int type,
+ int domain,
int idx,
int model);
int
virDomainDefMaybeAddInput(virDomainDefPtr def,
int type,
int bus);
+int
+virDomainDefMaybeAddHostdevSpaprPCIVfiocontrollers(virDomainDefPtr def);
char *virDomainDefGetDefaultEmulator(virDomainDefPtr def, virCapsPtr caps);
@@ -2845,6 +2858,12 @@ int virDomainObjSetMetadata(virDomainObjPtr vm,
const char *configDir,
unsigned int flags);
+/* True when @dvc is a subsystem-mode PCI hostdev whose backend is either
+ * VFIO or left at the default.  @dvc is evaluated more than once, so it
+ * must not have side effects.
+ */
+# define IS_PCI_VFIO_HOSTDEV(dvc) \
+    (((dvc)->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) && \
+     ((dvc)->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) && \
+     (((dvc)->source.subsys.u.pci.backend == \
+       VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) || \
+      ((dvc)->source.subsys.u.pci.backend == \
+       VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT)))
+
bool virDomainDefNeedsPlacementAdvice(virDomainDefPtr def)
ATTRIBUTE_NONNULL(1);
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 9f749b7..f61fccd 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -197,6 +197,7 @@ virDomainDefFree;
virDomainDefGetDefaultEmulator;
virDomainDefGetSecurityLabelDef;
virDomainDefMaybeAddController;
+virDomainDefMaybeAddHostdevSpaprPCIVfiocontrollers;
virDomainDefMaybeAddInput;
virDomainDefNeedsPlacementAdvice;
virDomainDefNew;
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 8cb0865..3cec764 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -559,6 +559,8 @@ qemuNetworkPrepareDevices(virDomainDefPtr def)
}
if (virDomainHostdevInsert(def, hostdev) < 0)
goto cleanup;
+ if (virDomainDefMaybeAddHostdevSpaprPCIVfiocontrollers(def) < 0)
+ goto cleanup;
}
}
ret = 0;
@@ -1498,7 +1500,7 @@ qemuDomainAssignPCIAddresses(virDomainDefPtr def,
virDomainPCIAddressBusPtr bus = &addrs->buses[i];
if ((rv = virDomainDefMaybeAddController(
- def, VIR_DOMAIN_CONTROLLER_TYPE_PCI,
+ def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 0,
i, bus->model)) < 0)
goto cleanup;
/* If we added a new bridge, we will need one more address */
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 76fccce..8c3f92a 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -998,17 +998,17 @@ qemuDomainDefPostParse(virDomainDefPtr def,
if (addDefaultUSB &&
virDomainDefMaybeAddController(
- def, VIR_DOMAIN_CONTROLLER_TYPE_USB, 0, -1) < 0)
+ def, VIR_DOMAIN_CONTROLLER_TYPE_USB, 0, 0, -1) < 0)
return -1;
if (addImplicitSATA &&
virDomainDefMaybeAddController(
- def, VIR_DOMAIN_CONTROLLER_TYPE_SATA, 0, -1) < 0)
+ def, VIR_DOMAIN_CONTROLLER_TYPE_SATA, 0, 0, -1) < 0)
return -1;
if (addPCIRoot &&
virDomainDefMaybeAddController(
- def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 0,
+ def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 0, 0,
VIR_DOMAIN_CONTROLLER_MODEL_PCI_ROOT) < 0)
return -1;
@@ -1018,13 +1018,13 @@ qemuDomainDefPostParse(virDomainDefPtr def,
*/
if (addPCIeRoot) {
if (virDomainDefMaybeAddController(
- def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 0,
+ def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 0, 0,
VIR_DOMAIN_CONTROLLER_MODEL_PCIE_ROOT) < 0 ||
virDomainDefMaybeAddController(
- def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 1,
+ def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 0, 1,
VIR_DOMAIN_CONTROLLER_MODEL_DMI_TO_PCI_BRIDGE) < 0 ||
virDomainDefMaybeAddController(
- def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 2,
+ def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 0, 2,
VIR_DOMAIN_CONTROLLER_MODEL_PCI_BRIDGE) < 0) {
return -1;
}
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 7377320..508b748 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -7099,6 +7099,9 @@ qemuDomainAttachDeviceConfig(virQEMUCapsPtr qemuCaps,
net =
dev->data.net;
if (virDomainNetInsert(vmdef, net))
return -1;
+ if (dev->data.net->type == VIR_DOMAIN_NET_TYPE_HOSTDEV)
+ if (virDomainDefMaybeAddHostdevSpaprPCIVfiocontrollers(vmdef) < 0)
+ return -1;
dev->data.net = NULL;
if (qemuDomainAssignAddresses(vmdef, qemuCaps, NULL) < 0)
return -1;
@@ -7113,6 +7116,9 @@ qemuDomainAttachDeviceConfig(virQEMUCapsPtr qemuCaps,
}
if (virDomainHostdevInsert(vmdef, hostdev))
return -1;
+ if (IS_PCI_VFIO_HOSTDEV(hostdev))
+ if (virDomainDefMaybeAddHostdevSpaprPCIVfiocontrollers(vmdef) < 0)
+ return -1;
dev->data.hostdev = NULL;
if (qemuDomainAssignAddresses(vmdef, qemuCaps, NULL) < 0)
return -1;