[libvirt] [PATCH 0/2] check IOMMU group devices usage during vfio device passthrough

Problem: ==================================================================== If a device in the same iommu group is in use by a different vm, the guest boot fails with the below error during vfio passthrough. bash-4.2$ virsh start demo error: Failed to start domain demo error: internal error: process exited while connecting to monitor: 2014-12-02T13:43:52.020136Z qemu-system-x86_64: -device vfio-pci,host=00:1c.3,id=hostdev0,bus=pci.0,addr=0x5: vfio: error opening /dev/vfio/7: Device or resource busy Solution: ===================================================================== The patch iterates through the iommu group devices and errors out cleanly mentioning the device and the guest which is using the device. With Patch bash-4.2$ virsh start demo error: Failed to start domain demo error: Requested operation is not valid: PCI device 0000:0d:00.0 is in use by driver QEMU, domain vm10 --- Shivaprasad G Bhat (2): Move struct _virPCIDevice definition from virpci.c to virpci.h check IOMMU group devices usage when preparing device for vfio passthrough src/util/virhostdev.c | 87 +++++++++++++++++++++++++++++-------------------- src/util/virpci.c | 29 ---------------- src/util/virpci.h | 30 +++++++++++++++++ 3 files changed, 82 insertions(+), 64 deletions(-) -- Signature

The struct members can't be referenced from files including the .h. Moving the definition to .h from .c helps referencing the members. The patch is just the struct definition code movement. Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com> --- src/util/virpci.c | 29 ----------------------------- src/util/virpci.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/src/util/virpci.c b/src/util/virpci.c index cd78212..74c5c9b 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -49,39 +49,10 @@ VIR_LOG_INIT("util.pci"); #define PCI_SYSFS "/sys/bus/pci/" -#define PCI_ID_LEN 10 /* "XXXX XXXX" */ -#define PCI_ADDR_LEN 13 /* "XXXX:XX:XX.X" */ VIR_ENUM_IMPL(virPCIELinkSpeed, VIR_PCIE_LINK_SPEED_LAST, "", "2.5", "5", "8") -struct _virPCIDevice { - unsigned int domain; - unsigned int bus; - unsigned int slot; - unsigned int function; - - char name[PCI_ADDR_LEN]; /* domain:bus:slot.function */ - char id[PCI_ID_LEN]; /* product vendor */ - char *path; - - /* The driver:domain which uses the device */ - char *used_by_drvname; - char *used_by_domname; - - unsigned int pcie_cap_pos; - unsigned int pci_pm_cap_pos; - bool has_flr; - bool has_pm_reset; - bool managed; - char *stubDriver; - - /* used by reattach function */ - bool unbind_from_stub; - bool remove_slot; - bool reprobe; -}; - struct _virPCIDeviceList { virObjectLockable parent; diff --git a/src/util/virpci.h b/src/util/virpci.h index 1ce9821..0612f08 100644 --- a/src/util/virpci.h +++ b/src/util/virpci.h @@ -35,6 +35,36 @@ typedef virPCIDeviceAddress *virPCIDeviceAddressPtr; typedef struct _virPCIDeviceList virPCIDeviceList; typedef virPCIDeviceList *virPCIDeviceListPtr; +# define PCI_ID_LEN 10 /* "XXXX XXXX" */ +# define PCI_ADDR_LEN 13 /* "XXXX:XX:XX.X" */ + +struct _virPCIDevice { + unsigned int domain; + unsigned int bus; + unsigned int slot; + unsigned int function; + + char name[PCI_ADDR_LEN]; /* domain:bus:slot.function */ + char id[PCI_ID_LEN]; /* product vendor */ + char *path; + + /* The driver:domain which uses the device */ + char *used_by_drvname; + char *used_by_domname; + + unsigned int pcie_cap_pos; + unsigned int pci_pm_cap_pos; + bool has_flr; + bool has_pm_reset; + bool managed; + char *stubDriver; + + /* used by reattach function */ + bool unbind_from_stub; + bool remove_slot; + bool reprobe; +}; + struct _virPCIDeviceAddress { unsigned int domain; unsigned int bus;

On 04.12.2014 08:27, Shivaprasad G Bhat wrote:
The struct members can't be referenced from files including the .h. Moving the definition to .h from .c helps referencing the members. The patch is just the struct definition code movement.
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com> --- src/util/virpci.c | 29 ----------------------------- src/util/virpci.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/src/util/virpci.c b/src/util/virpci.c index cd78212..74c5c9b 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -49,39 +49,10 @@ VIR_LOG_INIT("util.pci");
#define PCI_SYSFS "/sys/bus/pci/" -#define PCI_ID_LEN 10 /* "XXXX XXXX" */ -#define PCI_ADDR_LEN 13 /* "XXXX:XX:XX.X" */
VIR_ENUM_IMPL(virPCIELinkSpeed, VIR_PCIE_LINK_SPEED_LAST, "", "2.5", "5", "8")
-struct _virPCIDevice { - unsigned int domain; - unsigned int bus; - unsigned int slot; - unsigned int function; - - char name[PCI_ADDR_LEN]; /* domain:bus:slot.function */ - char id[PCI_ID_LEN]; /* product vendor */ - char *path; - - /* The driver:domain which uses the device */ - char *used_by_drvname; - char *used_by_domname; - - unsigned int pcie_cap_pos; - unsigned int pci_pm_cap_pos; - bool has_flr; - bool has_pm_reset; - bool managed; - char *stubDriver; - - /* used by reattach function */ - bool unbind_from_stub; - bool remove_slot; - bool reprobe; -}; - struct _virPCIDeviceList { virObjectLockable parent;
diff --git a/src/util/virpci.h b/src/util/virpci.h index 1ce9821..0612f08 100644 --- a/src/util/virpci.h +++ b/src/util/virpci.h @@ -35,6 +35,36 @@ typedef virPCIDeviceAddress *virPCIDeviceAddressPtr; typedef struct _virPCIDeviceList virPCIDeviceList; typedef virPCIDeviceList *virPCIDeviceListPtr;
+# define PCI_ID_LEN 10 /* "XXXX XXXX" */ +# define PCI_ADDR_LEN 13 /* "XXXX:XX:XX.X" */ + +struct _virPCIDevice { + unsigned int domain; + unsigned int bus; + unsigned int slot; + unsigned int function; + + char name[PCI_ADDR_LEN]; /* domain:bus:slot.function */ + char id[PCI_ID_LEN]; /* product vendor */ + char *path; + + /* The driver:domain which uses the device */ + char *used_by_drvname; + char *used_by_domname; + + unsigned int pcie_cap_pos; + unsigned int pci_pm_cap_pos; + bool has_flr; + bool has_pm_reset; + bool managed; + char *stubDriver; + + /* used by reattach function */ + bool unbind_from_stub; + bool remove_slot; + bool reprobe; +}; + struct _virPCIDeviceAddress { unsigned int domain; unsigned int bus;
I think instead of exposing the struct we may need getters, virPCIDeviceGetDomain(), virPCIDeviceGetBus(), etc. Or even better, virPCIdeviceGetAddress() which would return type of virPCIDeviceAddress. Michal

Thanks for the comments Michal. I just posted the v2 for review. Regards. Shiva On Mon, Jan 12, 2015 at 2:43 PM, Michal Privoznik <mprivozn@redhat.com> wrote:
On 04.12.2014 08:27, Shivaprasad G Bhat wrote:
The struct members can't be referenced from files including the .h. Moving the definition to .h from .c helps referencing the members. The patch is just the struct definition code movement.
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com> --- src/util/virpci.c | 29 ----------------------------- src/util/virpci.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/src/util/virpci.c b/src/util/virpci.c index cd78212..74c5c9b 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -49,39 +49,10 @@ VIR_LOG_INIT("util.pci");
#define PCI_SYSFS "/sys/bus/pci/" -#define PCI_ID_LEN 10 /* "XXXX XXXX" */ -#define PCI_ADDR_LEN 13 /* "XXXX:XX:XX.X" */
VIR_ENUM_IMPL(virPCIELinkSpeed, VIR_PCIE_LINK_SPEED_LAST, "", "2.5", "5", "8")
-struct _virPCIDevice { - unsigned int domain; - unsigned int bus; - unsigned int slot; - unsigned int function; - - char name[PCI_ADDR_LEN]; /* domain:bus:slot.function */ - char id[PCI_ID_LEN]; /* product vendor */ - char *path; - - /* The driver:domain which uses the device */ - char *used_by_drvname; - char *used_by_domname; - - unsigned int pcie_cap_pos; - unsigned int pci_pm_cap_pos; - bool has_flr; - bool has_pm_reset; - bool managed; - char *stubDriver; - - /* used by reattach function */ - bool unbind_from_stub; - bool remove_slot; - bool reprobe; -}; - struct _virPCIDeviceList { virObjectLockable parent;
diff --git a/src/util/virpci.h b/src/util/virpci.h index 1ce9821..0612f08 100644 --- a/src/util/virpci.h +++ b/src/util/virpci.h @@ -35,6 +35,36 @@ typedef virPCIDeviceAddress *virPCIDeviceAddressPtr; typedef struct _virPCIDeviceList virPCIDeviceList; typedef virPCIDeviceList *virPCIDeviceListPtr;
+# define PCI_ID_LEN 10 /* "XXXX XXXX" */ +# define PCI_ADDR_LEN 13 /* "XXXX:XX:XX.X" */ + +struct _virPCIDevice { + unsigned int domain; + unsigned int bus; + unsigned int slot; + unsigned int function; + + char name[PCI_ADDR_LEN]; /* domain:bus:slot.function */ + char id[PCI_ID_LEN]; /* product vendor */ + char *path; + + /* The driver:domain which uses the device */ + char *used_by_drvname; + char *used_by_domname; + + unsigned int pcie_cap_pos; + unsigned int pci_pm_cap_pos; + bool has_flr; + bool has_pm_reset; + bool managed; + char *stubDriver; + + /* used by reattach function */ + bool unbind_from_stub; + bool remove_slot; + bool reprobe; +}; + struct _virPCIDeviceAddress { unsigned int domain; unsigned int bus;
I think instead of exposing the struct we may need getters, virPCIDeviceGetDomain(), virPCIDeviceGetBus(), etc. Or even better, virPCIdeviceGetAddress() which would return type of virPCIDeviceAddress.
Michal

The virsh start <domain> fails with qemu error when the hostdevices of the same iommu group are used actively by other vms. It is not clear which hostdev from the same iommu group is used by any of the running guests. User has to go through every guest xml to figure out who is using the hostdev of same iommu group. Solution: Iterate the iommu group of the hostdev and error our neatly in case a device in the same iommu group is busy. Reattach code also does the same kind of check, remove duplicate code as well. Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com> --- src/util/virhostdev.c | 87 +++++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/src/util/virhostdev.c b/src/util/virhostdev.c index 18ff96b..da40030 100644 --- a/src/util/virhostdev.c +++ b/src/util/virhostdev.c @@ -54,6 +54,42 @@ static virClassPtr virHostdevManagerClass; static void virHostdevManagerDispose(void *obj); static virHostdevManagerPtr virHostdevManagerNew(void); +static int virHostdevIsPCINodeDeviceUsed(virPCIDeviceAddressPtr devAddr, void *opaque) +{ + virPCIDevicePtr other; + int ret = -1; + virPCIDevicePtr pci; + virHostdevManagerPtr hostdev_mgr = opaque; + + if (!(pci = virPCIDeviceNew(devAddr->domain, devAddr->bus, + devAddr->slot, devAddr->function))) + goto cleanup; + + other = virPCIDeviceListFind(hostdev_mgr->activePCIHostdevs, pci); + if (other) { + const char *other_drvname = NULL; + const char *other_domname = NULL; + virPCIDeviceGetUsedBy(other, &other_drvname, &other_domname); + + if (other_drvname && other_domname) + virReportError(VIR_ERR_OPERATION_INVALID, + _("PCI device %s is in use by " + "driver %s, domain %s"), + virPCIDeviceGetName(pci), + other_drvname, other_domname); + else + virReportError(VIR_ERR_OPERATION_INVALID, + _("PCI device %s is in use"), + virPCIDeviceGetName(pci)); + ret = -1; + goto cleanup; + } + ret = 0; + cleanup: + virPCIDeviceFree(pci); + return ret; +} + static int virHostdevManagerOnceInit(void) { if (!(virHostdevManagerClass = virClassNew(virClassForObject(), @@ -517,8 +553,10 @@ virHostdevPreparePCIDevices(virHostdevManagerPtr hostdev_mgr, */ for (i = 0; i < virPCIDeviceListCount(pcidevs); i++) { + void *opaque = hostdev_mgr; virPCIDevicePtr dev = virPCIDeviceListGet(pcidevs, i); - virPCIDevicePtr other; + virPCIDeviceAddress devAddr = { dev->domain, dev->bus, + dev->slot, dev->function }; bool strict_acs_check = !!(flags & VIR_HOSTDEV_STRICT_ACS_CHECK); if (!virPCIDeviceIsAssignable(dev, strict_acs_check)) { @@ -528,24 +566,17 @@ virHostdevPreparePCIDevices(virHostdevManagerPtr hostdev_mgr, goto cleanup; } /* The device is in use by other active domain if - * the dev is in list activePCIHostdevs. + * the dev is in list activePCIHostdevs. VFIO devices + * belonging to same iommu group cant be shared + * across guests. */ - if ((other = virPCIDeviceListFind(hostdev_mgr->activePCIHostdevs, dev))) { - const char *other_drvname; - const char *other_domname; - - virPCIDeviceGetUsedBy(other, &other_drvname, &other_domname); - if (other_drvname && other_domname) - virReportError(VIR_ERR_OPERATION_INVALID, - _("PCI device %s is in use by " - "driver %s, domain %s"), - virPCIDeviceGetName(dev), - other_drvname, other_domname); - else - virReportError(VIR_ERR_OPERATION_INVALID, - _("PCI device %s is already in use"), - virPCIDeviceGetName(dev)); - goto cleanup; + if (STREQ(virPCIDeviceGetStubDriver(dev), "vfio-pci")) { + if (virPCIDeviceAddressIOMMUGroupIterate(&devAddr, + virHostdevIsPCINodeDeviceUsed, + opaque) < 0) + goto cleanup; + } else if (virHostdevIsPCINodeDeviceUsed(&devAddr, opaque)) { + goto cleanup; } } @@ -1506,29 +1537,15 @@ int virHostdevPCINodeDeviceReAttach(virHostdevManagerPtr hostdev_mgr, virPCIDevicePtr pci) { - virPCIDevicePtr other; + virPCIDeviceAddress devAddr = { pci->domain, pci->bus, pci->slot, pci->function }; int ret = -1; + void *opaque = hostdev_mgr; virObjectLock(hostdev_mgr->activePCIHostdevs); virObjectLock(hostdev_mgr->inactivePCIHostdevs); - other = virPCIDeviceListFind(hostdev_mgr->activePCIHostdevs, pci); - if (other) { - const char *other_drvname = NULL; - const char *other_domname = NULL; - virPCIDeviceGetUsedBy(other, &other_drvname, &other_domname); - if (other_drvname && other_domname) - virReportError(VIR_ERR_OPERATION_INVALID, - _("PCI device %s is still in use by " - "driver %s, domain %s"), - virPCIDeviceGetName(pci), - other_drvname, other_domname); - else - virReportError(VIR_ERR_OPERATION_INVALID, - _("PCI device %s is still in use"), - virPCIDeviceGetName(pci)); + if (virHostdevIsPCINodeDeviceUsed(&devAddr, opaque)) goto out; - } virPCIDeviceReattachInit(pci);

On 04.12.2014 08:28, Shivaprasad G Bhat wrote:
The virsh start <domain> fails with qemu error when the hostdevices of the same iommu group are used actively by other vms. It is not clear which hostdev from the same iommu group is used by any of the running guests. User has to go through every guest xml to figure out who is using the hostdev of same iommu group.
Solution: Iterate the iommu group of the hostdev and error our neatly in case a device in the same iommu group is busy. Reattach code also does the same kind of check, remove duplicate code as well.
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.vnet.ibm.com> --- src/util/virhostdev.c | 87 +++++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 35 deletions(-)
diff --git a/src/util/virhostdev.c b/src/util/virhostdev.c index 18ff96b..da40030 100644 --- a/src/util/virhostdev.c +++ b/src/util/virhostdev.c @@ -54,6 +54,42 @@ static virClassPtr virHostdevManagerClass; static void virHostdevManagerDispose(void *obj); static virHostdevManagerPtr virHostdevManagerNew(void);
+static int virHostdevIsPCINodeDeviceUsed(virPCIDeviceAddressPtr devAddr, void *opaque) +{ + virPCIDevicePtr other; + int ret = -1; + virPCIDevicePtr pci; + virHostdevManagerPtr hostdev_mgr = opaque; + + if (!(pci = virPCIDeviceNew(devAddr->domain, devAddr->bus, + devAddr->slot, devAddr->function)))
Indentation's off here and in other places too. But that's not a show stopper.
+ goto cleanup; + + other = virPCIDeviceListFind(hostdev_mgr->activePCIHostdevs, pci); + if (other) { + const char *other_drvname = NULL; + const char *other_domname = NULL; + virPCIDeviceGetUsedBy(other, &other_drvname, &other_domname); + + if (other_drvname && other_domname) + virReportError(VIR_ERR_OPERATION_INVALID, + _("PCI device %s is in use by " + "driver %s, domain %s"), + virPCIDeviceGetName(pci), + other_drvname, other_domname); + else + virReportError(VIR_ERR_OPERATION_INVALID, + _("PCI device %s is in use"), + virPCIDeviceGetName(pci)); + ret = -1; + goto cleanup; + } + ret = 0; + cleanup: + virPCIDeviceFree(pci); + return ret; +} + static int virHostdevManagerOnceInit(void) { if (!(virHostdevManagerClass = virClassNew(virClassForObject(), @@ -517,8 +553,10 @@ virHostdevPreparePCIDevices(virHostdevManagerPtr hostdev_mgr, */
for (i = 0; i < virPCIDeviceListCount(pcidevs); i++) { + void *opaque = hostdev_mgr;
This is unneeded variable. Just drop it and use hostdev_mgr instead.
virPCIDevicePtr dev = virPCIDeviceListGet(pcidevs, i); - virPCIDevicePtr other; + virPCIDeviceAddress devAddr = { dev->domain, dev->bus, + dev->slot, dev->function }; bool strict_acs_check = !!(flags & VIR_HOSTDEV_STRICT_ACS_CHECK);
if (!virPCIDeviceIsAssignable(dev, strict_acs_check)) { @@ -528,24 +566,17 @@ virHostdevPreparePCIDevices(virHostdevManagerPtr hostdev_mgr, goto cleanup; } /* The device is in use by other active domain if - * the dev is in list activePCIHostdevs. + * the dev is in list activePCIHostdevs. VFIO devices + * belonging to same iommu group cant be shared + * across guests. */ - if ((other = virPCIDeviceListFind(hostdev_mgr->activePCIHostdevs, dev))) { - const char *other_drvname; - const char *other_domname; - - virPCIDeviceGetUsedBy(other, &other_drvname, &other_domname); - if (other_drvname && other_domname) - virReportError(VIR_ERR_OPERATION_INVALID, - _("PCI device %s is in use by " - "driver %s, domain %s"), - virPCIDeviceGetName(dev), - other_drvname, other_domname); - else - virReportError(VIR_ERR_OPERATION_INVALID, - _("PCI device %s is already in use"), - virPCIDeviceGetName(dev)); - goto cleanup; + if (STREQ(virPCIDeviceGetStubDriver(dev), "vfio-pci")) { + if (virPCIDeviceAddressIOMMUGroupIterate(&devAddr, + virHostdevIsPCINodeDeviceUsed, + opaque) < 0) + goto cleanup; + } else if (virHostdevIsPCINodeDeviceUsed(&devAddr, opaque)) { + goto cleanup; } }
@@ -1506,29 +1537,15 @@ int virHostdevPCINodeDeviceReAttach(virHostdevManagerPtr hostdev_mgr, virPCIDevicePtr pci) { - virPCIDevicePtr other; + virPCIDeviceAddress devAddr = { pci->domain, pci->bus, pci->slot, pci->function }; int ret = -1; + void *opaque = hostdev_mgr;
virObjectLock(hostdev_mgr->activePCIHostdevs); virObjectLock(hostdev_mgr->inactivePCIHostdevs); - other = virPCIDeviceListFind(hostdev_mgr->activePCIHostdevs, pci); - if (other) { - const char *other_drvname = NULL; - const char *other_domname = NULL; - virPCIDeviceGetUsedBy(other, &other_drvname, &other_domname);
- if (other_drvname && other_domname) - virReportError(VIR_ERR_OPERATION_INVALID, - _("PCI device %s is still in use by " - "driver %s, domain %s"), - virPCIDeviceGetName(pci), - other_drvname, other_domname); - else - virReportError(VIR_ERR_OPERATION_INVALID, - _("PCI device %s is still in use"), - virPCIDeviceGetName(pci)); + if (virHostdevIsPCINodeDeviceUsed(&devAddr, opaque)) goto out; - }
virPCIDeviceReattachInit(pci);
Okay, I like this patch. it's a weak ACK, but I can't push it without 1/2 which implies slight rework of this patch too. Looking forward to v2. Michal

Ping! Thanks, Shiva On Thu, Dec 4, 2014 at 12:56 PM, Shivaprasad G Bhat <shivaprasadbhat@gmail.com> wrote:
Problem: ==================================================================== If a device in the same iommu group is in use by a different vm, the guest boot fails with the below error during vfio passthrough. bash-4.2$ virsh start demo error: Failed to start domain demo error: internal error: process exited while connecting to monitor: 2014-12-02T13:43:52.020136Z qemu-system-x86_64: -device vfio-pci,host=00:1c.3,id=hostdev0,bus=pci.0,addr=0x5: vfio: error opening /dev/vfio/7: Device or resource busy
Solution: ===================================================================== The patch iterates through the iommu group devices and errors out cleanly mentioning the device and the guest which is using the device.
With Patch bash-4.2$ virsh start demo error: Failed to start domain demo error: Requested operation is not valid: PCI device 0000:0d:00.0 is in use by driver QEMU, domain vm10
---
Shivaprasad G Bhat (2): Move struct _virPCIDevice definition from virpci.c to virpci.h check IOMMU group devices usage when preparing device for vfio passthrough
src/util/virhostdev.c | 87 +++++++++++++++++++++++++++++-------------------- src/util/virpci.c | 29 ---------------- src/util/virpci.h | 30 +++++++++++++++++ 3 files changed, 82 insertions(+), 64 deletions(-)
-- Signature
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
participants (3)
-
Michal Privoznik
-
Shivaprasad bhat
-
Shivaprasad G Bhat