[libvirt] [PATCH 0/3] Block assignment of PCI devices below non-ACS capable switch

Hi. This is a patchset for blocking assignment of PCI devices below non-ACS capable switch. In case user still wants to assign such device even though it might not be safe, she can specify permissive='yes' attribute for <hostdev>. Special thanks to Chris L. who created a standalone program the PCI check. This code is a port of that check to libvirt. Jiri Denemark (3): Tests for ACS in PCIe switches New 'permissive' attribute for hostdev Use pciDeviceIsAssignable in qemu driver docs/schemas/domain.rng | 8 +++ src/conf/domain_conf.c | 14 ++++- src/conf/domain_conf.h | 1 + src/libvirt_private.syms | 3 + src/qemu/qemu_driver.c | 9 +++- src/util/pci.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++ src/util/pci.h | 7 ++ 7 files changed, 186 insertions(+), 3 deletions(-)

New pciDeviceIsAssignable() function for checking whether a given PCI device can be assigned to a guest was added. Currently it only checks for ACS being enabled on all PCIe switches between root and the PCI device. In the future, it could be the right place to check whether a device is unbound or bound to a stub driver. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/libvirt_private.syms | 3 + src/util/pci.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++ src/util/pci.h | 7 ++ 3 files changed, 157 insertions(+), 0 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index f90f269..7073e0c 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -438,6 +438,9 @@ pciDeviceListGet; pciDeviceListLock; pciDeviceListUnlock; pciDeviceListSteal; +pciDeviceSetPermissive; +pciDeviceGetPermissive; +pciDeviceIsAssignable; # processinfo.h diff --git a/src/util/pci.c b/src/util/pci.c index 1e003c2..113299e 100644 --- a/src/util/pci.c +++ b/src/util/pci.c @@ -64,6 +64,7 @@ struct _pciDevice { unsigned has_flr : 1; unsigned has_pm_reset : 1; unsigned managed : 1; + unsigned permissive : 1; }; struct _pciDeviceList { @@ -140,6 +141,14 @@ struct _pciDeviceList { #define PCI_AF_CAP 0x3 /* Advanced features capabilities */ #define PCI_AF_CAP_FLR 0x2 /* Function Level Reset */ +#define PCI_EXP_FLAGS 0x2 +#define PCI_EXP_FLAGS_TYPE 0x00f0 +#define PCI_EXP_TYPE_DOWNSTREAM 0x6 + +#define PCI_EXT_CAP_ID_ACS 0x000d +#define PCI_EXT_ACS_CTRL 0x06 +#define PCI_EXT_CAP_ACS_ENABLED 0x1d + static int pciOpenConfig(pciDevice *dev) { @@ -326,6 +335,29 @@ pciFindCapabilityOffset(pciDevice *dev, unsigned capability) return 0; } +static unsigned int +pciFindExtendedCapabilityOffset(pciDevice *dev, unsigned capability) +{ + int ttl; + unsigned int pos; + uint32_t header; + + ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */ + pos = PCI_CONF_LEN; + + while (ttl > 0 && pos >= PCI_CONF_LEN) { + header = pciRead32(dev, pos); + + if ((header & 0x0000ffff) == capability) + return pos; + + pos = (header >> 20) & 0xffc; /* WTH */ + ttl--; + } + + return 0; +} + static unsigned pciDetectFunctionLevelReset(pciDevice *dev) { @@ -933,6 +965,16 @@ unsigned pciDeviceGetManaged(pciDevice *dev) return dev->managed; } +void pciDeviceSetPermissive(pciDevice *dev, unsigned permissive) +{ + dev->permissive = !!permissive; +} + +unsigned pciDeviceGetPermissive(pciDevice *dev) +{ + return dev->permissive; +} + pciDeviceList * pciDeviceListNew(virConnectPtr conn) { @@ -1110,3 +1152,108 @@ cleanup: VIR_FREE(pcidir); return ret; } + +static int +pciDeviceDownstreamLacksACS(virConnectPtr conn, + pciDevice *dev) +{ + uint16_t flags; + uint16_t ctrl; + unsigned int pos; + + if (!dev->initted && pciInitDevice(conn, dev) < 0) + return -1; + + pos = dev->pcie_cap_pos; + if (!pos || !pciRead16(dev, PCI_CLASS_DEVICE) == PCI_CLASS_BRIDGE_PCI) + return 0; + + flags = pciRead16(dev, pos + PCI_EXP_FLAGS); + if (((flags & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_DOWNSTREAM) + return 0; + + pos = pciFindExtendedCapabilityOffset(dev, PCI_EXT_CAP_ID_ACS); + if (!pos) { + VIR_DEBUG("%s %s: downstream port lacks ACS", dev->id, dev->name); + return 1; + } + + ctrl = pciRead16(dev, pos + PCI_EXT_ACS_CTRL); + if ((ctrl & PCI_EXT_CAP_ACS_ENABLED) != PCI_EXT_CAP_ACS_ENABLED) { + VIR_DEBUG("%s %s: downstream port has ACS disabled", + dev->id, dev->name); + return 1; + } + + return 0; +} + +static int +pciDeviceIsBehindSwitchLackingACS(virConnectPtr conn, + pciDevice *dev) +{ + pciDevice *parent; + + if (!(parent = pciGetParentDevice(conn, dev))) { + pciReportError(conn, VIR_ERR_NO_SUPPORT, + _("Failed to find parent device for %s"), + dev->name); + return -1; + } + + /* XXX we should rather fail when we can't find device's parent and + * stop the loop when we get to root instead of just stopping when no + * parent can be found + */ + do { + pciDevice *tmp; + int acs; + + acs = pciDeviceDownstreamLacksACS(conn, parent); + + if (acs) { + pciFreeDevice(conn, parent); + if (acs < 0) + return -1; + else + return 1; + } + + tmp = parent; + parent = pciGetParentDevice(conn, parent); + pciFreeDevice(conn, tmp); + } while (parent); + + return 0; +} + +int pciDeviceIsAssignable(virConnectPtr conn, + pciDevice *dev) +{ + int ret; + + /* XXX This could be a great place to actually check that a non-managed + * device isn't in use, e.g. by checking that device is either un-bound + * or bound to a stub driver. + */ + + ret = pciDeviceIsBehindSwitchLackingACS(conn, dev); + if (ret < 0) + return 0; + + if (ret) { + if (dev->permissive) { + VIR_DEBUG("%s %s: is in permissive mode; allowing it to be assigned", + dev->id, dev->name); + } + else { + pciReportError(conn, VIR_ERR_NO_SUPPORT, + _("Device %s is behind a switch lacking ACS and " + "cannot be assigned"), + dev->name); + return 0; + } + } + + return 1; +} diff --git a/src/util/pci.h b/src/util/pci.h index 1f0b9d2..a56f064 100644 --- a/src/util/pci.h +++ b/src/util/pci.h @@ -78,4 +78,11 @@ int pciDeviceFileIterate(virConnectPtr conn, pciDeviceFileActor actor, void *opaque); +int pciDeviceIsAssignable(virConnectPtr conn, + pciDevice *dev); + +void pciDeviceSetPermissive(pciDevice *dev, + unsigned managed); +unsigned pciDeviceGetPermissive(pciDevice *dev); + #endif /* __VIR_PCI_H__ */ -- 1.6.6.rc4

On Mon, Dec 21, 2009 at 02:27:17PM +0100, Jiri Denemark wrote:
New pciDeviceIsAssignable() function for checking whether a given PCI device can be assigned to a guest was added. Currently it only checks for ACS being enabled on all PCIe switches between root and the PCI device. In the future, it could be the right place to check whether a device is unbound or bound to a stub driver.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/libvirt_private.syms | 3 + src/util/pci.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++ src/util/pci.h | 7 ++ 3 files changed, 157 insertions(+), 0 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index f90f269..7073e0c 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -438,6 +438,9 @@ pciDeviceListGet; pciDeviceListLock; pciDeviceListUnlock; pciDeviceListSteal; +pciDeviceSetPermissive; +pciDeviceGetPermissive; +pciDeviceIsAssignable;
# processinfo.h diff --git a/src/util/pci.c b/src/util/pci.c index 1e003c2..113299e 100644 --- a/src/util/pci.c +++ b/src/util/pci.c @@ -64,6 +64,7 @@ struct _pciDevice { unsigned has_flr : 1; unsigned has_pm_reset : 1; unsigned managed : 1; + unsigned permissive : 1; };
Rather than the generic term 'permissive' I think we should be explicitly referring to the 'acs' feature. So perhaps call it 'strict_acs_check', or something along those lines. Likewise for the method names. Regards, Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

Hi Jiri, On Mon, 2009-12-21 at 14:27 +0100, Jiri Denemark wrote:
@@ -140,6 +141,14 @@ struct _pciDeviceList { #define PCI_AF_CAP 0x3 /* Advanced features capabilities */ #define PCI_AF_CAP_FLR 0x2 /* Function Level Reset */
+#define PCI_EXP_FLAGS 0x2 +#define PCI_EXP_FLAGS_TYPE 0x00f0 +#define PCI_EXP_TYPE_DOWNSTREAM 0x6 + +#define PCI_EXT_CAP_ID_ACS 0x000d +#define PCI_EXT_ACS_CTRL 0x06 +#define PCI_EXT_CAP_ACS_ENABLED 0x1d
Just a very minor thing, but it'd be nice if you could reference the specification and section number etc. in comments like the rest of the macros do Thanks, Mark.

When it is set to 'yes', some check whether a device is safe to be assigned to a guest will be weakened. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- docs/schemas/domain.rng | 8 ++++++++ src/conf/domain_conf.c | 14 ++++++++++++-- src/conf/domain_conf.h | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng index 566b117..02116c3 100644 --- a/docs/schemas/domain.rng +++ b/docs/schemas/domain.rng @@ -1129,6 +1129,14 @@ </choice> </attribute> </optional> + <optional> + <attribute name="permissive"> + <choice> + <value>yes</value> + <value>no</value> + </choice> + </attribute> + </optional> <group> <element name="source"> <choice> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 0b4fe8b..642744d 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -2422,6 +2422,7 @@ virDomainHostdevDefParseXML(virConnectPtr conn, xmlNodePtr cur; virDomainHostdevDefPtr def; char *mode, *type = NULL, *managed = NULL; + char *permissive = NULL; if (VIR_ALLOC(def) < 0) { virReportOOMError(conn); @@ -2460,6 +2461,13 @@ virDomainHostdevDefParseXML(virConnectPtr conn, VIR_FREE(managed); } + permissive = virXMLPropString(node, "permissive"); + if (permissive != NULL) { + if (STREQ(permissive, "yes")) + def->permissive = 1; + VIR_FREE(permissive); + } + cur = node->children; while (cur != NULL) { if (cur->type == XML_ELEMENT_NODE) { @@ -4497,8 +4505,10 @@ virDomainHostdevDefFormat(virConnectPtr conn, return -1; } - virBufferVSprintf(buf, " <hostdev mode='%s' type='%s' managed='%s'>\n", - mode, type, def->managed ? "yes" : "no"); + virBufferVSprintf(buf, " <hostdev mode='%s' type='%s' managed='%s' permissive='%s'>\n", + mode, type, + def->managed ? "yes" : "no", + def->permissive ? "yes" : "no"); virBufferAddLit(buf, " <source>\n"); if (def->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB) { diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index a807e9d..e8607b5 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -431,6 +431,7 @@ typedef virDomainHostdevDef *virDomainHostdevDefPtr; struct _virDomainHostdevDef { int mode; /* enum virDomainHostdevMode */ unsigned int managed : 1; + unsigned int permissive : 1; union { struct { int type; /* enum virDomainHostdevBusType */ -- 1.6.6.rc4

On Mon, Dec 21, 2009 at 02:27:18PM +0100, Jiri Denemark wrote:
When it is set to 'yes', some check whether a device is safe to be assigned to a guest will be weakened.
I think this is a rather ill-defined concept to be adding the guest XML, since there are many checks done for assignment, and this is only impacting one of them. Whether to allow a device beind a non-ACS enable switch to be used in a VM has implications beyond just the one VM it is assigned to. Thus is strikes me that the decision as to whether to allow use of devices behind non-ACS switches should be a host level attribute. eg a config item in the /etc/qemu/qemu.conf file Regards, Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

When it is set to 'yes', some check whether a device is safe to be assigned to a guest will be weakened.
I think this is a rather ill-defined concept to be adding the guest XML, since there are many checks done for assignment, and this is only impacting one of them. Whether to allow a device beind a non-ACS enable switch to be used in a VM has implications beyond just the one VM it is assigned to. Thus is strikes me that the decision as to whether to allow use of devices behind non-ACS switches should be a host level attribute. eg a config item in the /etc/qemu/qemu.conf file
This was the idea behind: What remains to be implemented is the logic of the whitelist that you mention in comments #2 and #3. To be honest, I don't love this idea of the whitelist; not only will we have to maintain some kind of table, we will need to make sure the table is up-to-date every time new hardware comes out. It also breaks the security of the setup without letting the user know about (because it is on a magic whitelist that the user probably won't know anything about). I have an alternate proposal. What if we added a new <permissive/> tag to the libvirt XML for device assignment? In the normal case, we wouldn't allow *any* passthrough of devices behind non-ACS switches. However, if the user knows what they are doing, and they want to take this risk, they can add the <permissive/> tag to the XML, in which case it would allow the assignment to happen. This can even be used pretty successfully in virt-manager; it just needs to catch the appropriate exception from the first assignment, pop-up "This is dangerous because of non-ACS, blah, blah. Are you sure?", and then re-do the assignment with the <permissive/> tag. I just changed it to an attribute as I think it fits better. Jirka

On Mon, Dec 21, 2009 at 08:43:14PM +0100, Jiri Denemark wrote:
When it is set to 'yes', some check whether a device is safe to be assigned to a guest will be weakened.
I think this is a rather ill-defined concept to be adding the guest XML, since there are many checks done for assignment, and this is only impacting one of them. Whether to allow a device beind a non-ACS enable switch to be used in a VM has implications beyond just the one VM it is assigned to. Thus is strikes me that the decision as to whether to allow use of devices behind non-ACS switches should be a host level attribute. eg a config item in the /etc/qemu/qemu.conf file
This was the idea behind:
What remains to be implemented is the logic of the whitelist that you mention in comments #2 and #3. To be honest, I don't love this idea of the whitelist; not only will we have to maintain some kind of table, we will need to make sure the table is up-to-date every time new hardware comes out. It also breaks the security of the setup without letting the user know about (because it is on a magic whitelist that the user probably won't know anything about).
I have an alternate proposal. What if we added a new <permissive/> tag to the libvirt XML for device assignment? In the normal case, we wouldn't allow *any* passthrough of devices behind non-ACS switches. However, if the user knows what they are doing, and they want to take this risk, they can add the <permissive/> tag to the XML, in which case it would allow the assignment to happen. This can even be used pretty successfully in virt-manager; it just needs to catch the appropriate exception from the first assignment, pop-up "This is dangerous because of non-ACS, blah, blah. Are you sure?", and then re-do the assignment with the <permissive/> tag.
I think this is a pretty horrible user experiance, since the first thought will be 'what on earth is ACS?', closely followed by clicking 'OK'. There are also already many other restrictions on PCI device assignment, such as the availabilty of FLR, availablity of PM-reset, even VT-d itself, and we don't have user facing tunables to turn off those checks. These are all low-level hardware details that users really aren't equipped to understand - even most of us developers don't really understand them. I don't much like the idea of having to maintain a whitelist of devices that are safe to use, but pushing the problem off to the end user via a config option/confirm dialog is just avoiding the issue. Regards, Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

On 12/21/2009 02:57 PM, Daniel P. Berrange wrote:
On Mon, Dec 21, 2009 at 08:43:14PM +0100, Jiri Denemark wrote:
When it is set to 'yes', some check whether a device is safe to be assigned to a guest will be weakened.
I think this is a rather ill-defined concept to be adding the guest XML, since there are many checks done for assignment, and this is only impacting one of them. Whether to allow a device beind a non-ACS enable switch to be used in a VM has implications beyond just the one VM it is assigned to. Thus is strikes me that the decision as to whether to allow use of devices behind non-ACS switches should be a host level attribute. eg a config item in the /etc/qemu/qemu.conf file
This was the idea behind:
What remains to be implemented is the logic of the whitelist that you mention in comments #2 and #3. To be honest, I don't love this idea of the whitelist; not only will we have to maintain some kind of table, we will need to make sure the table is up-to-date every time new hardware comes out. It also breaks the security of the setup without letting the user know about (because it is on a magic whitelist that the user probably won't know anything about).
I have an alternate proposal. What if we added a new <permissive/> tag to the libvirt XML for device assignment? In the normal case, we wouldn't allow *any* passthrough of devices behind non-ACS switches. However, if the user knows what they are doing, and they want to take this risk, they can add the <permissive/> tag to the XML, in which case it would allow the assignment to happen. This can even be used pretty successfully in virt-manager; it just needs to catch the appropriate exception from the first assignment, pop-up "This is dangerous because of non-ACS, blah, blah. Are you sure?", and then re-do the assignment with the <permissive/> tag.
I think this is a pretty horrible user experiance, since the first thought will be 'what on earth is ACS?', closely followed by clicking 'OK'. There are also already many other restrictions on PCI device assignment, such as the availabilty of FLR, availablity of PM-reset, even VT-d itself, and we don't have user facing tunables to turn off those checks. These are all low-level hardware details that users really aren't equipped to understand - even most of us developers don't really understand them.
I don't much like the idea of having to maintain a whitelist of devices that are safe to use, but pushing the problem off to the end user via a config option/confirm dialog is just avoiding the issue.
I know I'm very late in responding to this, but I thought I'd expand a bit about the problem (and why maintaining a whitelist doesn't solve the problem either). Any PCI devices that are behind non-ACS bridges are not safe to assign to guests (since they can initiate peer-to-peer DMA transfers to other devices, theoretically subverting IOMMU isolation). Unfortunately, a number of multifunction NICs operate in exactly this manner; they are non-ACS capable, meaning that if you assigned different functions on the card to different guests, they could subvert each other. Doubly unfortunate is that these types of NICs are *exactly* the types of NICs that you would want to do assign to guests. A whitelist doesn't really help here. While you can whitelist these multi-function NICs to allow them to be passed-through, you are also lowering the security of the system without telling the administrator. That's why I suggested the additional <permissive> (or whatever) flag; I didn't want to lower overall security of the system without giving the administrator a heads-up. I agree that users/administrators probably have very little idea of what ACS is, but I don't have any better ideas at the moment. We are stuck between the rock of lowered security and the hard place of poor user experience. If anyone has better ideas about how to go about this, I'd be happy to hear them. -- Chris Lalancette

On Mon, Dec 21, 2009 at 07:09:08PM +0000, Daniel P. Berrange wrote:
On Mon, Dec 21, 2009 at 02:27:18PM +0100, Jiri Denemark wrote:
When it is set to 'yes', some check whether a device is safe to be assigned to a guest will be weakened.
I think this is a rather ill-defined concept to be adding the guest XML, since there are many checks done for assignment, and this is only impacting one of them. Whether to allow a device beind a non-ACS enable switch to be used in a VM has implications beyond just the one VM it is assigned to. Thus is strikes me that the decision as to whether to allow use of devices behind non-ACS switches should be a host level attribute. eg a config item in the /etc/qemu/qemu.conf file
Agreed, it's a Host PCI implementation issue, and this should be delt with in a host wide manner I think, a daemon setting, with the defaulting being on the safe side sounds the best to me. Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On Tue, Dec 22, 2009 at 10:51:15AM +0100, Daniel Veillard wrote:
On Mon, Dec 21, 2009 at 07:09:08PM +0000, Daniel P. Berrange wrote:
On Mon, Dec 21, 2009 at 02:27:18PM +0100, Jiri Denemark wrote:
When it is set to 'yes', some check whether a device is safe to be assigned to a guest will be weakened.
I think this is a rather ill-defined concept to be adding the guest XML, since there are many checks done for assignment, and this is only impacting one of them. Whether to allow a device beind a non-ACS enable switch to be used in a VM has implications beyond just the one VM it is assigned to. Thus is strikes me that the decision as to whether to allow use of devices behind non-ACS switches should be a host level attribute. eg a config item in the /etc/qemu/qemu.conf file
Agreed, it's a Host PCI implementation issue, and this should be delt with in a host wide manner I think, a daemon setting, with the defaulting being on the safe side sounds the best to me.
I'm having second thoughts about even a host daemon setting. I really think we ought to be doing full checking ourselves, even with whitelists if needed. Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

On Tue, Dec 22, 2009 at 10:04:16 +0000, Daniel P. Berrange wrote:
On Tue, Dec 22, 2009 at 10:51:15AM +0100, Daniel Veillard wrote:
On Mon, Dec 21, 2009 at 07:09:08PM +0000, Daniel P. Berrange wrote:
On Mon, Dec 21, 2009 at 02:27:18PM +0100, Jiri Denemark wrote:
When it is set to 'yes', some check whether a device is safe to be assigned to a guest will be weakened.
I think this is a rather ill-defined concept to be adding the guest XML, since there are many checks done for assignment, and this is only impacting one of them. Whether to allow a device beind a non-ACS enable switch to be used in a VM has implications beyond just the one VM it is assigned to. Thus is strikes me that the decision as to whether to allow use of devices behind non-ACS switches should be a host level attribute. eg a config item in the /etc/qemu/qemu.conf file
Agreed, it's a Host PCI implementation issue, and this should be delt with in a host wide manner I think, a daemon setting, with the defaulting being on the safe side sounds the best to me.
I'm having second thoughts about even a host daemon setting. I really think we ought to be doing full checking ourselves, even with whitelists if needed.
OK, I understand. On the other hand, maintaining a whitelist of devices which do not communicate directly with other devices is not a good idea. I guess a good compromise between the two options could be a user configurable whitelist. That is, instead of telling libvirt a PCI device can be really assigned in guest's XML, one could tell this in one place for all such PCI devices and it would affect all guests on that machine. What is the best place for such a list? Perhaps that was what you meant by the config item in /etc/qemu/qemu.conf :-) Jirka

Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_driver.c | 9 ++++++++- 1 files changed, 8 insertions(+), 1 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 60dea9c..0a8744f 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1746,6 +1746,7 @@ qemuGetPciHostDeviceList(virConnectPtr conn, } pciDeviceSetManaged(dev, hostdev->managed); + pciDeviceSetPermissive(dev, hostdev->permissive); } return list; @@ -1810,6 +1811,9 @@ qemuPrepareHostDevices(virConnectPtr conn, for (i = 0; i < pciDeviceListCount(pcidevs); i++) { pciDevice *dev = pciDeviceListGet(pcidevs, i); + if (!pciDeviceIsAssignable(conn, dev)) + goto cleanup; + if (pciDeviceGetManaged(dev) && pciDettachDevice(conn, dev) < 0) goto cleanup; @@ -5293,7 +5297,10 @@ static int qemudDomainAttachHostPciDevice(virConnectPtr conn, if (!pci) return -1; - if ((hostdev->managed && pciDettachDevice(conn, pci) < 0) || + pciDeviceSetPermissive(pci, hostdev->permissive); + + if (!pciDeviceIsAssignable(conn, pci) || + (hostdev->managed && pciDettachDevice(conn, pci) < 0) || pciResetDevice(conn, pci, driver->activePciHostdevs) < 0) { pciFreeDevice(conn, pci); return -1; -- 1.6.6.rc4
participants (5)
-
Chris Lalancette
-
Daniel P. Berrange
-
Daniel Veillard
-
Jiri Denemark
-
Mark McLoughlin