[libvirt] [PATCH] qemu: Restore the original states of PCI device when restarting daemon

To support "managed" mode of host PCI device, we record the original states (unbind_from_stub, remove_slot, and reprobe) so that could reattach the device to host with original driver. But there is no XML for theses attrs, and thus after daemon is restarted, we lose the original states. It's easy to reproduce: 1) virsh start domain 2) virsh attach-device dom hostpci.xml (in 'managed' mode) 3) service libvirtd restart 4) virsh destroy domain You will see the device won't be bound to the original driver if there was one. This patch is to solve the problem by introducing internal XML (won't be dumped to user, only dumped to status XML). The XML is: <origstates> <unbind/> <remove_slot/> <reprobe/> </origstates> A new struct "virDomainHostdevOrigStates" is introduced for the XML, and the according members are updated when preparing the PCI device. And function "qemuUpdateActivePciHostdevs" is modified to honor the original states. Use of qemuGetPciHostDeviceList is removed in function "qemuUpdateActivePciHostdevs", and the "managed" value of the device config is honored by the change. This fixes another problem alongside: qemuGetPciHostDeviceList set the device as "managed" force regardless of whether the device is configured as "managed='yes'" or not in XML, which is not right. --- src/conf/domain_conf.c | 79 +++++++++++++++++++++++++++++++++++++++++----- src/conf/domain_conf.h | 26 +++++++++++++++ src/libvirt_private.syms | 6 +++ src/qemu/qemu_hostdev.c | 79 ++++++++++++++++++++++++++++++++++++---------- src/util/pci.c | 36 +++++++++++++++++++++ src/util/pci.h | 9 +++++ 6 files changed, 210 insertions(+), 25 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 5959593..4ccf381 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -59,8 +59,12 @@ verify(VIR_DOMAIN_VIRT_LAST <= 32); /* Private flags used internally by virDomainSaveStatus and * virDomainLoadStatus. */ typedef enum { - VIR_DOMAIN_XML_INTERNAL_STATUS = (1<<16), /* dump internal domain status information */ - VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET = (1<<17), /* dump/parse <actual> element */ + /* dump internal domain status information */ + VIR_DOMAIN_XML_INTERNAL_STATUS = (1<<16), + /* dump/parse <actual> element */ + VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET = (1<<17), + /* dump/parse original states of host PCI device */ + VIR_DOMAIN_XML_INTERNAL_PCI_ORIG_STATES = (1<<18), } virDomainXMLInternalFlags; VIR_ENUM_IMPL(virDomainTaint, VIR_DOMAIN_TAINT_LAST, @@ -5437,13 +5441,47 @@ out: return ret; } +/* The internal XML for host PCI device's original states: + * + * <origstates> + * <unbind/> + * <removeslot/> + * <reprobe/> + * </origstates> + */ +static int +virDomainHostdevSubsysPciOrigStatesDefParseXML(const xmlNodePtr node, + virDomainHostdevOrigStatesPtr def) +{ + xmlNodePtr cur; + cur = node->children; + + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE) { + if (xmlStrEqual(cur->name, BAD_CAST "unbind")) { + def->states.pci.unbind_from_stub = 1; + } else if (xmlStrEqual(cur->name, BAD_CAST "removeslot")) { + def->states.pci.remove_slot = 1; + } else if (xmlStrEqual(cur->name, BAD_CAST "reprobe")) { + def->states.pci.reprobe = 1; + } else { + virDomainReportError(VIR_ERR_INTERNAL_ERROR, + _("unsupported element '%s' of 'origstates'"), + cur->name); + return -1; + } + } + cur = cur->next; + } + + return 0; +} static int virDomainHostdevSubsysPciDefParseXML(const xmlNodePtr node, virDomainHostdevDefPtr def, unsigned int flags) { - int ret = -1; xmlNodePtr cur; @@ -5470,6 +5508,11 @@ virDomainHostdevSubsysPciDefParseXML(const xmlNodePtr node, goto out; } def->info.type = VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI; + } else if ((flags & VIR_DOMAIN_XML_INTERNAL_PCI_ORIG_STATES) && + xmlStrEqual(cur->name, BAD_CAST "origstates")) { + virDomainHostdevOrigStatesPtr states = &def->origstates; + if (virDomainHostdevSubsysPciOrigStatesDefParseXML(cur, states) < 0) + goto out; } else { virDomainReportError(VIR_ERR_INTERNAL_ERROR, _("unknown pci source type '%s'"), @@ -10445,7 +10488,9 @@ virDomainHostdevDefFormat(virBufferPtr buf, } type = virDomainHostdevSubsysTypeToString(def->source.subsys.type); - if (!type || (def->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB && def->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) ) { + if (!type || + (def->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB && + def->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI)) { virDomainReportError(VIR_ERR_INTERNAL_ERROR, _("unexpected hostdev type %d"), def->source.subsys.type); @@ -10474,6 +10519,20 @@ virDomainHostdevDefFormat(virBufferPtr buf, def->source.subsys.u.pci.bus, def->source.subsys.u.pci.slot, def->source.subsys.u.pci.function); + + if ((flags & VIR_DOMAIN_XML_INTERNAL_PCI_ORIG_STATES) && + (def->origstates.states.pci.unbind_from_stub || + def->origstates.states.pci.remove_slot || + def->origstates.states.pci.reprobe)) { + virBufferAddLit(buf, " <origstates>\n"); + if (def->origstates.states.pci.unbind_from_stub) + virBufferAddLit(buf, " <unbind/>\n"); + if (def->origstates.states.pci.remove_slot) + virBufferAddLit(buf, " <removeslot/>\n"); + if (def->origstates.states.pci.reprobe) + virBufferAddLit(buf, " <reprobe/>\n"); + virBufferAddLit(buf, " </origstates>\n"); + } } virBufferAddLit(buf, " </source>\n"); @@ -10554,7 +10613,8 @@ virDomainHubDefFormat(virBufferPtr buf, VIR_DOMAIN_XML_UPDATE_CPU) verify(((VIR_DOMAIN_XML_INTERNAL_STATUS | - VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET) + VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET | + VIR_DOMAIN_XML_INTERNAL_PCI_ORIG_STATES) & DUMPXML_FLAGS) == 0); /* This internal version can accept VIR_DOMAIN_XML_INTERNAL_*, @@ -10574,7 +10634,8 @@ virDomainDefFormatInternal(virDomainDefPtr def, virCheckFlags(DUMPXML_FLAGS | VIR_DOMAIN_XML_INTERNAL_STATUS | - VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET, + VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET | + VIR_DOMAIN_XML_INTERNAL_PCI_ORIG_STATES, -1); if (!(type = virDomainVirtTypeToString(def->virtType))) { @@ -11150,7 +11211,8 @@ int virDomainSaveStatus(virCapsPtr caps, { unsigned int flags = (VIR_DOMAIN_XML_SECURE | VIR_DOMAIN_XML_INTERNAL_STATUS | - VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET); + VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET | + VIR_DOMAIN_XML_INTERNAL_PCI_ORIG_STATES); int ret = -1; char *xml; @@ -11249,7 +11311,8 @@ static virDomainObjPtr virDomainLoadStatus(virCapsPtr caps, if (!(obj = virDomainObjParseFile(caps, statusFile, expectedVirtTypes, VIR_DOMAIN_XML_INTERNAL_STATUS | - VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET))) + VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET | + VIR_DOMAIN_XML_INTERNAL_PCI_ORIG_STATES))) goto error; virUUIDFormat(obj->def->uuid, uuidstr); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 2119b5a..c5eda75 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -153,6 +153,31 @@ struct _virDomainDeviceInfo { } master; }; +typedef struct _virDomainHostdevOrigStates virDomainHostdevOrigStates; +typedef virDomainHostdevOrigStates *virDomainHostdevOrigStatesPtr; +struct _virDomainHostdevOrigStates { + union { + struct { + /* Does the device need to ubind from stub when + * reattaching to host? + */ + unsigned int unbind_from_stub : 1; + + /* Does it need to use remove_slot when reattaching + * the device to host? + */ + unsigned int remove_slot : 1; + + /* Does it need to reprobe driver for the device when + * reattaching to host? + */ + unsigned int reprobe :1; + } pci; + + /* Perhaps 'usb' in future */ + } states; +}; + typedef struct _virDomainLeaseDef virDomainLeaseDef; typedef virDomainLeaseDef *virDomainLeaseDefPtr; struct _virDomainLeaseDef { @@ -995,6 +1020,7 @@ struct _virDomainHostdevDef { int bootIndex; virDomainDeviceInfo info; /* Guest address */ int rombar; /* enum virDomainPciRombarMode */ + virDomainHostdevOrigStates origstates; }; enum virDomainRedirdevBus { diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index dedbd16..b85dabf 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -882,6 +882,9 @@ pciDettachDevice; pciDeviceFileIterate; pciDeviceGetManaged; pciDeviceGetName; +pciDeviceGetRemoveSlot; +pciDeviceGetReprobe; +pciDeviceGetUnbindFromStub; pciDeviceGetUsedBy; pciDeviceIsAssignable; pciDeviceIsVirtualFunction; @@ -896,6 +899,9 @@ pciDeviceListSteal; pciDeviceNetName; pciDeviceReAttachInit; pciDeviceSetManaged; +pciDeviceSetRemoveSlot; +pciDeviceSetReprobe; +pciDeviceSetUnbindFromStub; pciDeviceSetUsedBy; pciFreeDevice; pciGetDevice; diff --git a/src/qemu/qemu_hostdev.c b/src/qemu/qemu_hostdev.c index 1fb373e..9137388 100644 --- a/src/qemu/qemu_hostdev.c +++ b/src/qemu/qemu_hostdev.c @@ -116,33 +116,46 @@ qemuGetActivePciHostDeviceList(struct qemud_driver *driver, int qemuUpdateActivePciHostdevs(struct qemud_driver *driver, virDomainDefPtr def) { - pciDeviceList *pcidevs; - int ret = -1; + virDomainHostdevDefPtr hostdev = NULL; + int i; if (!def->nhostdevs) return 0; - if (!(pcidevs = qemuGetPciHostDeviceList(def->hostdevs, def->nhostdevs))) - return -1; + for (i = 0; i < def->nhostdevs; i++) { + pciDevice *dev = NULL; + hostdev = def->hostdevs[i]; + + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) + continue; + if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) + continue; + + dev = pciGetDevice(hostdev->source.subsys.u.pci.domain, + hostdev->source.subsys.u.pci.bus, + hostdev->source.subsys.u.pci.slot, + hostdev->source.subsys.u.pci.function); + + if (!dev) + return -1; + + pciDeviceSetManaged(dev, hostdev->managed); + pciDeviceSetUsedBy(dev, def->name); + + /* Setup the original states for the PCI device */ + pciDeviceSetUnbindFromStub(dev, hostdev->origstates.states.pci.unbind_from_stub); + pciDeviceSetRemoveSlot(dev, hostdev->origstates.states.pci.remove_slot); + pciDeviceSetReprobe(dev, hostdev->origstates.states.pci.reprobe); - while (pciDeviceListCount(pcidevs) > 0) { - pciDevice *dev = pciDeviceListGet(pcidevs, 0); - pciDeviceListSteal(pcidevs, dev); if (pciDeviceListAdd(driver->activePciHostdevs, dev) < 0) { pciFreeDevice(dev); - goto cleanup; + return -1; } } - ret = 0; - -cleanup: - pciDeviceListFree(pcidevs); - return ret; + return 0; } - - int qemuPrepareHostdevPCIDevices(struct qemud_driver *driver, const char *name, virDomainHostdevDefPtr *hostdevs, @@ -155,7 +168,7 @@ int qemuPrepareHostdevPCIDevices(struct qemud_driver *driver, if (!(pcidevs = qemuGetPciHostDeviceList(hostdevs, nhostdevs))) return -1; - /* We have to use 6 loops here. *All* devices must + /* We have to use 7 loops here. *All* devices must * be detached before we reset any of them, because * in some cases you have to reset the whole PCI, * which impacts all devices on it. Also, all devices @@ -232,7 +245,39 @@ int qemuPrepareHostdevPCIDevices(struct qemud_driver *driver, pciDeviceSetUsedBy(activeDev, name); } - /* Loop 6: Now steal all the devices from pcidevs */ + /* Loop 6: Now set the original states for hostdev def */ + for (i = 0; i < nhostdevs; i++) { + pciDevice *dev; + pciDevice *pcidev; + virDomainHostdevDefPtr hostdev = hostdevs[i]; + + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) + continue; + if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) + continue; + + dev = pciGetDevice(hostdev->source.subsys.u.pci.domain, + hostdev->source.subsys.u.pci.bus, + hostdev->source.subsys.u.pci.slot, + hostdev->source.subsys.u.pci.function); + + /* original states "unbind_from_stub", "remove_slot", + * "reprobe" were already set by pciDettachDevice in + * loop 2. + */ + if ((pcidev = pciDeviceListFind(pcidevs, dev))) { + hostdev->origstates.states.pci.unbind_from_stub = + pciDeviceGetUnbindFromStub(pcidev); + hostdev->origstates.states.pci.remove_slot = + pciDeviceGetRemoveSlot(pcidev); + hostdev->origstates.states.pci.reprobe = + pciDeviceGetReprobe(pcidev); + } + + pciFreeDevice(dev); + } + + /* Loop 7: Now steal all the devices from pcidevs */ while (pciDeviceListCount(pcidevs) > 0) { pciDevice *dev = pciDeviceListGet(pcidevs, 0); pciDeviceListSteal(pcidevs, dev); diff --git a/src/util/pci.c b/src/util/pci.c index 33b4b0e..b835979 100644 --- a/src/util/pci.c +++ b/src/util/pci.c @@ -1394,6 +1394,42 @@ unsigned pciDeviceGetManaged(pciDevice *dev) return dev->managed; } +unsigned +pciDeviceGetUnbindFromStub(pciDevice *dev) +{ + return dev->unbind_from_stub; +} + +void +pciDeviceSetUnbindFromStub(pciDevice *dev, unsigned unbind) +{ + dev->unbind_from_stub = !!unbind; +} + +unsigned +pciDeviceGetRemoveSlot(pciDevice *dev) +{ + return dev->remove_slot; +} + +void +pciDeviceSetRemoveSlot(pciDevice *dev, unsigned remove_slot) +{ + dev->remove_slot = !!remove_slot; +} + +unsigned +pciDeviceGetReprobe(pciDevice *dev) +{ + return dev->reprobe; +} + +void +pciDeviceSetReprobe(pciDevice *dev, unsigned reprobe) +{ + dev->reprobe = !!reprobe; +} + void pciDeviceSetUsedBy(pciDevice *dev, const char *name) { diff --git a/src/util/pci.h b/src/util/pci.h index ab29c0b..76e37e3 100644 --- a/src/util/pci.h +++ b/src/util/pci.h @@ -51,6 +51,15 @@ unsigned pciDeviceGetManaged(pciDevice *dev); void pciDeviceSetUsedBy(pciDevice *dev, const char *used_by); const char *pciDeviceGetUsedBy(pciDevice *dev); +unsigned pciDeviceGetUnbindFromStub(pciDevice *dev); +void pciDeviceSetUnbindFromStub(pciDevice *dev, + unsigned unbind); +unsigned pciDeviceGetRemoveSlot(pciDevice *dev); +void pciDeviceSetRemoveSlot(pciDevice *dev, + unsigned remove_slot); +unsigned pciDeviceGetReprobe(pciDevice *dev); +void pciDeviceSetReprobe(pciDevice *dev, + unsigned reprobe); void pciDeviceReAttachInit(pciDevice *dev); pciDeviceList *pciDeviceListNew (void); -- 1.7.6

On 10/20/2011 04:45 AM, Osier Yang wrote:
This patch is to solve the problem by introducing internal XML (won't be dumped to user, only dumped to status XML). The XML is: <origstates> <unbind/> <remove_slot/> <reprobe/> </origstates>
Which element will this be added under?
+++ b/src/conf/domain_conf.c @@ -59,8 +59,12 @@ verify(VIR_DOMAIN_VIRT_LAST<= 32); /* Private flags used internally by virDomainSaveStatus and * virDomainLoadStatus. */ typedef enum { - VIR_DOMAIN_XML_INTERNAL_STATUS = (1<<16), /* dump internal domain status information */ - VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET = (1<<17), /* dump/parse<actual> element */ + /* dump internal domain status information */ + VIR_DOMAIN_XML_INTERNAL_STATUS = (1<<16), + /* dump/parse<actual> element */ + VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET = (1<<17), + /* dump/parse original states of host PCI device */ + VIR_DOMAIN_XML_INTERNAL_PCI_ORIG_STATES = (1<<18),
Is this worth a new bit, or does it fit under the already-existing umbrella of VIR_DOMAIN_XML_INTERNAL_STATUS, since we are treating these three bits as internal status? But we can change that later, if we need.
+++ b/src/conf/domain_conf.h @@ -153,6 +153,31 @@ struct _virDomainDeviceInfo { } master; };
+typedef struct _virDomainHostdevOrigStates virDomainHostdevOrigStates; +typedef virDomainHostdevOrigStates *virDomainHostdevOrigStatesPtr; +struct _virDomainHostdevOrigStates { + union {
Don't you need some sort of discrimination field outside of the union which tells you which part of the union is valid? Typically we use a discriminator such as 'int type; /* enum vir... */'. But until we have something to distinguish from, I guess this works.
+ struct { + /* Does the device need to ubind from stub when
s/ubind/unbind/ ACK with spelling nit fixed. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

于 2011年10月29日 07:00, Eric Blake 写道:
On 10/20/2011 04:45 AM, Osier Yang wrote:
This patch is to solve the problem by introducing internal XML (won't be dumped to user, only dumped to status XML). The XML is: <origstates> <unbind/> <remove_slot/> <reprobe/> </origstates>
Oh, I forgot to clarify this, will add when pushing. <hostdev> <source> <origstates>...</origstates> </source> </hostdev> only for PCI device.
Which element will this be added under?
+++ b/src/conf/domain_conf.c @@ -59,8 +59,12 @@ verify(VIR_DOMAIN_VIRT_LAST<= 32); /* Private flags used internally by virDomainSaveStatus and * virDomainLoadStatus. */ typedef enum { - VIR_DOMAIN_XML_INTERNAL_STATUS = (1<<16), /* dump internal domain status information */ - VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET = (1<<17), /* dump/parse<actual> element */ + /* dump internal domain status information */ + VIR_DOMAIN_XML_INTERNAL_STATUS = (1<<16), + /* dump/parse<actual> element */ + VIR_DOMAIN_XML_INTERNAL_ACTUAL_NET = (1<<17), + /* dump/parse original states of host PCI device */ + VIR_DOMAIN_XML_INTERNAL_PCI_ORIG_STATES = (1<<18),
Is this worth a new bit, or does it fit under the already-existing umbrella of VIR_DOMAIN_XML_INTERNAL_STATUS, since we are treating these three bits as internal status? But we can change that later, if we need.
I like a new *_PCI_ORIG_STATES more, as it allows us control the the status XML more precisely, e.g. one only wants to dumpxml PCI_ORIG_STATES, though there is no such requirement currently.
+++ b/src/conf/domain_conf.h @@ -153,6 +153,31 @@ struct _virDomainDeviceInfo { } master; };
+typedef struct _virDomainHostdevOrigStates virDomainHostdevOrigStates; +typedef virDomainHostdevOrigStates *virDomainHostdevOrigStatesPtr; +struct _virDomainHostdevOrigStates { + union {
Don't you need some sort of discrimination field outside of the union which tells you which part of the union is valid? Typically we use a discriminator such as 'int type; /* enum vir... */'. But until we have something to distinguish from, I guess this works.
I considered this when making the patch, there is a "type" in virDomainHostdevDef, and virDomainHostdevOrigstates won't be used standalone. We only need it when doing something on virDomainHostdevDef. So I think it's not necessary.
+ struct { + /* Does the device need to ubind from stub when
s/ubind/unbind/
ACK with spelling nit fixed.
Thanks. I pushed with the spelling fixed, and commit message updated.
participants (2)
-
Eric Blake
-
Osier Yang