On Thu, Mar 21, 2019 at 18:29:01 -0400, Laine Stump wrote:
> The VIR_DOMAIN_EVENT_ID_DEVICE_REMOVED event is sent after qemu has
> responded to a device_del command with a DEVICE_DELETED event. Before
> queuing the event, *some* of the final teardown of the device's
> trappings in libvirt is done, but not *all* of it. As a result, an
> application may receive and process the DEVICE_REMOVED event before
> libvirt has really finished with it.
>
> Usually this doesn't cause a problem, but it can - in the case of the
> bug report referenced below, vdsm is assigning a PCI device to a guest
> with managed='no', using livirt's virNodeDeviceDetachFlags() and
> virNodeDeviceReAttach() APIs. Immediately after receiving a
> DEVICE_REMOVED event from libvirt signalling that the device had been
> successfully unplugged, vdsm would cal virNodeDeviceReAttach() to
> unbind the device from vfio-pci and rebind it to the host driverm but
> because the event was received before libvirt had completely finished
> processing the removal, that device was still on the "activeDevs"
> list, and so virNodeDeviceReAttach() failed.
So between the lines this reads as if qemuDomainRemoveHostDevice is
broken. I agree though that fixing everything systematically is better.
> Experimentation with additional debug logs proved that libvirt would
> always end up dispatching the DEVICE_REMOVED event before it had
> removed the device from activeDevs (with a *much* greater difference
> with managed='yes', since in that case the re-binding of the device
> occurred after queuing the device).
>
> Although the case of hostdev devices is the most extreme (since there
> is so much involved in tearing down the device), *all* device types
> suffer from the same problem - the DEVICE_REMOVED event is queued very
> early in the qemuDomainRemove*Device() function for all of them,
> resulting in a possibility of any application receiving the event
> before libvirt has really finished with the device.
>
> The solution is to save the device's alias (which is the only piece of
> info from the device object that is needed for the event) at the
> beginning of processing the device removal, and then queue the event
> as a final act before returning. Since all of the
> qemuDomainRemove*Device() functions (except
> qemuDomainRemoveChrDevice()) are now called exclusively from
> qemuDomainRemoveDevice() (which selects which of the subordinates to
> call in a switch statement based on the type of device), the shortest
> route to a solution is to doing the saving of alias, and later
> queueing of the event, in the higher level qemuDomainRemoveDevice(),
> and just completely remove the event-related code from all the
> subordinate functions.
>
> The single exception to this, as mentioned before, is
> qemuDomainRemoveChrDevice(), which is still called from somewhere
> other than qemuDomainRemoveDevice() (and has a separate arg used to
> trigger different behavior when the chr device has targetType ==
> GUESTFWD), so it must keep its original behavior intact, and must be
> treated differently by qemuDomainRemoveDevice() (similar to the way
> that qemuDomainDetachDeviceLive() treats chr and lease devices
> differently from all the others).
>
> Resolves:
https://bugzilla.redhat.com/1658198
>
> Signed-off-by: Laine Stump <laine(a)laine.org>
> ---
> src/qemu/qemu_hotplug.c | 154 ++++++++++++++++++++--------------------
> 1 file changed, 78 insertions(+), 76 deletions(-)
>
> diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
> index de7a7a2c95..43cc3a314d 100644
> --- a/src/qemu/qemu_hotplug.c
> +++ b/src/qemu/qemu_hotplug.c
> @@ -4501,7 +4501,6 @@ qemuDomainRemoveDiskDevice(virQEMUDriverPtr driver,
> {
> qemuHotplugDiskSourceDataPtr diskbackend = NULL;
> virDomainDeviceDef dev;
> - virObjectEventPtr event;
> size_t i;
> qemuDomainObjPrivatePtr priv = vm->privateData;
> int ret = -1;
> @@ -4529,9 +4528,6 @@ qemuDomainRemoveDiskDevice(virQEMUDriverPtr driver,
>
> virDomainAuditDisk(vm, disk->src, NULL, "detach", true);
>
> - event = virDomainEventDeviceRemovedNewFromObj(vm, disk->info.alias);
> - virObjectEventStateQueue(driver->domainEventState, event);
> -
> qemuDomainReleaseDeviceAddress(vm, &disk->info,
virDomainDiskGetSource(disk));
>
> /* tear down disk security access */
> @@ -4555,19 +4551,14 @@ qemuDomainRemoveDiskDevice(virQEMUDriverPtr driver,
>
>
> static int
> -qemuDomainRemoveControllerDevice(virQEMUDriverPtr driver,
> - virDomainObjPtr vm,
> +qemuDomainRemoveControllerDevice(virDomainObjPtr vm,
> virDomainControllerDefPtr controller)
> {
> - virObjectEventPtr event;
> size_t i;
>
> VIR_DEBUG("Removing controller %s from domain %p %s",
> controller->info.alias, vm, vm->def->name);
>
> - event = virDomainEventDeviceRemovedNewFromObj(vm, controller->info.alias);
> - virObjectEventStateQueue(driver->domainEventState, event);
> -
> for (i = 0; i < vm->def->ncontrollers; i++) {
> if (vm->def->controllers[i] == controller) {
> virDomainControllerRemove(vm->def, i);
> @@ -4589,7 +4580,6 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver,
> qemuDomainObjPrivatePtr priv = vm->privateData;
> unsigned long long oldmem = virDomainDefGetMemoryTotal(vm->def);
> unsigned long long newmem = oldmem - mem->size;
> - virObjectEventPtr event;
> char *backendAlias = NULL;
> int rc;
> int idx;
> @@ -4611,9 +4601,6 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver,
> if (rc < 0)
> return -1;
>
> - event = virDomainEventDeviceRemovedNewFromObj(vm, mem->info.alias);
> - virObjectEventStateQueue(driver->domainEventState, event);
> -
> if ((idx = virDomainMemoryFindByDef(vm->def, mem)) >= 0)
> virDomainMemoryRemove(vm->def, idx);
>
> @@ -4693,7 +4680,6 @@ qemuDomainRemoveHostDevice(virQEMUDriverPtr driver,
> {
> virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
> virDomainNetDefPtr net = NULL;
> - virObjectEventPtr event;
> size_t i;
> int ret = -1;
> qemuDomainObjPrivatePtr priv = vm->privateData;
> @@ -4737,9 +4723,6 @@ qemuDomainRemoveHostDevice(virQEMUDriverPtr driver,
> goto cleanup;
> }
>
> - event = virDomainEventDeviceRemovedNewFromObj(vm, hostdev->info->alias);
> - virObjectEventStateQueue(driver->domainEventState, event);
> -
> if (hostdev->parent.type == VIR_DOMAIN_DEVICE_NET) {
> net =
hostdev->parent.data.net;
>
> @@ -4818,7 +4801,6 @@ qemuDomainRemoveNetDevice(virQEMUDriverPtr driver,
> {
> virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
> qemuDomainObjPrivatePtr priv = vm->privateData;
> - virObjectEventPtr event;
> char *hostnet_name = NULL;
> char *charDevAlias = NULL;
> size_t i;
> @@ -4863,9 +4845,6 @@ qemuDomainRemoveNetDevice(virQEMUDriverPtr driver,
>
> virDomainAuditNet(vm, net, NULL, "detach", true);
>
> - event = virDomainEventDeviceRemovedNewFromObj(vm, net->info.alias);
> - virObjectEventStateQueue(driver->domainEventState, event);
> -
> for (i = 0; i < vm->def->nnets; i++) {
> if (vm->def->nets[i] == net) {
> virDomainNetRemove(vm->def, i);
> @@ -4948,11 +4927,20 @@ qemuDomainRemoveChrDevice(virQEMUDriverPtr driver,
> if (qemuDomainNamespaceTeardownChardev(vm, chr) < 0)
> VIR_WARN("Unable to remove chr device from /dev");
>
> + qemuDomainReleaseDeviceAddress(vm, &chr->info, NULL);
> + qemuDomainChrRemove(vm->def, chr);
> +
> + /* NB: all other qemuDomainRemove*Device() functions omit the
> + * sending of the DEVICE_REMOVED event, because they are *only*
> + * called from qemuDomainRemoveDevice(), and that function sends
> + * the DEVICE_REMOVED event for them, this function is different -
> + * it is called from other places than just
> + * qemuDomainRemoveDevice(), so it must send the DEVICE_REMOVED
> + * event itself.
> + */
> event = virDomainEventDeviceRemovedNewFromObj(vm, chr->info.alias);
> virObjectEventStateQueue(driver->domainEventState, event);
>
> - qemuDomainReleaseDeviceAddress(vm, &chr->info, NULL);
> - qemuDomainChrRemove(vm->def, chr);
> virDomainChrDefFree(chr);
> ret = 0;
>
> @@ -4967,7 +4955,6 @@ qemuDomainRemoveRNGDevice(virQEMUDriverPtr driver,
> virDomainObjPtr vm,
> virDomainRNGDefPtr rng)
> {
> - virObjectEventPtr event;
> char *charAlias = NULL;
> char *objAlias = NULL;
> qemuDomainObjPrivatePtr priv = vm->privateData;
> @@ -5016,9 +5003,6 @@ qemuDomainRemoveRNGDevice(virQEMUDriverPtr driver,
> if (qemuDomainNamespaceTeardownRNG(vm, rng) < 0)
> VIR_WARN("Unable to remove RNG device from /dev");
>
> - event = virDomainEventDeviceRemovedNewFromObj(vm, rng->info.alias);
> - virObjectEventStateQueue(driver->domainEventState, event);
> -
> if ((idx = virDomainRNGFind(vm->def, rng)) >= 0)
> virDomainRNGRemove(vm->def, idx);
> qemuDomainReleaseDeviceAddress(vm, &rng->info, NULL);
> @@ -5043,7 +5027,6 @@ qemuDomainRemoveShmemDevice(virQEMUDriverPtr driver,
> char *charAlias = NULL;
> char *memAlias = NULL;
> qemuDomainObjPrivatePtr priv = vm->privateData;
> - virObjectEventPtr event = NULL;
>
> VIR_DEBUG("Removing shmem device %s from domain %p %s",
> shmem->info.alias, vm, vm->def->name);
> @@ -5071,9 +5054,6 @@ qemuDomainRemoveShmemDevice(virQEMUDriverPtr driver,
> if (rc < 0)
> goto cleanup;
>
> - event = virDomainEventDeviceRemovedNewFromObj(vm, shmem->info.alias);
> - virObjectEventStateQueue(driver->domainEventState, event);
> -
> if ((idx = virDomainShmemDefFind(vm->def, shmem)) >= 0)
> virDomainShmemDefRemove(vm->def, idx);
> qemuDomainReleaseDeviceAddress(vm, &shmem->info, NULL);
> @@ -5089,17 +5069,12 @@ qemuDomainRemoveShmemDevice(virQEMUDriverPtr driver,
>
>
> static int
> -qemuDomainRemoveWatchdog(virQEMUDriverPtr driver,
> - virDomainObjPtr vm,
> +qemuDomainRemoveWatchdog(virDomainObjPtr vm,
> virDomainWatchdogDefPtr watchdog)
> {
> - virObjectEventPtr event = NULL;
> -
> VIR_DEBUG("Removing watchdog %s from domain %p %s",
> watchdog->info.alias, vm, vm->def->name);
>
> - event = virDomainEventDeviceRemovedNewFromObj(vm, watchdog->info.alias);
> - virObjectEventStateQueue(driver->domainEventState, event);
> qemuDomainReleaseDeviceAddress(vm, &watchdog->info, NULL);
> virDomainWatchdogDefFree(vm->def->watchdog);
> vm->def->watchdog = NULL;
> @@ -5111,16 +5086,11 @@ static int
> qemuDomainRemoveInputDevice(virDomainObjPtr vm,
> virDomainInputDefPtr dev)
> {
> - qemuDomainObjPrivatePtr priv = vm->privateData;
> - virQEMUDriverPtr driver = priv->driver;
> - virObjectEventPtr event = NULL;
> size_t i;
>
> VIR_DEBUG("Removing input device %s from domain %p %s",
> dev->info.alias, vm, vm->def->name);
>
> - event = virDomainEventDeviceRemovedNewFromObj(vm, dev->info.alias);
> - virObjectEventStateQueue(driver->domainEventState, event);
> for (i = 0; i < vm->def->ninputs; i++) {
> if (vm->def->inputs[i] == dev)
> break;
> @@ -5145,15 +5115,9 @@ static int
> qemuDomainRemoveVsockDevice(virDomainObjPtr vm,
> virDomainVsockDefPtr dev)
> {
> - qemuDomainObjPrivatePtr priv = vm->privateData;
> - virQEMUDriverPtr driver = priv->driver;
> - virObjectEventPtr event = NULL;
> -
> VIR_DEBUG("Removing vsock device %s from domain %p %s",
> dev->info.alias, vm, vm->def->name);
>
> - event = virDomainEventDeviceRemovedNewFromObj(vm, dev->info.alias);
> - virObjectEventStateQueue(driver->domainEventState, event);
> qemuDomainReleaseDeviceAddress(vm, &dev->info, NULL);
> virDomainVsockDefFree(vm->def->vsock);
> vm->def->vsock = NULL;
> @@ -5167,7 +5131,6 @@ qemuDomainRemoveRedirdevDevice(virQEMUDriverPtr driver,
> virDomainRedirdevDefPtr dev)
> {
> qemuDomainObjPrivatePtr priv = vm->privateData;
> - virObjectEventPtr event;
> char *charAlias = NULL;
> ssize_t idx;
> int ret = -1;
> @@ -5192,9 +5155,6 @@ qemuDomainRemoveRedirdevDevice(virQEMUDriverPtr driver,
>
> virDomainAuditRedirdev(vm, dev, "detach", true);
>
> - event = virDomainEventDeviceRemovedNewFromObj(vm, dev->info.alias);
> - virObjectEventStateQueue(driver->domainEventState, event);
> -
> if ((idx = virDomainRedirdevDefFind(vm->def, dev)) >= 0)
> virDomainRedirdevDefRemove(vm->def, idx);
> qemuDomainReleaseDeviceAddress(vm, &dev->info, NULL);
> @@ -5285,50 +5245,88 @@ qemuDomainRemoveDevice(virQEMUDriverPtr driver,
> virDomainObjPtr vm,
> virDomainDeviceDefPtr dev)
> {
> - int ret = -1;
> + virDomainDeviceInfoPtr info;
> + virObjectEventPtr event;
> + VIR_AUTOFREE(char *)alias = NULL;
> +
> + if (!(info = virDomainDeviceGetInfo(dev))) {
> + /*
> + * This should never happen, since all of the device types in
> + * the switch cases that end with a "break" instead of a
> + * return have a virDeviceInfo in them.
Well, but it's called prior to doing a return down below. The deduction
from the sentence is that those that use 'return' don't necessarily have
it and thus wouldn't ever be reached.
> + */
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("device of type '%s' has no device
info"),
> + virDomainDeviceTypeToString(dev->type));
> + return -1;
I think you can change this to a best-effort scenario. Copy the alias
only if info is non-null and emit the event in the same case.
> + }
> +
> + /*
> + * save the alias to use when sending a DEVICE_REMOVED event after
> + * all other teardown is complete
> + */
> + if (VIR_STRDUP(alias, info->alias) < 0)
> + return -1;
Also at this point 'info' should be set to NULL as any call to the
Remove function frees the definition thus info would be dangling.
Although it's not *currently* used after this point anyway, I agree that
is the prudent thing to do to prevent some future maintainer from
assuming they can use it.
> +
> switch ((virDomainDeviceType)dev->type) {
> + case VIR_DOMAIN_DEVICE_CHR:
> + /* unlike other qemuDomainRemove*Device() functions, this one
> + * can't take advantage of any common code at the end of
> + * qemuDomainRemoveDevice(). This is because it is called
> + * directly from other places (with an extra arg that would be
> + * clumsy to add into qemuDomainRemoveDevice())
The last sentence is not necessary.
Yeah, sometimes my comments are useful when making the change, but
pointless once it is made. I'll remove it.
> + */
> + return qemuDomainRemoveChrDevice(driver, vm, dev->data.chr, true);
> +
> + /*
> + * all of the following qemuDomainRemove*Device() functions
> + * are (and must be) only called from this function, so any
> + * code that is common to them all can be pulled out and put
> + * at the end of this function.
Or before if it's the same prologue code.
Good point. I'll change "at the end of" to "in".
> + */
> case VIR_DOMAIN_DEVICE_DISK:
> - ret = qemuDomainRemoveDiskDevice(driver, vm, dev->data.disk);
> + if (qemuDomainRemoveDiskDevice(driver, vm, dev->data.disk) < 0)
> + return -1;
> break;
> case VIR_DOMAIN_DEVICE_CONTROLLER:
> - ret = qemuDomainRemoveControllerDevice(driver, vm,
dev->data.controller);
> + if (qemuDomainRemoveControllerDevice(vm, dev->data.controller) < 0)
> + return -1;
> break;
> case VIR_DOMAIN_DEVICE_NET:
> - ret = qemuDomainRemoveNetDevice(driver, vm,
dev->data.net);
> + if (qemuDomainRemoveNetDevice(driver, vm,
dev->data.net) < 0)
> + return -1;
> break;
> case VIR_DOMAIN_DEVICE_HOSTDEV:
> - ret = qemuDomainRemoveHostDevice(driver, vm, dev->data.hostdev);
> - break;
> -
> - case VIR_DOMAIN_DEVICE_CHR:
> - ret = qemuDomainRemoveChrDevice(driver, vm, dev->data.chr, true);
> + if (qemuDomainRemoveHostDevice(driver, vm, dev->data.hostdev) < 0)
> + return -1;
> break;
> case VIR_DOMAIN_DEVICE_RNG:
> - ret = qemuDomainRemoveRNGDevice(driver, vm, dev->data.rng);
> + if (qemuDomainRemoveRNGDevice(driver, vm, dev->data.rng) < 0)
> + return -1;
> break;
> -
> case VIR_DOMAIN_DEVICE_MEMORY:
> - ret = qemuDomainRemoveMemoryDevice(driver, vm, dev->data.memory);
> + if (qemuDomainRemoveMemoryDevice(driver, vm, dev->data.memory) < 0)
> + return -1;
> break;
> -
> case VIR_DOMAIN_DEVICE_SHMEM:
> - ret = qemuDomainRemoveShmemDevice(driver, vm, dev->data.shmem);
> + if (qemuDomainRemoveShmemDevice(driver, vm, dev->data.shmem) < 0)
> + return -1;
> break;
> -
> case VIR_DOMAIN_DEVICE_INPUT:
> - ret = qemuDomainRemoveInputDevice(vm, dev->data.input);
> + if (qemuDomainRemoveInputDevice(vm, dev->data.input) < 0)
> + return -1;
> break;
> -
> case VIR_DOMAIN_DEVICE_REDIRDEV:
> - ret = qemuDomainRemoveRedirdevDevice(driver, vm, dev->data.redirdev);
> + if (qemuDomainRemoveRedirdevDevice(driver, vm, dev->data.redirdev) <
0)
> + return -1;
> break;
> -
> case VIR_DOMAIN_DEVICE_WATCHDOG:
> - ret = qemuDomainRemoveWatchdog(driver, vm, dev->data.watchdog);
> + if (qemuDomainRemoveWatchdog(vm, dev->data.watchdog) < 0)
> + return -1;
> break;
> -
> case VIR_DOMAIN_DEVICE_VSOCK:
> - ret = qemuDomainRemoveVsockDevice(vm, dev->data.vsock);
> + if (qemuDomainRemoveVsockDevice(vm, dev->data.vsock) < 0)
> + return -1;
> break;
>
> case VIR_DOMAIN_DEVICE_NONE:
> @@ -5350,7 +5348,11 @@ qemuDomainRemoveDevice(virQEMUDriverPtr driver,
> virDomainDeviceTypeToString(dev->type));
> break;
> }
> - return ret;
> +
> + event = virDomainEventDeviceRemovedNewFromObj(vm, alias);
> + virObjectEventStateQueue(driver->domainEventState, event);
Btw, this is probably a regression since we fixed locking of the
'driver' object. Until then the event would stay queued until the end of
the API.
It took me a minute to parse that. So what you mean is that the "event
is dispatched to the application too early" bug was a regression
resulting from a fix made to driver locking, right? That makes a lot of
sense.
ACK when the alias is copied best-effort rather than reporting error and
info is set to NULL appropriately.