[libvirt] [PATCH v3 0/5] qemu_hotplug: Fix a rare race condition when detaching a device twice

v3 of: https://www.redhat.com/archives/libvir-list/2019-March/msg00924.html diff to v2: - Dropped @enterMonitor from qemuDomainDeleteDevice, - Simplified event checking since @vm is always locked after @enterMonitor is dropped, - Switched to plain qemuMonitorDelDevice for zpci device - Added comment about error reporting to 3/5 Note that 3/5 was ACKed in v2. Michal Prívozník (5): qemu_hotplug: Introduce and use qemuDomainDeleteDevice DO NOT APPLY: Simple reproducer qemuMonitorJSONDelDevice: Return -2 on DeviceNotFound error qemu_hotplug: Fix a rare race condition when detaching a device twice DO NOT APPLY: Revert simple reproducer src/qemu/qemu_domain.h | 1 + src/qemu/qemu_hotplug.c | 278 +++++++++++++++-------------------- src/qemu/qemu_monitor.c | 10 ++ src/qemu/qemu_monitor_json.c | 5 + 4 files changed, 134 insertions(+), 160 deletions(-) -- 2.19.2

The aim of this function will be to fix return value of qemuMonitorDelDevice() in one specific case. But that is yet to come. Right now this is nothing but a plain substitution. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_hotplug.c | 250 +++++++++++++++------------------------- 1 file changed, 90 insertions(+), 160 deletions(-) diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index f43f80668c..0a3ee2628c 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -67,6 +67,45 @@ VIR_LOG_INIT("qemu.qemu_hotplug"); unsigned long long qemuDomainRemoveDeviceWaitTime = 1000ull * 5; +/** + * qemuDomainDeleteDevice: + * @vm: domain object + * @alias: device to remove + * + * This is a wrapper over qemuMonitorDelDevice() plus enter/exit + * monitor calls. This function MUST be used instead of plain + * qemuMonitorDelDevice() in all places where @alias represents a + * device from domain XML, i.e. caller marks the device for + * removal and then calls qemuDomainWaitForDeviceRemoval() + * followed by qemuDomainRemove*Device(). + * + * For collateral devices (e.g. extension devices like zPCI) it + * is safe to use plain qemuMonitorDelDevice(). + * + * Upon entry, @vm must be locked. + * + * Returns: 0 on success, + * -1 otherwise. + */ +static int +qemuDomainDeleteDevice(virDomainObjPtr vm, + const char *alias) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + virQEMUDriverPtr driver = priv->driver; + int rc; + + qemuDomainObjEnterMonitor(driver, vm); + + rc = qemuMonitorDelDevice(priv->mon, alias); + + if (qemuDomainObjExitMonitor(driver, vm) < 0) + rc = -1; + + return rc; +} + + /** * qemuHotplugPrepareDiskSourceAccess: * @driver: qemu driver struct @@ -5216,7 +5255,6 @@ qemuDomainDetachVirtioDiskDevice(virQEMUDriverPtr driver, bool async) { int ret = -1; - qemuDomainObjPrivatePtr priv = vm->privateData; if (qemuIsMultiFunctionDevice(vm->def, &detach->info)) { virReportError(VIR_ERR_OPERATION_FAILED, @@ -5228,15 +5266,11 @@ qemuDomainDetachVirtioDiskDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, &detach->info); - qemuDomainObjEnterMonitor(driver, vm); - if (qemuMonitorDelDevice(priv->mon, detach->info.alias) < 0) { - if (qemuDomainObjExitMonitor(driver, vm) < 0) - goto cleanup; - virDomainAuditDisk(vm, detach->src, NULL, "detach", false); + if (qemuDomainDeleteDevice(vm, detach->info.alias) < 0) { + if (virDomainObjIsActive(vm)) + virDomainAuditDisk(vm, detach->src, NULL, "detach", false); goto cleanup; } - if (qemuDomainObjExitMonitor(driver, vm) < 0) - goto cleanup; if (async) { ret = 0; @@ -5258,7 +5292,6 @@ qemuDomainDetachDiskDevice(virQEMUDriverPtr driver, bool async) { int ret = -1; - qemuDomainObjPrivatePtr priv = vm->privateData; if (qemuDomainDiskBlockJobIsActive(detach)) goto cleanup; @@ -5266,15 +5299,11 @@ qemuDomainDetachDiskDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, &detach->info); - qemuDomainObjEnterMonitor(driver, vm); - if (qemuMonitorDelDevice(priv->mon, detach->info.alias) < 0) { - if (qemuDomainObjExitMonitor(driver, vm) < 0) - goto cleanup; - virDomainAuditDisk(vm, detach->src, NULL, "detach", false); + if (qemuDomainDeleteDevice(vm, detach->info.alias) < 0) { + if (virDomainObjIsActive(vm)) + virDomainAuditDisk(vm, detach->src, NULL, "detach", false); goto cleanup; } - if (qemuDomainObjExitMonitor(driver, vm) < 0) - goto cleanup; if (async) { ret = 0; @@ -5455,19 +5484,18 @@ int qemuDomainDetachControllerDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, &detach->info); - qemuDomainObjEnterMonitor(driver, vm); - if (detach->type == VIR_DOMAIN_CONTROLLER_TYPE_PCI && - qemuDomainDetachExtensionDevice(priv->mon, &detach->info) < 0) { - goto exit_monitor; - } + if (detach->type == VIR_DOMAIN_CONTROLLER_TYPE_PCI) { + int rc; + qemuDomainObjEnterMonitor(driver, vm); + rc = qemuDomainDetachExtensionDevice(priv->mon, &detach->info); + if (qemuDomainObjExitMonitor(driver, vm) < 0) + rc = -1; - if (qemuMonitorDelDevice(priv->mon, detach->info.alias) < 0) { - ignore_value(qemuDomainObjExitMonitor(driver, vm)); - goto cleanup; + if (rc < 0) + goto cleanup; } - exit_monitor: - if (qemuDomainObjExitMonitor(driver, vm) < 0) + if (qemuDomainDeleteDevice(vm, detach->info.alias) < 0) goto cleanup; if (async) { @@ -5484,14 +5512,11 @@ int qemuDomainDetachControllerDevice(virQEMUDriverPtr driver, } static int -qemuDomainDetachHostPCIDevice(virQEMUDriverPtr driver, - virDomainObjPtr vm, +qemuDomainDetachHostPCIDevice(virDomainObjPtr vm, virDomainHostdevDefPtr detach, bool async) { - qemuDomainObjPrivatePtr priv = vm->privateData; virDomainHostdevSubsysPCIPtr pcisrc = &detach->source.subsys.u.pci; - int ret; if (qemuIsMultiFunctionDevice(vm->def, detach->info)) { virReportError(VIR_ERR_OPERATION_FAILED, @@ -5504,23 +5529,14 @@ qemuDomainDetachHostPCIDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, detach->info); - qemuDomainObjEnterMonitor(driver, vm); - ret = qemuMonitorDelDevice(priv->mon, detach->info->alias); - if (qemuDomainObjExitMonitor(driver, vm) < 0) - ret = -1; - - return ret; + return qemuDomainDeleteDevice(vm, detach->info->alias); } static int -qemuDomainDetachHostUSBDevice(virQEMUDriverPtr driver, - virDomainObjPtr vm, +qemuDomainDetachHostUSBDevice(virDomainObjPtr vm, virDomainHostdevDefPtr detach, bool async) { - qemuDomainObjPrivatePtr priv = vm->privateData; - int ret; - if (!detach->info->alias) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("device cannot be detached without a device alias")); @@ -5530,23 +5546,14 @@ qemuDomainDetachHostUSBDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, detach->info); - qemuDomainObjEnterMonitor(driver, vm); - ret = qemuMonitorDelDevice(priv->mon, detach->info->alias); - if (qemuDomainObjExitMonitor(driver, vm) < 0) - ret = -1; - - return ret; + return qemuDomainDeleteDevice(vm, detach->info->alias); } static int -qemuDomainDetachHostSCSIDevice(virQEMUDriverPtr driver, - virDomainObjPtr vm, +qemuDomainDetachHostSCSIDevice(virDomainObjPtr vm, virDomainHostdevDefPtr detach, bool async) { - qemuDomainObjPrivatePtr priv = vm->privateData; - int ret = -1; - if (!detach->info->alias) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("device cannot be detached without a device alias")); @@ -5556,24 +5563,14 @@ qemuDomainDetachHostSCSIDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, detach->info); - qemuDomainObjEnterMonitor(driver, vm); - ret = qemuMonitorDelDevice(priv->mon, detach->info->alias); - - if (qemuDomainObjExitMonitor(driver, vm) < 0) - return -1; - - return ret; + return qemuDomainDeleteDevice(vm, detach->info->alias); } static int -qemuDomainDetachSCSIVHostDevice(virQEMUDriverPtr driver, - virDomainObjPtr vm, +qemuDomainDetachSCSIVHostDevice(virDomainObjPtr vm, virDomainHostdevDefPtr detach, bool async) { - qemuDomainObjPrivatePtr priv = vm->privateData; - int ret = -1; - if (!detach->info->alias) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("device cannot be detached without a device alias")); @@ -5583,25 +5580,15 @@ qemuDomainDetachSCSIVHostDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, detach->info); - qemuDomainObjEnterMonitor(driver, vm); - ret = qemuMonitorDelDevice(priv->mon, detach->info->alias); - - if (qemuDomainObjExitMonitor(driver, vm) < 0) - return -1; - - return ret; + return qemuDomainDeleteDevice(vm, detach->info->alias); } static int -qemuDomainDetachMediatedDevice(virQEMUDriverPtr driver, - virDomainObjPtr vm, +qemuDomainDetachMediatedDevice(virDomainObjPtr vm, virDomainHostdevDefPtr detach, bool async) { - int ret = -1; - qemuDomainObjPrivatePtr priv = vm->privateData; - if (!detach->info->alias) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("device cannot be detached without a device alias")); @@ -5611,12 +5598,7 @@ qemuDomainDetachMediatedDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, detach->info); - qemuDomainObjEnterMonitor(driver, vm); - ret = qemuMonitorDelDevice(priv->mon, detach->info->alias); - if (qemuDomainObjExitMonitor(driver, vm) < 0) - ret = -1; - - return ret; + return qemuDomainDeleteDevice(vm, detach->info->alias); } @@ -5633,19 +5615,19 @@ qemuDomainDetachThisHostDevice(virQEMUDriverPtr driver, switch (detach->source.subsys.type) { case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI: - ret = qemuDomainDetachHostPCIDevice(driver, vm, detach, async); + ret = qemuDomainDetachHostPCIDevice(vm, detach, async); break; case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB: - ret = qemuDomainDetachHostUSBDevice(driver, vm, detach, async); + ret = qemuDomainDetachHostUSBDevice(vm, detach, async); break; case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_SCSI: - ret = qemuDomainDetachHostSCSIDevice(driver, vm, detach, async); + ret = qemuDomainDetachHostSCSIDevice(vm, detach, async); break; case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_SCSI_HOST: - ret = qemuDomainDetachSCSIVHostDevice(driver, vm, detach, async); + ret = qemuDomainDetachSCSIVHostDevice(vm, detach, async); break; case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_MDEV: - ret = qemuDomainDetachMediatedDevice(driver, vm, detach, async); + ret = qemuDomainDetachMediatedDevice(vm, detach, async); break; default: virReportError(VIR_ERR_CONFIG_UNSUPPORTED, @@ -5762,7 +5744,6 @@ qemuDomainDetachShmemDevice(virQEMUDriverPtr driver, int ret = -1; ssize_t idx = -1; virDomainShmemDefPtr shmem = NULL; - qemuDomainObjPrivatePtr priv = vm->privateData; if ((idx = virDomainShmemDefFind(vm->def, dev)) < 0) { virReportError(VIR_ERR_DEVICE_MISSING, @@ -5791,12 +5772,7 @@ qemuDomainDetachShmemDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, &shmem->info); - qemuDomainObjEnterMonitor(driver, vm); - if (qemuMonitorDelDevice(priv->mon, shmem->info.alias) < 0) { - ignore_value(qemuDomainObjExitMonitor(driver, vm)); - goto cleanup; - } - if (qemuDomainObjExitMonitor(driver, vm) < 0) + if (qemuDomainDeleteDevice(vm, shmem->info.alias) < 0) goto cleanup; if (async) { @@ -5821,7 +5797,6 @@ qemuDomainDetachWatchdog(virQEMUDriverPtr driver, { int ret = -1; virDomainWatchdogDefPtr watchdog = vm->def->watchdog; - qemuDomainObjPrivatePtr priv = vm->privateData; if (!watchdog) { virReportError(VIR_ERR_DEVICE_MISSING, "%s", @@ -5852,12 +5827,7 @@ qemuDomainDetachWatchdog(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, &watchdog->info); - qemuDomainObjEnterMonitor(driver, vm); - if (qemuMonitorDelDevice(priv->mon, watchdog->info.alias) < 0) { - ignore_value(qemuDomainObjExitMonitor(driver, vm)); - goto cleanup; - } - if (qemuDomainObjExitMonitor(driver, vm) < 0) + if (qemuDomainDeleteDevice(vm, watchdog->info.alias) < 0) goto cleanup; if (async) { @@ -5881,7 +5851,6 @@ qemuDomainDetachRedirdevDevice(virQEMUDriverPtr driver, bool async) { int ret = -1; - qemuDomainObjPrivatePtr priv = vm->privateData; virDomainRedirdevDefPtr tmpRedirdevDef; ssize_t idx; @@ -5902,12 +5871,7 @@ qemuDomainDetachRedirdevDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, &tmpRedirdevDef->info); - qemuDomainObjEnterMonitor(driver, vm); - if (qemuMonitorDelDevice(priv->mon, tmpRedirdevDef->info.alias) < 0) { - ignore_value(qemuDomainObjExitMonitor(driver, vm)); - goto cleanup; - } - if (qemuDomainObjExitMonitor(driver, vm) < 0) + if (qemuDomainDeleteDevice(vm, tmpRedirdevDef->info.alias) < 0) goto cleanup; if (async) { @@ -5932,7 +5896,6 @@ qemuDomainDetachNetDevice(virQEMUDriverPtr driver, { int detachidx, ret = -1; virDomainNetDefPtr detach = NULL; - qemuDomainObjPrivatePtr priv = vm->privateData; if ((detachidx = virDomainNetFindIdx(vm->def, dev->data.net)) < 0) goto cleanup; @@ -5973,15 +5936,11 @@ qemuDomainDetachNetDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, &detach->info); - qemuDomainObjEnterMonitor(driver, vm); - if (qemuMonitorDelDevice(priv->mon, detach->info.alias) < 0) { - if (qemuDomainObjExitMonitor(driver, vm) < 0) - goto cleanup; - virDomainAuditNet(vm, detach, NULL, "detach", false); + if (qemuDomainDeleteDevice(vm, detach->info.alias) < 0) { + if (virDomainObjIsActive(vm)) + virDomainAuditNet(vm, detach, NULL, "detach", false); goto cleanup; } - if (qemuDomainObjExitMonitor(driver, vm) < 0) - goto cleanup; if (async) { ret = 0; @@ -6144,20 +6103,19 @@ int qemuDomainDetachChrDevice(virQEMUDriverPtr driver, if (!async && !guestfwd) qemuDomainMarkDeviceForRemoval(vm, &tmpChr->info); - qemuDomainObjEnterMonitor(driver, vm); if (guestfwd) { - if (qemuMonitorRemoveNetdev(priv->mon, tmpChr->info.alias) < 0) { - ignore_value(qemuDomainObjExitMonitor(driver, vm)); + int rc; + qemuDomainObjEnterMonitor(driver, vm); + rc = qemuMonitorRemoveNetdev(priv->mon, tmpChr->info.alias); + if (qemuDomainObjExitMonitor(driver, vm) < 0) + rc = -1; + + if (rc < 0) goto cleanup; - } } else { - if (qemuMonitorDelDevice(priv->mon, tmpChr->info.alias) < 0) { - ignore_value(qemuDomainObjExitMonitor(driver, vm)); + if (qemuDomainDeleteDevice(vm, tmpChr->info.alias) < 0) goto cleanup; - } } - if (qemuDomainObjExitMonitor(driver, vm) < 0) - goto cleanup; if (guestfwd) { ret = qemuDomainRemoveChrDevice(driver, vm, tmpChr, false); @@ -6181,10 +6139,8 @@ qemuDomainDetachRNGDevice(virQEMUDriverPtr driver, virDomainRNGDefPtr rng, bool async) { - qemuDomainObjPrivatePtr priv = vm->privateData; ssize_t idx; virDomainRNGDefPtr tmpRNG; - int rc; int ret = -1; if ((idx = virDomainRNGFind(vm->def, rng)) < 0) { @@ -6206,9 +6162,7 @@ qemuDomainDetachRNGDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, &tmpRNG->info); - qemuDomainObjEnterMonitor(driver, vm); - rc = qemuMonitorDelDevice(priv->mon, tmpRNG->info.alias); - if (qemuDomainObjExitMonitor(driver, vm) || rc < 0) + if (qemuDomainDeleteDevice(vm, tmpRNG->info.alias) < 0) goto cleanup; if (async) { @@ -6231,10 +6185,8 @@ qemuDomainDetachMemoryDevice(virQEMUDriverPtr driver, virDomainMemoryDefPtr memdef, bool async) { - qemuDomainObjPrivatePtr priv = vm->privateData; virDomainMemoryDefPtr mem; int idx; - int rc; int ret = -1; qemuDomainMemoryDeviceAlignSize(vm->def, memdef); @@ -6258,9 +6210,7 @@ qemuDomainDetachMemoryDevice(virQEMUDriverPtr driver, if (!async) qemuDomainMarkDeviceForRemoval(vm, &mem->info); - qemuDomainObjEnterMonitor(driver, vm); - rc = qemuMonitorDelDevice(priv->mon, mem->info.alias); - if (qemuDomainObjExitMonitor(driver, vm) < 0 || rc < 0) + if (qemuDomainDeleteDevice(vm, mem->info.alias) < 0) goto cleanup; if (async) { @@ -6365,15 +6315,9 @@ qemuDomainHotplugDelVcpu(virQEMUDriverPtr driver, qemuDomainMarkDeviceAliasForRemoval(vm, vcpupriv->alias); - qemuDomainObjEnterMonitor(driver, vm); - - rc = qemuMonitorDelDevice(qemuDomainGetMonitor(vm), vcpupriv->alias); - - if (qemuDomainObjExitMonitor(driver, vm) < 0) - goto cleanup; - - if (rc < 0) { - virDomainAuditVcpu(vm, oldvcpus, oldvcpus - nvcpus, "update", false); + if (qemuDomainDeleteDevice(vm, vcpupriv->alias) < 0) { + if (virDomainObjIsActive(vm)) + virDomainAuditVcpu(vm, oldvcpus, oldvcpus - nvcpus, "update", false); goto cleanup; } @@ -6943,8 +6887,6 @@ qemuDomainDetachInputDevice(virDomainObjPtr vm, virDomainInputDefPtr def, bool async) { - qemuDomainObjPrivatePtr priv = vm->privateData; - virQEMUDriverPtr driver = priv->driver; virDomainInputDefPtr input; int ret = -1; int idx; @@ -6974,12 +6916,7 @@ qemuDomainDetachInputDevice(virDomainObjPtr vm, if (!async) qemuDomainMarkDeviceForRemoval(vm, &input->info); - qemuDomainObjEnterMonitor(driver, vm); - if (qemuMonitorDelDevice(priv->mon, input->info.alias) < 0) { - ignore_value(qemuDomainObjExitMonitor(driver, vm)); - goto cleanup; - } - if (qemuDomainObjExitMonitor(driver, vm) < 0) + if (qemuDomainDeleteDevice(vm, input->info.alias) < 0) goto cleanup; if (async) { @@ -7001,8 +6938,6 @@ qemuDomainDetachVsockDevice(virDomainObjPtr vm, virDomainVsockDefPtr dev, bool async) { - qemuDomainObjPrivatePtr priv = vm->privateData; - virQEMUDriverPtr driver = priv->driver; virDomainVsockDefPtr vsock = vm->def->vsock; int ret = -1; @@ -7017,12 +6952,7 @@ qemuDomainDetachVsockDevice(virDomainObjPtr vm, if (!async) qemuDomainMarkDeviceForRemoval(vm, &vsock->info); - qemuDomainObjEnterMonitor(driver, vm); - if (qemuMonitorDelDevice(priv->mon, vsock->info.alias) < 0) { - ignore_value(qemuDomainObjExitMonitor(driver, vm)); - goto cleanup; - } - if (qemuDomainObjExitMonitor(driver, vm) < 0) + if (qemuDomainDeleteDevice(vm, vsock->info.alias) < 0) goto cleanup; if (async) { -- 2.19.2

On Fri, Mar 15, 2019 at 11:23:51 +0100, Michal Privoznik wrote:
The aim of this function will be to fix return value of qemuMonitorDelDevice() in one specific case. But that is yet to come. Right now this is nothing but a plain substitution.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_hotplug.c | 250 +++++++++++++++------------------------- 1 file changed, 90 insertions(+), 160 deletions(-)
ACK

https://bugzilla.redhat.com/show_bug.cgi?id=1623389 Steps to reproduce: 1) cat shmem.xml <shmem name='my_shmem0'> <model type='ivshmem-plain'/> <size unit='M'>4</size> <alias name='ua-123'/> </shmem> 2) virsh attach-device vm1 shmem.xml 3) virsh detach-device-alias vm1 ua-123; virsh detach-device vm1 shmem.xml 4) observe that the device is still in the domain: virsh dumpxml vm1 <shmem name='my_shmem0'> <model type='ivshmem-plain'/> <size unit='M'>4</size> <alias name='ua-123'/> <address type='pci' domain='0x0000' bus='0x00' slot='0x0a' function='0x0'/> </shmem> 5) But qemu has the device no more: virsh detach-device-alias vm1 ua-123 error: Failed to detach device with alias ua-123 error: internal error: unable to execute QEMU command 'device_del': Device 'ua-123' not found This reproducer is to make sure that DELETE_DEVICE event arrives while monitor is unlocked. It is very hard to time qemu and libvirt so that the event comes exactly at the time when detach-device from 3) is doing the monitor call. Simulate this by unlocking monitor, waiting a few seconds and locking the monitor again. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_hotplug.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 0a3ee2628c..c4a0971a65 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -98,6 +98,11 @@ qemuDomainDeleteDevice(virDomainObjPtr vm, qemuDomainObjEnterMonitor(driver, vm); rc = qemuMonitorDelDevice(priv->mon, alias); + if (rc < 0) { + virObjectUnlock(priv->mon); + sleep(10); + virObjectLock(priv->mon); + } if (qemuDomainObjExitMonitor(driver, vm) < 0) rc = -1; -- 2.19.2

A caller might be interested in differentiating the cause for error, especially if DeviceNotFound error occurred. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> ACKed-by: Peter Krempa <pkrempa@redhat.com> --- src/qemu/qemu_monitor.c | 10 ++++++++++ src/qemu/qemu_monitor_json.c | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index 8bd4d4d761..0eb7f60e38 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -3008,6 +3008,16 @@ qemuMonitorDriveDel(qemuMonitorPtr mon, } +/** + * @mon: monitor object + * @devalias: alias of the device to detach + * + * Sends device detach request to qemu. + * + * Returns: 0 on success, + * -2 if DeviceNotFound error encountered (error NOT reported) + * -1 otherwise (error reported) + */ int qemuMonitorDelDevice(qemuMonitorPtr mon, const char *devalias) diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 0236323a51..5c16e1f3a1 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -4191,6 +4191,11 @@ int qemuMonitorJSONDelDevice(qemuMonitorPtr mon, if (qemuMonitorJSONCommand(mon, cmd, &reply) < 0) goto cleanup; + if (qemuMonitorJSONHasError(reply, "DeviceNotFound")) { + ret = -2; + goto cleanup; + } + if (qemuMonitorJSONCheckError(cmd, reply) < 0) goto cleanup; -- 2.19.2

https://bugzilla.redhat.com/show_bug.cgi?id=1623389 If a device is detached twice from the same domain the following race condition may happen: 1) The first DetachDevice() call will issue "device_del" on qemu monitor, but since the DEVICE_DELETED event did not arrive in time, the API ends claiming "Device detach request sent successfully". 2) The second DetachDevice() therefore still find the device in the domain and thus proceeds to detaching it again. It calls EnterMonitor() and qemuMonitorSend() trying to issue "device_del" command again. This gets both domain lock and monitor lock released. 3) At this point, qemu sends us the DEVICE_DELETED event which is going to be handled by the event loop which ends up calling qemuDomainSignalDeviceRemoval() to determine who is going to remove the device from domain definition. Whether it is the caller that marked the device for removal or whether it is going to be the event processing thread. 4) Because the device was marked for removal, qemuDomainSignalDeviceRemoval() returns true, which means the event is to be processed by the thread that has marked the device for removal (and is currently still trying to issue "device_del" command) 5) The thread finally issues the "device_del" command, which fails (obviously) and therefore it calls qemuDomainResetDeviceRemoval() to reset the device marking and quits immediately after, NOT removing any device from the domain definition. At this point, the device is still present in the domain definition but doesn't exist in qemu anymore. Worse, there is no way to remove it from the domain definition. Solution is to note down that we've seen the event and if the second "device_del" fails, not take it as a failure but carry on with the usual execution. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_domain.h | 1 + src/qemu/qemu_hotplug.c | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 9f468e5661..fb361515ba 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -218,6 +218,7 @@ typedef qemuDomainUnpluggingDevice *qemuDomainUnpluggingDevicePtr; struct _qemuDomainUnpluggingDevice { const char *alias; qemuDomainUnpluggingDeviceStatus status; + bool eventSeen; /* True if DEVICE_DELETED event arrived. */ }; diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index c4a0971a65..99abf051bd 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -67,6 +67,9 @@ VIR_LOG_INIT("qemu.qemu_hotplug"); unsigned long long qemuDomainRemoveDeviceWaitTime = 1000ull * 5; +static void +qemuDomainResetDeviceRemoval(virDomainObjPtr vm); + /** * qemuDomainDeleteDevice: * @vm: domain object @@ -104,8 +107,31 @@ qemuDomainDeleteDevice(virDomainObjPtr vm, virObjectLock(priv->mon); } - if (qemuDomainObjExitMonitor(driver, vm) < 0) - rc = -1; + if (qemuDomainObjExitMonitor(driver, vm) < 0) { + /* Domain is no longer running. No cleanup needed. */ + return -1; + } + + if (rc < 0) { + /* Deleting device failed. Let's check if DEVICE_DELETED + * even arrived. If it did, we need to claim success to + * make the caller remove device from domain XML. */ + + if (priv->unplug.eventSeen) { + /* The event arrived. Return success. */ + VIR_DEBUG("Detaching of device %s failed, but event arrived", alias); + qemuDomainResetDeviceRemoval(vm); + rc = 0; + } else if (rc == -2) { + /* The device does not exist in qemu, but it still + * exists in libvirt. Claim success to make caller + * qemuDomainWaitForDeviceRemoval(). Otherwise if + * domain XML is queried right after detach API the + * device would still be there. */ + VIR_DEBUG("Detaching of device %s failed and no event arrived", alias); + rc = 0; + } + } return rc; } @@ -5187,6 +5213,7 @@ qemuDomainResetDeviceRemoval(virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; priv->unplug.alias = NULL; + priv->unplug.eventSeen = false; } /* Returns: @@ -5246,6 +5273,7 @@ qemuDomainSignalDeviceRemoval(virDomainObjPtr vm, VIR_DEBUG("Removal of device '%s' continues in waiting thread", devAlias); qemuDomainResetDeviceRemoval(vm); priv->unplug.status = status; + priv->unplug.eventSeen = true; virDomainObjBroadcast(vm); return true; } -- 2.19.2

On Fri, Mar 15, 2019 at 11:23:54 +0100, Michal Privoznik wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1623389
If a device is detached twice from the same domain the following race condition may happen:
1) The first DetachDevice() call will issue "device_del" on qemu monitor, but since the DEVICE_DELETED event did not arrive in time, the API ends claiming "Device detach request sent successfully".
2) The second DetachDevice() therefore still find the device in the domain and thus proceeds to detaching it again. It calls EnterMonitor() and qemuMonitorSend() trying to issue "device_del" command again. This gets both domain lock and monitor lock released.
3) At this point, qemu sends us the DEVICE_DELETED event which is going to be handled by the event loop which ends up calling qemuDomainSignalDeviceRemoval() to determine who is going to remove the device from domain definition. Whether it is the caller that marked the device for removal or whether it is going to be the event processing thread.
4) Because the device was marked for removal, qemuDomainSignalDeviceRemoval() returns true, which means the event is to be processed by the thread that has marked the device for removal (and is currently still trying to issue "device_del" command)
5) The thread finally issues the "device_del" command, which fails (obviously) and therefore it calls qemuDomainResetDeviceRemoval() to reset the device marking and quits immediately after, NOT removing any device from the domain definition.
At this point, the device is still present in the domain definition but doesn't exist in qemu anymore. Worse, there is no way to remove it from the domain definition.
Solution is to note down that we've seen the event and if the second "device_del" fails, not take it as a failure but carry on with the usual execution.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_domain.h | 1 + src/qemu/qemu_hotplug.c | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-)
ACK

This reverts previous commit. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_hotplug.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 99abf051bd..811fdf6c3a 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -101,11 +101,6 @@ qemuDomainDeleteDevice(virDomainObjPtr vm, qemuDomainObjEnterMonitor(driver, vm); rc = qemuMonitorDelDevice(priv->mon, alias); - if (rc < 0) { - virObjectUnlock(priv->mon); - sleep(10); - virObjectLock(priv->mon); - } if (qemuDomainObjExitMonitor(driver, vm) < 0) { /* Domain is no longer running. No cleanup needed. */ -- 2.19.2
participants (2)
-
Michal Privoznik
-
Peter Krempa