[libvirt] [PATCH 0/2] qemu: Process DEVICE_DELETED event in a separate thread

Jiri Denemark (2): qemu: Finish device removal in the original thread qemu: Process DEVICE_DELETED event in a separate thread src/qemu/qemu_domain.h | 2 ++ src/qemu/qemu_driver.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_hotplug.c | 44 +++++++++++++++++++++++++++++++++----------- src/qemu/qemu_hotplug.h | 2 +- src/qemu/qemu_process.c | 30 +++++++++++++++++++++--------- 5 files changed, 103 insertions(+), 23 deletions(-) -- 1.9.3

If QEMU supports DEVICE_DELETED event, we always call qemuDomainRemoveDevice from the event handler. However, we will need to push this call away from the main event loop and begin a job for it (see the following commit), we need to make sure the device if fully removed by the original thread (and within its existing job) in case the DEVICE_DELETED event arrives before qemuDomainWaitForDeviceRemoval times out. Without this patch, device removals would be guaranteed to never finish before the timeout because the could would be blocked by the original job being still active. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_hotplug.c | 44 +++++++++++++++++++++++++++++++++----------- src/qemu/qemu_hotplug.h | 2 +- src/qemu/qemu_process.c | 3 ++- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 76e289b..fdede5d 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2780,8 +2780,9 @@ qemuDomainResetDeviceRemoval(virDomainObjPtr vm) /* Returns: * -1 on error * 0 when DEVICE_DELETED event is unsupported - * 1 when device removal finished - * 2 device removal did not finish in QEMU_REMOVAL_WAIT_TIME + * 1 when DEVICE_DELETED arrived before the timeout and the caller is + * responsible for finishing the removal + * 2 device removal did not finish in qemuDomainRemoveDeviceWaitTime */ static int qemuDomainWaitForDeviceRemoval(virDomainObjPtr vm) @@ -2812,7 +2813,13 @@ qemuDomainWaitForDeviceRemoval(virDomainObjPtr vm) return 1; } -void +/* Returns: + * true there was a thread waiting for devAlias to be removed and this + * thread will take care of finishing the removal + * false the thread that started the removal is already gone and delegate + * finishing the removal to a new thread + */ +bool qemuDomainSignalDeviceRemoval(virDomainObjPtr vm, const char *devAlias) { @@ -2821,7 +2828,9 @@ qemuDomainSignalDeviceRemoval(virDomainObjPtr vm, if (STREQ_NULLABLE(priv->unpluggingDevice, devAlias)) { qemuDomainResetDeviceRemoval(vm); virCondSignal(&priv->unplugFinished); + return true; } + return false; } @@ -2833,6 +2842,7 @@ qemuDomainDetachVirtioDiskDevice(virQEMUDriverPtr driver, int ret = -1; qemuDomainObjPrivatePtr priv = vm->privateData; char *drivestr = NULL; + int rc; if (qemuIsMultiFunctionDevice(vm->def, &detach->info)) { virReportError(VIR_ERR_OPERATION_FAILED, @@ -2889,7 +2899,8 @@ qemuDomainDetachVirtioDiskDevice(virQEMUDriverPtr driver, qemuDomainObjExitMonitor(driver, vm); - if (!qemuDomainWaitForDeviceRemoval(vm)) + rc = qemuDomainWaitForDeviceRemoval(vm); + if (rc == 0 || rc == 1) qemuDomainRemoveDiskDevice(driver, vm, detach); ret = 0; @@ -2907,6 +2918,7 @@ qemuDomainDetachDiskDevice(virQEMUDriverPtr driver, int ret = -1; qemuDomainObjPrivatePtr priv = vm->privateData; char *drivestr = NULL; + int rc; if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) { virReportError(VIR_ERR_OPERATION_FAILED, @@ -2943,7 +2955,8 @@ qemuDomainDetachDiskDevice(virQEMUDriverPtr driver, qemuDomainObjExitMonitor(driver, vm); - if (!qemuDomainWaitForDeviceRemoval(vm)) + rc = qemuDomainWaitForDeviceRemoval(vm); + if (rc == 0 || rc == 1) qemuDomainRemoveDiskDevice(driver, vm, detach); ret = 0; @@ -3063,6 +3076,7 @@ int qemuDomainDetachControllerDevice(virQEMUDriverPtr driver, int idx, ret = -1; virDomainControllerDefPtr detach = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; + int rc; if ((idx = virDomainControllerFind(vm->def, dev->data.controller->type, @@ -3128,7 +3142,8 @@ int qemuDomainDetachControllerDevice(virQEMUDriverPtr driver, } qemuDomainObjExitMonitor(driver, vm); - if (!qemuDomainWaitForDeviceRemoval(vm)) + rc = qemuDomainWaitForDeviceRemoval(vm); + if (rc == 0 || rc == 1) qemuDomainRemoveControllerDevice(driver, vm, detach); ret = 0; @@ -3280,10 +3295,13 @@ qemuDomainDetachThisHostDevice(virQEMUDriverPtr driver, return -1; } - if (ret < 0) + if (ret < 0) { virDomainAuditHostdev(vm, detach, "detach", false); - else if (!qemuDomainWaitForDeviceRemoval(vm)) - qemuDomainRemoveHostDevice(driver, vm, detach); + } else { + int rc = qemuDomainWaitForDeviceRemoval(vm); + if (rc == 0 || rc == 1) + qemuDomainRemoveHostDevice(driver, vm, detach); + } qemuDomainResetDeviceRemoval(vm); @@ -3361,6 +3379,7 @@ qemuDomainDetachNetDevice(virQEMUDriverPtr driver, qemuDomainObjPrivatePtr priv = vm->privateData; int vlan; char *hostnet_name = NULL; + int rc; if ((detachidx = virDomainNetFindIdx(vm->def, dev->data.net)) < 0) goto cleanup; @@ -3440,7 +3459,8 @@ qemuDomainDetachNetDevice(virQEMUDriverPtr driver, } qemuDomainObjExitMonitor(driver, vm); - if (!qemuDomainWaitForDeviceRemoval(vm)) + rc = qemuDomainWaitForDeviceRemoval(vm); + if (rc == 0 || rc == 1) qemuDomainRemoveNetDevice(driver, vm, detach); ret = 0; @@ -3578,6 +3598,7 @@ int qemuDomainDetachChrDevice(virQEMUDriverPtr driver, virDomainChrDefPtr tmpChr; char *charAlias = NULL; char *devstr = NULL; + int rc; if (!(tmpChr = virDomainChrFind(vmdef, chr))) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", @@ -3605,7 +3626,8 @@ int qemuDomainDetachChrDevice(virQEMUDriverPtr driver, } qemuDomainObjExitMonitor(driver, vm); - if (!qemuDomainWaitForDeviceRemoval(vm)) + rc = qemuDomainWaitForDeviceRemoval(vm); + if (rc == 0 || rc == 1) qemuDomainRemoveChrDevice(driver, vm, tmpChr); ret = 0; diff --git a/src/qemu/qemu_hotplug.h b/src/qemu/qemu_hotplug.h index ffc435c..9ed630a 100644 --- a/src/qemu/qemu_hotplug.h +++ b/src/qemu/qemu_hotplug.h @@ -106,7 +106,7 @@ void qemuDomainRemoveDevice(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainDeviceDefPtr dev); -void qemuDomainSignalDeviceRemoval(virDomainObjPtr vm, +bool qemuDomainSignalDeviceRemoval(virDomainObjPtr vm, const char *devAlias); #endif /* __QEMU_HOTPLUG_H__ */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 124fe28..9f9aa69 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -1381,7 +1381,8 @@ qemuProcessHandleDeviceDeleted(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DEBUG("Device %s removed from domain %p %s", devAlias, vm, vm->def->name); - qemuDomainSignalDeviceRemoval(vm, devAlias); + if (qemuDomainSignalDeviceRemoval(vm, devAlias)) + goto cleanup; if (virDomainDefFindDevice(vm->def, devAlias, &dev, true) < 0) goto cleanup; -- 1.9.3

On 05/26/14 17:30, Jiri Denemark wrote:
If QEMU supports DEVICE_DELETED event, we always call qemuDomainRemoveDevice from the event handler. However, we will need to push this call away from the main event loop and begin a job for it (see the following commit), we need to make sure the device if fully removed by the original thread (and within its existing job) in case the DEVICE_DELETED event arrives before qemuDomainWaitForDeviceRemoval times out.
Without this patch, device removals would be guaranteed to never finish before the timeout because the could would be blocked by the original job being still active.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_hotplug.c | 44 +++++++++++++++++++++++++++++++++----------- src/qemu/qemu_hotplug.h | 2 +- src/qemu/qemu_process.c | 3 ++- 3 files changed, 36 insertions(+), 13 deletions(-)
ACK, Peter

Currently, we don not acquire any job when removing a device after DEVICE_DELETED event was received from QEMU. This means that if there is another API running at the time DEVICE_DELETED is delivered and the API acquired a job, we may happily change the definition of the domain the API is working with whenever it unlocks the domain object (e.g., to talk with its monitor). That said, we have to acquire a job before finishing device removal to make things safe. However, doing so in the main event loop would cause a deadlock so we need to move most of the event handler into a separate thread. Another good reason for both acquiring a job and handling the event in a separate thread is that we currently remove a device backend immediately after removing its frontend while we should only remove the backend once we already received DEVICE_DELETED event. That is, we will have to talk to QEMU monitor from the event handler. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_domain.h | 2 ++ src/qemu/qemu_driver.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_process.c | 27 +++++++++++++++++++-------- 3 files changed, 67 insertions(+), 10 deletions(-) diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 87017e5..d7b010c 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -181,6 +181,7 @@ struct _qemuDomainObjPrivate { typedef enum { QEMU_PROCESS_EVENT_WATCHDOG = 0, QEMU_PROCESS_EVENT_GUESTPANIC, + QEMU_PROCESS_EVENT_DEVICE_DELETED, QEMU_PROCESS_EVENT_LAST } qemuProcessEventType; @@ -189,6 +190,7 @@ struct qemuProcessEvent { virDomainObjPtr vm; qemuProcessEventType eventType; int action; + void *data; }; const char *qemuDomainAsyncJobPhaseToString(enum qemuDomainAsyncJob job, diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 2b852eb..2fb0dbf 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3955,6 +3955,47 @@ processGuestPanicEvent(virQEMUDriverPtr driver, virObjectUnref(cfg); } + +static void +processDeviceDeletedEvent(virQEMUDriverPtr driver, + virDomainObjPtr vm, + char *devAlias) +{ + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + virDomainDeviceDef dev; + + VIR_DEBUG("Removing device %s from domain %p %s", + devAlias, vm, vm->def->name); + + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + goto cleanup; + + if (!virDomainObjIsActive(vm)) { + VIR_DEBUG("Domain is not running"); + goto endjob; + } + + if (virDomainDefFindDevice(vm->def, devAlias, &dev, true) < 0) + goto endjob; + + qemuDomainRemoveDevice(driver, vm, &dev); + + if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) + VIR_WARN("unable to save domain status after removing device %s", + devAlias); + + endjob: + /* We had an extra reference to vm before starting a new job so ending the + * job is guaranteed not to remove the last reference. + */ + ignore_value(qemuDomainObjEndJob(driver, vm)); + + cleanup: + VIR_FREE(devAlias); + virObjectUnref(cfg); +} + + static void qemuProcessEventHandler(void *data, void *opaque) { struct qemuProcessEvent *processEvent = data; @@ -3972,8 +4013,11 @@ static void qemuProcessEventHandler(void *data, void *opaque) case QEMU_PROCESS_EVENT_GUESTPANIC: processGuestPanicEvent(driver, vm, processEvent->action); break; - default: - break; + case QEMU_PROCESS_EVENT_DEVICE_DELETED: + processDeviceDeletedEvent(driver, vm, processEvent->data); + break; + case QEMU_PROCESS_EVENT_LAST: + break; } if (virObjectUnref(vm)) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 9f9aa69..d719716 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -1373,8 +1373,8 @@ qemuProcessHandleDeviceDeleted(qemuMonitorPtr mon ATTRIBUTE_UNUSED, void *opaque) { virQEMUDriverPtr driver = opaque; - virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); - virDomainDeviceDef dev; + struct qemuProcessEvent *processEvent = NULL; + char *data; virObjectLock(vm); @@ -1384,18 +1384,29 @@ qemuProcessHandleDeviceDeleted(qemuMonitorPtr mon ATTRIBUTE_UNUSED, if (qemuDomainSignalDeviceRemoval(vm, devAlias)) goto cleanup; - if (virDomainDefFindDevice(vm->def, devAlias, &dev, true) < 0) - goto cleanup; + if (VIR_ALLOC(processEvent) < 0) + goto error; - qemuDomainRemoveDevice(driver, vm, &dev); + processEvent->eventType = QEMU_PROCESS_EVENT_DEVICE_DELETED; + if (VIR_STRDUP(data, devAlias) < 0) + goto error; + processEvent->data = data; + processEvent->vm = vm; - if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) - VIR_WARN("unable to save domain status with balloon change"); + virObjectRef(vm); + if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) { + ignore_value(virObjectUnref(vm)); + goto error; + } cleanup: virObjectUnlock(vm); - virObjectUnref(cfg); return 0; + error: + if (processEvent) + VIR_FREE(processEvent->data); + VIR_FREE(processEvent); + goto cleanup; } -- 1.9.3

On 05/26/14 17:30, Jiri Denemark wrote:
Currently, we don not acquire any job when removing a device after DEVICE_DELETED event was received from QEMU. This means that if there is another API running at the time DEVICE_DELETED is delivered and the API acquired a job, we may happily change the definition of the domain the API is working with whenever it unlocks the domain object (e.g., to talk with its monitor). That said, we have to acquire a job before finishing device removal to make things safe. However, doing so in the main event loop would cause a deadlock so we need to move most of the event handler into a separate thread.
Another good reason for both acquiring a job and handling the event in a separate thread is that we currently remove a device backend immediately after removing its frontend while we should only remove the backend once we already received DEVICE_DELETED event. That is, we will have to talk to QEMU monitor from the event handler.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_domain.h | 2 ++ src/qemu/qemu_driver.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_process.c | 27 +++++++++++++++++++-------- 3 files changed, 67 insertions(+), 10 deletions(-)
ACK, Peter

On Mon, May 26, 2014 at 17:30:19 +0200, Jiri Denemark wrote:
Jiri Denemark (2): qemu: Finish device removal in the original thread qemu: Process DEVICE_DELETED event in a separate thread
src/qemu/qemu_domain.h | 2 ++ src/qemu/qemu_driver.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_hotplug.c | 44 +++++++++++++++++++++++++++++++++----------- src/qemu/qemu_hotplug.h | 2 +- src/qemu/qemu_process.c | 30 +++++++++++++++++++++--------- 5 files changed, 103 insertions(+), 23 deletions(-)
Pushed. Jirka
participants (2)
-
Jiri Denemark
-
Peter Krempa