[libvirt] [PATCH 0/2] qemu: Refactor reconnecting after daemon restart

Peter Krempa (2): qemu: driver: Reload snapshots and managedsaves prior to reconnecting qemu: process: Refactor reconnecting to qemu processes src/qemu/qemu_driver.c | 4 +- src/qemu/qemu_process.c | 167 ++++++++++++++++++++++-------------------------- 2 files changed, 78 insertions(+), 93 deletions(-) -- 2.1.0

Reconnect to the VM is a possibly long-running job spawned in a separate thread. We should reload the snapshot defs and managedsave state prior to spawning the thread to avoid blocking of the daemon startup which would serialize on the VM lock. Also the reloading code would violate the domain job held while reconnecting as the loader functions don't create jobs. --- src/qemu/qemu_driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 5dc62b0..31565bb 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -873,8 +873,6 @@ qemuStateInitialize(bool privileged, NULL, NULL) < 0) goto error; - qemuProcessReconnectAll(conn, qemu_driver); - virDomainObjListForEach(qemu_driver->domains, qemuDomainSnapshotLoad, cfg->snapshotDir); @@ -883,6 +881,8 @@ qemuStateInitialize(bool privileged, qemuDomainManagedSaveLoad, qemu_driver); + qemuProcessReconnectAll(conn, qemu_driver); + qemu_driver->workerPool = virThreadPoolNew(0, 1, 0, qemuProcessEventHandler, qemu_driver); if (!qemu_driver->workerPool) goto error; -- 2.1.0

On 12/03/2014 08:02 AM, Peter Krempa wrote:
Reconnect to the VM is a possibly long-running job spawned in a separate thread. We should reload the snapshot defs and managedsave state prior to spawning the thread to avoid blocking of the daemon startup which would serialize on the VM lock.
Also the reloading code would violate the domain job held while reconnecting as the loader functions don't create jobs. --- src/qemu/qemu_driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
ACK -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 12/03/14 17:49, Eric Blake wrote:
On 12/03/2014 08:02 AM, Peter Krempa wrote:
Reconnect to the VM is a possibly long-running job spawned in a separate thread. We should reload the snapshot defs and managedsave state prior to spawning the thread to avoid blocking of the daemon startup which would serialize on the VM lock.
Also the reloading code would violate the domain job held while reconnecting as the loader functions don't create jobs. --- src/qemu/qemu_driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
ACK
Pushed; Thanks. Peter

Move entering the job into the thread to simplify the program flow. Also as the code holds a separate reference to the domain object some conditions can be simplified. --- src/qemu/qemu_process.c | 167 ++++++++++++++++++++++-------------------------- 1 file changed, 76 insertions(+), 91 deletions(-) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 08d6b7c..2029745 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3524,8 +3524,7 @@ qemuProcessUpdateDevices(virQEMUDriverPtr driver, struct qemuProcessReconnectData { virConnectPtr conn; virQEMUDriverPtr driver; - void *payload; - struct qemuDomainJobObj oldjob; + virDomainObjPtr obj; }; /* * Open an existing VM's monitor, re-detect VCPU threads @@ -3534,13 +3533,27 @@ struct qemuProcessReconnectData { * We own the virConnectPtr we are passed here - whoever started * this thread function has increased the reference counter to it * so that we now have to close it. + * + * This function also inherits a locked domain object. + * + * This function needs to: + * 1. Enter job + * 1. just before monitor reconnect do lightweight MonitorEnter + * (increase VM refcount and unlock VM) + * 2. reconnect to monitor + * 3. do lightweight MonitorExit (lock VM) + * 4. continue reconnect process + * 5. EndJob + * + * We can't do normal MonitorEnter & MonitorExit because these two lock the + * monitor lock, which does not exists in this early phase. */ static void qemuProcessReconnect(void *opaque) { struct qemuProcessReconnectData *data = opaque; virQEMUDriverPtr driver = data->driver; - virDomainObjPtr obj = data->payload; + virDomainObjPtr obj = data->obj; qemuDomainObjPrivatePtr priv; virConnectPtr conn = data->conn; struct qemuDomainJobObj oldjob; @@ -3550,26 +3563,24 @@ qemuProcessReconnect(void *opaque) size_t i; int ret; - memcpy(&oldjob, &data->oldjob, sizeof(oldjob)); - VIR_FREE(data); - virNWFilterReadLockFilterUpdates(); + qemuDomainObjRestoreJob(obj, &oldjob); - virObjectLock(obj); + /* Hold an extra reference because we can't allow 'vm' to be + * deleted if qemuConnectMonitor() failed */ + virObjectRef(obj); + + if (qemuDomainObjBeginJob(driver, obj, QEMU_JOB_MODIFY) < 0) + goto killvm; + + virNWFilterReadLockFilterUpdates(); cfg = virQEMUDriverGetConfig(driver); VIR_DEBUG("Reconnect monitor to %p '%s'", obj, obj->def->name); priv = obj->privateData; - /* Job was started by the caller for us */ - qemuDomainObjTransferJob(obj); - - /* Hold an extra reference because we can't allow 'vm' to be - * deleted if qemuConnectMonitor() failed */ - virObjectRef(obj); - /* XXX check PID liveliness & EXE path */ if (qemuConnectMonitor(driver, obj, QEMU_ASYNC_JOB_NONE, -1) < 0) goto error; @@ -3701,10 +3712,10 @@ qemuProcessReconnect(void *opaque) driver->inhibitCallback(true, driver->inhibitOpaque); endjob: - if (!qemuDomainObjEndJob(driver, obj)) - obj = NULL; + /* we hold an extra reference, so this will never fail */ + ignore_value(qemuDomainObjEndJob(driver, obj)); - if (obj && virObjectUnref(obj)) + if (virObjectUnref(obj)) virObjectUnlock(obj); virObjectUnref(conn); @@ -3714,35 +3725,35 @@ qemuProcessReconnect(void *opaque) return; error: - if (!qemuDomainObjEndJob(driver, obj)) - obj = NULL; - - if (obj) { - if (!virDomainObjIsActive(obj)) { - if (virObjectUnref(obj)) - virObjectUnlock(obj); - } else if (virObjectUnref(obj)) { - /* We can't get the monitor back, so must kill the VM - * to remove danger of it ending up running twice if - * user tries to start it again later - */ - if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_NO_SHUTDOWN)) { - /* If we couldn't get the monitor and qemu supports - * no-shutdown, we can safely say that the domain - * crashed ... */ - state = VIR_DOMAIN_SHUTOFF_CRASHED; - } else { - /* ... but if it doesn't we can't say what the state - * really is and FAILED means "failed to start" */ - state = VIR_DOMAIN_SHUTOFF_UNKNOWN; - } - qemuProcessStop(driver, obj, state, 0); - if (!obj->persistent) - qemuDomainRemoveInactive(driver, obj); - else - virObjectUnlock(obj); + /* we hold an extra reference, so this will never fail */ + ignore_value(qemuDomainObjEndJob(driver, obj)); + + killvm: + if (virDomainObjIsActive(obj)) { + /* We can't get the monitor back, so must kill the VM + * to remove danger of it ending up running twice if + * user tries to start it again later + */ + if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_NO_SHUTDOWN)) { + /* If we couldn't get the monitor and qemu supports + * no-shutdown, we can safely say that the domain + * crashed ... */ + state = VIR_DOMAIN_SHUTOFF_CRASHED; + } else { + /* ... but if it doesn't we can't say what the state + * really is and FAILED means "failed to start" */ + state = VIR_DOMAIN_SHUTOFF_UNKNOWN; } + qemuProcessStop(driver, obj, state, 0); + } + + if (virObjectUnref(obj)) { + if (!obj->persistent) + qemuDomainRemoveInactive(driver, obj); + else + virObjectUnlock(obj); } + virObjectUnref(conn); virObjectUnref(cfg); virNWFilterUnlockFilterUpdates(); @@ -3756,71 +3767,45 @@ qemuProcessReconnectHelper(virDomainObjPtr obj, struct qemuProcessReconnectData *src = opaque; struct qemuProcessReconnectData *data; - if (!obj->pid) - return 0; - if (VIR_ALLOC(data) < 0) return -1; memcpy(data, src, sizeof(*data)); - data->payload = obj; - - /* - * We create a separate thread to run qemuProcessReconnect in it. - * However, qemuProcessReconnect needs to: - * 1. just before monitor reconnect do lightweight MonitorEnter - * (increase VM refcount, unlock VM & driver) - * 2. reconnect to monitor - * 3. do lightweight MonitorExit (lock VM) - * 4. continue reconnect process - * 5. EndJob - * - * NB, we can't do normal MonitorEnter & MonitorExit because - * these two lock the monitor lock, which does not exists in - * this early phase. - */ + data->obj = obj; + /* this lock will be eventually transferred to the thread that handles the + * reconnect */ virObjectLock(obj); - qemuDomainObjRestoreJob(obj, &data->oldjob); - - if (qemuDomainObjBeginJob(src->driver, obj, QEMU_JOB_MODIFY) < 0) - goto error; + /* If the VM was inactive, we don't need to reconnect */ + if (!obj->pid) { + virObjectUnlock(obj); + return 0; + } - /* Since we close the connection later on, we have to make sure - * that the threads we start see a valid connection throughout their - * lifetime. We simply increase the reference counter here. + /* Since we close the connection later on, we have to make sure that the + * threads we start see a valid connection throughout their lifetime. We + * simply increase the reference counter here. */ virObjectRef(data->conn); if (virThreadCreate(&thread, false, qemuProcessReconnect, data) < 0) { - - virObjectUnref(data->conn); - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Could not create thread. QEMU initialization " "might be incomplete")); - if (!qemuDomainObjEndJob(src->driver, obj)) { - obj = NULL; - } else if (virObjectUnref(obj)) { - /* We can't spawn a thread and thus connect to monitor. - * Kill qemu */ - qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED, 0); - if (!obj->persistent) - qemuDomainRemoveInactive(src->driver, obj); - else - virObjectUnlock(obj); - } - goto error; - } + /* We can't spawn a thread and thus connect to monitor. Kill qemu. */ + qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED, 0); + if (!obj->persistent) + qemuDomainRemoveInactive(src->driver, obj); + else + virObjectUnlock(obj); - virObjectUnlock(obj); + virObjectUnref(data->conn); + VIR_FREE(data); + return -1; + } return 0; - - error: - VIR_FREE(data); - return -1; } /** -- 2.1.0
participants (2)
-
Eric Blake
-
Peter Krempa