[libvirt] [PATCHv2] qemu: don't leak vm on failure

Failure to attach to a domain during 'virsh qemu-attach' left the list of domains in an odd state: $ virsh qemu-attach 4176 error: An error occurred, but the cause is unknown $ virsh list --all Id Name State ---------------------------------------------------- 2 foo shut off $ virsh qemu-attach 4176 error: Requested operation is not valid: domain is already active as 'foo' $ virsh undefine foo error: Failed to undefine domain foo error: Requested operation is not valid: cannot undefine transient domain $ virsh shutdown foo error: Failed to shutdown domain foo error: invalid argument: monitor must not be NULL It all stems from leaving the list of domains unmodified on the initial failure; we should follow the lead of createXML which removes vm on failure (the actual initial failure still needs to be fixed in a later patch, but at least this patch gets us to the point where we aren't getting stuck with an unremovable "shut off" transient domain). While investigating, I also found a leak in qemuDomainCreateXML; the two functions should behave similarly. Note that there are still two unusual paths: if dom is not allocated, the user will see an OOM error even though the vm remains registered (but oom errors already indicate tricky cleanup); and if the vm starts and then quits again all before the job ends, it is possible to return a non-NULL dom even though the dom will no longer be useful for anything (but this at least lets the user know their short-lived vm ran). * src/qemu/qemu_driver.c (qemuDomainCreateXML): Don't leak vm on failure to obtain job. (qemuDomainQemuAttach): Match cleanup of qemuDomainCreateXML. Signed-off-by: Eric Blake <eblake@redhat.com> --- v1 was discussed here: https://www.redhat.com/archives/libvir-list/2013-September/msg00362.html changes since then: plug another leak of vm, and clean up some style. Expand the commit message to explain unusual paths. 
src/qemu/qemu_driver.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e37fe33..6dafa2b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1602,8 +1602,11 @@ static virDomainPtr qemuDomainCreateXML(virConnectPtr conn, def = NULL; - if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) - goto cleanup; /* XXXX free the 'vm' we created ? */ + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) { + qemuDomainRemoveInactive(driver, vm); + vm = NULL; + goto cleanup; + } if (qemuProcessStart(conn, driver, vm, NULL, -1, NULL, NULL, VIR_NETDEV_VPORT_PROFILE_OP_CREATE, @@ -1631,10 +1634,10 @@ static virDomainPtr qemuDomainCreateXML(virConnectPtr conn, virDomainAuditStart(vm, "booted", true); dom = virGetDomain(conn, vm->def->name, vm->def->uuid); - if (dom) dom->id = vm->def->id; + if (dom) + dom->id = vm->def->id; - if (vm && - qemuDomainObjEndJob(driver, vm) == 0) + if (qemuDomainObjEndJob(driver, vm) == 0) vm = NULL; cleanup: @@ -13630,34 +13633,38 @@ static virDomainPtr qemuDomainQemuAttach(virConnectPtr conn, def = NULL; - if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) { + qemuDomainRemoveInactive(driver, vm); + vm = NULL; goto cleanup; + } if (qemuProcessAttach(conn, driver, vm, pid, pidfile, monConfig, monJSON) < 0) { + if (qemuDomainObjEndJob(driver, vm) > 0) + qemuDomainRemoveInactive(driver, vm); + vm = NULL; monConfig = NULL; - goto endjob; + goto cleanup; } monConfig = NULL; dom = virGetDomain(conn, vm->def->name, vm->def->uuid); - if (dom) dom->id = vm->def->id; + if (dom) + dom->id = vm->def->id; -endjob: if (qemuDomainObjEndJob(driver, vm) == 0) { vm = NULL; - goto cleanup; - } cleanup: virDomainDefFree(def); - virObjectUnref(qemuCaps); virDomainChrSourceDefFree(monConfig); if (vm) virObjectUnlock(vm); VIR_FREE(pidfile); virObjectUnref(caps); + 
virObjectUnref(qemuCaps); return dom; } -- 1.8.3.1

On 09/06/2013 11:55 AM, Eric Blake wrote:
Failure to attach to a domain during 'virsh qemu-attach' left the list of domains in an odd state:
$ virsh qemu-attach 4176 error: An error occurred, but the cause is unknown
-endjob: if (qemuDomainObjEndJob(driver, vm) == 0) { vm = NULL; - goto cleanup; - }
Hmm, I see that I tested something slightly different than what I posted (the '{' needs to be deleted to match the '}' cleanup). Consider that squashed in. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On 07/09/13 01:55, Eric Blake wrote:
Failure to attach to a domain during 'virsh qemu-attach' left the list of domains in an odd state:
$ virsh qemu-attach 4176 error: An error occurred, but the cause is unknown
$ virsh list --all Id Name State ---------------------------------------------------- 2 foo shut off
$ virsh qemu-attach 4176 error: Requested operation is not valid: domain is already active as 'foo'
$ virsh undefine foo error: Failed to undefine domain foo error: Requested operation is not valid: cannot undefine transient domain
$ virsh shutdown foo error: Failed to shutdown domain foo error: invalid argument: monitor must not be NULL
It all stems from leaving the list of domains unmodified on the initial failure; we should follow the lead of createXML which removes vm on failure (the actual initial failure still needs to be fixed in a later patch, but at least this patch gets us to the point where we aren't getting stuck with an unremovable "shut off" transient domain).
While investigating, I also found a leak in qemuDomainCreateXML; the two functions should behave similarly. Note that there are still two unusual paths: if dom is not allocated, the user will see an OOM error even though the vm remains registered (but OOM errors already indicate tricky cleanup); and if the vm starts and then quits again all before the job ends, it is possible to return a non-NULL dom even though the dom will no longer be useful for anything (but this at least lets the user know their short-lived vm ran).
* src/qemu/qemu_driver.c (qemuDomainCreateXML): Don't leak vm on failure to obtain job. (qemuDomainQemuAttach): Match cleanup of qemuDomainCreateXML.
Signed-off-by: Eric Blake <eblake@redhat.com> ---
v1 was discussed here: https://www.redhat.com/archives/libvir-list/2013-September/msg00362.html changes since then: plug another leak of vm, and clean up some style. Expand the commit message to explain unusual paths.
src/qemu/qemu_driver.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e37fe33..6dafa2b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1602,8 +1602,11 @@ static virDomainPtr qemuDomainCreateXML(virConnectPtr conn,
def = NULL;
- if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) - goto cleanup; /* XXXX free the 'vm' we created ? */ + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) { + qemuDomainRemoveInactive(driver, vm); + vm = NULL; + goto cleanup; + }
if (qemuProcessStart(conn, driver, vm, NULL, -1, NULL, NULL, VIR_NETDEV_VPORT_PROFILE_OP_CREATE, @@ -1631,10 +1634,10 @@ static virDomainPtr qemuDomainCreateXML(virConnectPtr conn, virDomainAuditStart(vm, "booted", true);
dom = virGetDomain(conn, vm->def->name, vm->def->uuid); - if (dom) dom->id = vm->def->id; + if (dom) + dom->id = vm->def->id;
- if (vm && - qemuDomainObjEndJob(driver, vm) == 0) + if (qemuDomainObjEndJob(driver, vm) == 0) vm = NULL;
cleanup: @@ -13630,34 +13633,38 @@ static virDomainPtr qemuDomainQemuAttach(virConnectPtr conn,
def = NULL;
- if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) { + qemuDomainRemoveInactive(driver, vm); + vm = NULL; goto cleanup; + }
if (qemuProcessAttach(conn, driver, vm, pid, pidfile, monConfig, monJSON) < 0) { + if (qemuDomainObjEndJob(driver, vm) > 0)
I just realized that qemuDomainObjEndJob returns a bool value. We may need a follow-up patch to clean it up.
+ qemuDomainRemoveInactive(driver, vm); + vm = NULL; monConfig = NULL; - goto endjob; + goto cleanup; }
monConfig = NULL;
dom = virGetDomain(conn, vm->def->name, vm->def->uuid); - if (dom) dom->id = vm->def->id; + if (dom) + dom->id = vm->def->id;
-endjob: if (qemuDomainObjEndJob(driver, vm) == 0) {
ACK with "{" removed.

On 09/08/2013 08:29 AM, Osier Yang wrote:
On 07/09/13 01:55, Eric Blake wrote:
Failure to attach to a domain during 'virsh qemu-attach' left the list of domains in an odd state:
$ virsh qemu-attach 4176 error: An error occurred, but the cause is unknown
* src/qemu/qemu_driver.c (qemuDomainCreateXML): Don't leak vm on failure to obtain job. (qemuDomainQemuAttach): Match cleanup of qemuDomainCreateXML.
Signed-off-by: Eric Blake <eblake@redhat.com> ---
v1 was discussed here: https://www.redhat.com/archives/libvir-list/2013-September/msg00362.html changes since then: plug another leak of vm, and clean up some style. Expand the commit message to explain unusual paths.
if (qemuProcessAttach(conn, driver, vm, pid, pidfile, monConfig, monJSON) < 0) { + if (qemuDomainObjEndJob(driver, vm) > 0)
I just realized that qemuDomainObjEndJob returns a bool value. We may need a follow-up patch to clean it up.
I'll work on that; as you say, it can be a follow-up, and it touches more call sites.
ACK with "{" removed.
Thanks; pushed. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org
participants (2)
-
Eric Blake
-
Osier Yang