[libvirt] [PATCH 0/2] qemuDomainUndefineFlags: Two fixes

*** BLURB HERE *** Michal Privoznik (2): qemuDomainUndefineFlags: Grab QEMU_JOB_MODIFY qemuDomainUndefineFlags: unlink nvram file regardless of domain state src/qemu/qemu_driver.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) -- 2.13.0

This API is definitely modifying state of @vm. Therefore it should grab a job. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b3f65f440..574c351ae 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7325,10 +7325,13 @@ qemuDomainUndefineFlags(virDomainPtr dom, if (virDomainUndefineFlagsEnsureACL(dom->conn, vm->def) < 0) goto cleanup; + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + goto cleanup; + if (!vm->persistent) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("cannot undefine transient domain")); - goto cleanup; + goto endjob; } if (!virDomainObjIsActive(vm) && @@ -7338,15 +7341,15 @@ qemuDomainUndefineFlags(virDomainPtr dom, _("cannot delete inactive domain with %d " "snapshots"), nsnapshots); - goto cleanup; + goto endjob; } if (qemuDomainSnapshotDiscardAllMetadata(driver, vm) < 0) - goto cleanup; + goto endjob; } name = qemuDomainManagedSavePath(driver, vm); if (name == NULL) - goto cleanup; + goto endjob; if (virFileExists(name)) { if (flags & VIR_DOMAIN_UNDEFINE_MANAGED_SAVE) { @@ -7354,13 +7357,13 @@ qemuDomainUndefineFlags(virDomainPtr dom, virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Failed to remove domain managed " "save image")); - goto cleanup; + goto endjob; } } else { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("Refusing to undefine while domain managed " "save image exists")); - goto cleanup; + goto endjob; } } @@ -7372,17 +7375,17 @@ qemuDomainUndefineFlags(virDomainPtr dom, virReportSystemError(errno, _("failed to remove nvram: %s"), vm->def->os.loader->nvram); - goto cleanup; + goto endjob; } } else if (!(flags & VIR_DOMAIN_UNDEFINE_KEEP_NVRAM)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("cannot delete inactive domain with nvram")); - goto cleanup; + goto endjob; } } if (virDomainDeleteConfig(cfg->configDir, 
cfg->autostartDir, vm) < 0) - goto cleanup; + goto endjob; event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_UNDEFINED, @@ -7399,6 +7402,8 @@ qemuDomainUndefineFlags(virDomainPtr dom, qemuDomainRemoveInactive(driver, vm); ret = 0; + endjob: + qemuDomainObjEndJob(driver, vm); cleanup: VIR_FREE(name); -- 2.13.0

On Mon, Aug 07, 2017 at 02:20:05PM +0200, Michal Privoznik wrote:
This API is definitely modifying state of @vm. Therefore it should grab a job.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b3f65f440..574c351ae 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7325,10 +7325,13 @@ qemuDomainUndefineFlags(virDomainPtr dom, if (virDomainUndefineFlagsEnsureACL(dom->conn, vm->def) < 0) goto cleanup;
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + goto cleanup; + if (!vm->persistent) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("cannot undefine transient domain")); - goto cleanup; + goto endjob; }
if (!virDomainObjIsActive(vm) && @@ -7338,15 +7341,15 @@ qemuDomainUndefineFlags(virDomainPtr dom, _("cannot delete inactive domain with %d " "snapshots"), nsnapshots); - goto cleanup; + goto endjob; } if (qemuDomainSnapshotDiscardAllMetadata(driver, vm) < 0) - goto cleanup; + goto endjob; }
name = qemuDomainManagedSavePath(driver, vm); if (name == NULL) - goto cleanup; + goto endjob;
if (virFileExists(name)) { if (flags & VIR_DOMAIN_UNDEFINE_MANAGED_SAVE) { @@ -7354,13 +7357,13 @@ qemuDomainUndefineFlags(virDomainPtr dom, virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Failed to remove domain managed " "save image")); - goto cleanup; + goto endjob; } } else { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("Refusing to undefine while domain managed " "save image exists")); - goto cleanup; + goto endjob; } }
@@ -7372,17 +7375,17 @@ qemuDomainUndefineFlags(virDomainPtr dom, virReportSystemError(errno, _("failed to remove nvram: %s"), vm->def->os.loader->nvram); - goto cleanup; + goto endjob; } } else if (!(flags & VIR_DOMAIN_UNDEFINE_KEEP_NVRAM)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("cannot delete inactive domain with nvram")); - goto cleanup; + goto endjob; } }
if (virDomainDeleteConfig(cfg->configDir, cfg->autostartDir, vm) < 0) - goto cleanup; + goto endjob;
event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_UNDEFINED, @@ -7399,6 +7402,8 @@ qemuDomainUndefineFlags(virDomainPtr dom, qemuDomainRemoveInactive(driver, vm);
You cannot RemoveInactive with a job. Not that it wouldn't make sense, but the code is written that way (at least for now). It takes another MODIFY job itself.
ret = 0; + endjob: + qemuDomainObjEndJob(driver, vm);
cleanup: VIR_FREE(name); -- 2.13.0
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

On 08/07/2017 08:20 AM, Michal Privoznik wrote:
This API is definitely modifying state of @vm. Therefore it should grab a job.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-)
Not sure which of the two undefine series (if either) is still active, but I wonder how this "interacts or intersects" with: https://www.redhat.com/archives/libvir-list/2017-July/msg00921.html which was posted late last month... I looked at it, but really hadn't dug into it yet. The summary of the linked patch is it's possible to run an undefine while a define is occurring because locks are dropped during launch processing. However, if there's a job, then wouldn't that mean it'd be less likely or impossible to allow that undefine while the define job was happening? Of course as Martin notes RemoveInactive cannot be run with a job, but the problem from the other patch is that the RemoveInactive is at least partially successful. I guess I just wanted to be sure to note the link between the two just in case you were actively still thinking about this one (and had better ideas for the other patch where I went with the first thing that came to mind w/r/t using a flag). John
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b3f65f440..574c351ae 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7325,10 +7325,13 @@ qemuDomainUndefineFlags(virDomainPtr dom, if (virDomainUndefineFlagsEnsureACL(dom->conn, vm->def) < 0) goto cleanup;
+ if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + goto cleanup; + if (!vm->persistent) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("cannot undefine transient domain")); - goto cleanup; + goto endjob; }
if (!virDomainObjIsActive(vm) && @@ -7338,15 +7341,15 @@ qemuDomainUndefineFlags(virDomainPtr dom, _("cannot delete inactive domain with %d " "snapshots"), nsnapshots); - goto cleanup; + goto endjob; } if (qemuDomainSnapshotDiscardAllMetadata(driver, vm) < 0) - goto cleanup; + goto endjob; }
name = qemuDomainManagedSavePath(driver, vm); if (name == NULL) - goto cleanup; + goto endjob;
if (virFileExists(name)) { if (flags & VIR_DOMAIN_UNDEFINE_MANAGED_SAVE) { @@ -7354,13 +7357,13 @@ qemuDomainUndefineFlags(virDomainPtr dom, virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Failed to remove domain managed " "save image")); - goto cleanup; + goto endjob; } } else { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("Refusing to undefine while domain managed " "save image exists")); - goto cleanup; + goto endjob; } }
@@ -7372,17 +7375,17 @@ qemuDomainUndefineFlags(virDomainPtr dom, virReportSystemError(errno, _("failed to remove nvram: %s"), vm->def->os.loader->nvram); - goto cleanup; + goto endjob; } } else if (!(flags & VIR_DOMAIN_UNDEFINE_KEEP_NVRAM)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("cannot delete inactive domain with nvram")); - goto cleanup; + goto endjob; } }
if (virDomainDeleteConfig(cfg->configDir, cfg->autostartDir, vm) < 0) - goto cleanup; + goto endjob;
event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_UNDEFINED, @@ -7399,6 +7402,8 @@ qemuDomainUndefineFlags(virDomainPtr dom, qemuDomainRemoveInactive(driver, vm);
ret = 0; + endjob: + qemuDomainObjEndJob(driver, vm);
cleanup: VIR_FREE(name);

On 08/15/2017 03:00 AM, John Ferlan wrote:
On 08/07/2017 08:20 AM, Michal Privoznik wrote:
This API is definitely modifying state of @vm. Therefore it should grab a job.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-)
Not sure which of the two undefine series (if either) is still active, but I wonder how this "interacts or intersects" with:
https://www.redhat.com/archives/libvir-list/2017-July/msg00921.html
Oh right. This patch of mine can resolve the issue that you're linking. The thing is, in qemuDomainCreateWithFlags() an async job is grabbed (VIR_DOMAIN_JOB_OPERATION_START) and as such only certain types of sync jobs are allowed. In this specific case just destroy is permitted. Therefore, if undefine grabbed a modify type of job, it would be suspended for as long as the async job is set on the domain. Good point! Thanks for catching that.
which was posted late last month... I looked at it, but really hadn't dug into it yet. The summary of the linked patch is it's possible to run an undefine while a define is occurring because locks are dropped during launch processing. However, if there's a job, then wouldn't that mean it'd be less likely or impossible to allow that undefine while the define job was happening? Of course as Martin notes RemoveInactive cannot be run with a job, but the problem from the other patch is that the RemoveInactive is at least partially successful.
I guess I just wanted to be sure to note the link between the two just in case you were actively still thinking about this one (and had better ideas for the other patch where I went with the first thing that came to mind w/r/t using a flag).
Yeah, I'm gonna rework this one and resend. We can clearly see that there's no excuse for an API to not grab a job! Michal

https://bugzilla.redhat.com/show_bug.cgi?id=1467245 Currently, there's a bug when undefining a domain with NVRAM store. Basically, the unlink() of the NVRAM store file happens during the undefine procedure iff domain is inactive. So, if domain is running and undefine is called the file is left behind. It won't be removed in the domain cleanup process either (qemuProcessStop). One of the solutions is to remove it regardless of the domain state and rely on qemu having the file opened. This still has a downside that if the domain is defined back the NVRAM store file is going to be new, any changes to the current one are lost (just like with any other file that is deleted while a process has it opened). But is it really a downside? Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 574c351ae..992ae2a2e 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7367,8 +7367,8 @@ qemuDomainUndefineFlags(virDomainPtr dom, } } - if (!virDomainObjIsActive(vm) && - vm->def->os.loader && vm->def->os.loader->nvram && + if (vm->def->os.loader && + vm->def->os.loader->nvram && virFileExists(vm->def->os.loader->nvram)) { if ((flags & VIR_DOMAIN_UNDEFINE_NVRAM)) { if (unlink(vm->def->os.loader->nvram) < 0) { -- 2.13.0

On Mon, Aug 07, 2017 at 02:20:06PM +0200, Michal Privoznik wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1467245
Currently, there's a bug when undefining a domain with NVRAM store. Basically, the unlink() of the NVRAM store file happens during the undefine procedure iff domain is inactive. So, if domain is running and undefine is called the file is left behind. It won't be removed in the domain cleanup process either (qemuProcessStop). One of the solutions is to remove it regardless of the domain state and rely on qemu having the file opened. This still has a downside that if the domain is defined back the NVRAM store file is going to be new, any changes to the current one are lost (just like with any other file that is deleted while a process has it opened). But is it really a downside?
It might be. Why don't we disable removing it when the domain is running? We have some precedent for this. The place which already deals with this possibility is tools/virsh-domain.c in the cmdUndefine() where we handle --remove-all-storage. If you look at the help of that command it also says: --nvram remove nvram file, if inactive And that makes sense to me. What doesn't, on the other hand, is that it also says: --keep-nvram keep nvram file, if inactive I don't get the "if inactive" there. But I'm not going to check who pushed that. At least not again =)
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 574c351ae..992ae2a2e 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7367,8 +7367,8 @@ qemuDomainUndefineFlags(virDomainPtr dom, } }
- if (!virDomainObjIsActive(vm) && - vm->def->os.loader && vm->def->os.loader->nvram && + if (vm->def->os.loader && + vm->def->os.loader->nvram && virFileExists(vm->def->os.loader->nvram)) { if ((flags & VIR_DOMAIN_UNDEFINE_NVRAM)) { if (unlink(vm->def->os.loader->nvram) < 0) { -- 2.13.0
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

On 08/09/2017 11:41 AM, Martin Kletzander wrote:
On Mon, Aug 07, 2017 at 02:20:06PM +0200, Michal Privoznik wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1467245
Currently, there's a bug when undefining a domain with NVRAM store. Basically, the unlink() of the NVRAM store file happens during the undefine procedure iff domain is inactive. So, if domain is running and undefine is called the file is left behind. It won't be removed in the domain cleanup process either (qemuProcessStop). One of the solutions is to remove it regardless of the domain state and rely on qemu having the file opened. This still has a downside that if the domain is defined back the NVRAM store file is going to be new, any changes to the current one are lost (just like with any other file that is deleted while a process has it opened). But is it really a downside?
It might be. Why don't we disable removing it when the domain is running? We have some precedent for this. The place which already deals with this possibility is tools/virsh-domain.c in the cmdUndefine() where we handle --remove-all-storage. If you look at the help of that command it also says:
--nvram remove nvram file, if inactive
And that makes sense to me. What doesn't, on the other hand, is that it also says:
--keep-nvram keep nvram file, if inactive
I don't get the "if inactive" there. But I'm not going to check who pushed that. At least not again =)
Okay, I'll write a patch that: a) forbids undefine for active domains, unless b) KEEP_NVRAM flag is specified. Michal

On Wed, Aug 09, 2017 at 01:13:02PM +0200, Michal Privoznik wrote:
On 08/09/2017 11:41 AM, Martin Kletzander wrote:
On Mon, Aug 07, 2017 at 02:20:06PM +0200, Michal Privoznik wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1467245
Currently, there's a bug when undefining a domain with NVRAM store. Basically, the unlink() of the NVRAM store file happens during the undefine procedure iff domain is inactive. So, if domain is running and undefine is called the file is left behind. It won't be removed in the domain cleanup process either (qemuProcessStop). One of the solutions is to remove it regardless of the domain state and rely on qemu having the file opened. This still has a downside that if the domain is defined back the NVRAM store file is going to be new, any changes to the current one are lost (just like with any other file that is deleted while a process has it opened). But is it really a downside?
It might be. Why don't we disable removing it when the domain is running? We have some precedent for this. The place which already deals with this possibility is tools/virsh-domain.c in the cmdUndefine() where we handle --remove-all-storage. If you look at the help of that command it also says:
--nvram remove nvram file, if inactive
And that makes sense to me. What doesn't, on the other hand, is that it also says:
--keep-nvram keep nvram file, if inactive
I don't get the "if inactive" there. But I'm not going to check who pushed that. At least not again =)
Okay, I'll write a patch that:
a) forbids undefine for active domains, unless b) KEEP_NVRAM flag is specified.
c) or there is no nvram =) Yeah, well, that's caused by the unfortunate design of the way nvram is handled by qemu and qemu driver. Thanks
Michal
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

On Mon, Aug 07, 2017 at 02:20:06PM +0200, Michal Privoznik wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1467245
Currently, there's a bug when undefining a domain with NVRAM store. Basically, the unlink() of the NVRAM store file happens during the undefine procedure iff domain is inactive. So, if domain is running and undefine is called the file is left behind. It won't be removed in the domain cleanup process either (qemuProcessStop). One of the solutions is to remove it regardless of the domain state and rely on qemu having the file opened. This still has a downside that if the domain is defined back the NVRAM store file is going to be new, any changes to the current one are lost (just like with any other file that is deleted while a process has it opened). But is it really a downside?
We only unlink if the user explicitly gives VIR_DOMAIN_UNDEFINE_NVRAM, so I think that "problem" scenario you describe is exactly what the user has asked for. i.e. not a bug - just don't pass VIR_DOMAIN_UNDEFINE_NVRAM if they want to keep it around across an undefine+define pair.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 574c351ae..992ae2a2e 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -7367,8 +7367,8 @@ qemuDomainUndefineFlags(virDomainPtr dom, } }
- if (!virDomainObjIsActive(vm) && - vm->def->os.loader && vm->def->os.loader->nvram && + if (vm->def->os.loader && + vm->def->os.loader->nvram && virFileExists(vm->def->os.loader->nvram)) { if ((flags & VIR_DOMAIN_UNDEFINE_NVRAM)) { if (unlink(vm->def->os.loader->nvram) < 0) {
ACK Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
participants (4)
-
Daniel P. Berrange
-
John Ferlan
-
Martin Kletzander
-
Michal Privoznik