[libvirt] [PATCH 0/2] snapshot: add force for risky reverts

I first documented the need for force back in my RFC: https://www.redhat.com/archives/libvir-list/2011-August/msg00361.html but only now got around to implementing it. At the moment, I'm posting the code for early review. I'm still in the process of testing out multiple scenarios, and will send followup mail detailing the actual steps I took using virsh to cover the multiple scenarios. Eric Blake (2): snapshot: add REVERT_FORCE to API snapshot: enforce REVERT_FORCE on qemu include/libvirt/libvirt.h.in | 1 + include/libvirt/virterror.h | 2 + src/libvirt.c | 22 +++++++++++++++++++ src/qemu/qemu_driver.c | 47 +++++++++++++++++++++++++++++++++--------- src/util/virterror.c | 6 +++++ tools/virsh.c | 19 ++++++++++++++++- tools/virsh.pod | 17 +++++++++++++++ 7 files changed, 103 insertions(+), 11 deletions(-) -- 1.7.4.4

Although reverting to a snapshot is a form of data loss, this is normally expected. However, there are two cases where additional surprises (failure to run the reverted state, or a break in connectivity to the domain) can come into play. Requiring extra acknowledgment in these cases will make it less likely that someone can get into an unrecoverable state due to a default revert. Also create a new error code, so users can distinguish when forcing would make a difference, rather than having to blindly request force. * include/libvirt/libvirt.h.in (VIR_DOMAIN_SNAPSHOT_REVERT_FORCE): New flag. * src/libvirt.c (virDomainRevertToSnapshot): Document it. * include/libvirt/virterror.h (VIR_ERR_SNAPSHOT_REVERT_RISKY): New error value. * src/util/virterror.c (virErrorMsg): Implement it. * tools/virsh.c (cmdDomainSnapshotRevert): Add --force to virsh. * tools/virsh.pod (snapshot-revert): Document it. --- include/libvirt/libvirt.h.in | 1 + include/libvirt/virterror.h | 2 ++ src/libvirt.c | 22 ++++++++++++++++++++++ src/util/virterror.c | 6 ++++++ tools/virsh.c | 19 ++++++++++++++++++- tools/virsh.pod | 17 +++++++++++++++++ 6 files changed, 66 insertions(+), 1 deletions(-) diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 3c7f278..7403a9a 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -2736,6 +2736,7 @@ virDomainSnapshotPtr virDomainSnapshotGetParent(virDomainSnapshotPtr snapshot, typedef enum { VIR_DOMAIN_SNAPSHOT_REVERT_RUNNING = 1 << 0, /* Run after revert */ VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED = 1 << 1, /* Pause after revert */ + VIR_DOMAIN_SNAPSHOT_REVERT_FORCE = 1 << 2, /* Allow risky reverts */ } virDomainSnapshotRevertFlags; /* Revert the domain to a point-in-time snapshot. The diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h index 0aae622..0c98014 100644 --- a/include/libvirt/virterror.h +++ b/include/libvirt/virterror.h @@ -237,6 +237,8 @@ typedef enum { the given driver */ VIR_ERR_STORAGE_PROBE_FAILED = 75, /* storage pool proble failed */ VIR_ERR_STORAGE_POOL_BUILT = 76, /* storage pool already built */ + VIR_ERR_SNAPSHOT_REVERT_RISKY = 77, /* force was not requested for a + risky domain snapshot revert */ } virErrorNumber; /** diff --git a/src/libvirt.c b/src/libvirt.c index 2b2f6be..c2aabf7 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -16373,6 +16373,28 @@ error: * into an inactive state, so transient domains require the use of one * of these two flags. * + * Reverting to any snapshot discards all configuration changes made since + * the last snapshot. Additionally, reverting to a snapshot from a running + * domain is a form of data loss, since it discards whatever is in the + * guest's RAM at the time. However, the very nature of keeping snapshots + * implies the intent to roll back state, so no additional confirmation is + * normally required for these effects. + * + * However, there are two particular situations where reverting will + * be refused by default, and where @flags must include + * VIR_DOMAIN_SNAPSHOT_REVERT_FORCE to acknowledge the risks. 1) Any + * attempt to revert to a snapshot that lacks the metadata to perform + * ABI compatibility checks (generally the case for snapshots that + * lack a full <domain> when listed by virDomainSnapshotGetXMLDesc(), + * such as those created prior to libvirt 0.9.5). 2) Any attempt to + * revert a running domain to an active state that requires starting a + * new hypervisor instance rather than reusing the existing hypervisor + * (since this would terminate all connections to the domain, such as + * such as VNC or Spice graphics) - this condition arises from active + * snapshots that are provably ABI incomaptible, as well as from + * inactive snapshots with a request to start the domain after the + * revert. + * * Returns 0 if the creation is successful, -1 on error. */ int diff --git a/src/util/virterror.c b/src/util/virterror.c index 26c4981..5006fa2 100644 --- a/src/util/virterror.c +++ b/src/util/virterror.c @@ -1204,6 +1204,12 @@ virErrorMsg(virErrorNumber error, const char *info) else errmsg = _("argument unsupported: %s"); break; + case VIR_ERR_SNAPSHOT_REVERT_RISKY: + if (info == NULL) + errmsg = _("revert requires force"); + else + errmsg = _("revert requires force: %s"); + break; } return (errmsg); } diff --git a/tools/virsh.c b/tools/virsh.c index 655378c..4e79325 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -13571,6 +13571,7 @@ static const vshCmdOptDef opts_snapshot_revert[] = { {"snapshotname", VSH_OT_DATA, VSH_OFLAG_REQ, N_("snapshot name")}, {"running", VSH_OT_BOOL, 0, N_("after reverting, change state to running")}, {"paused", VSH_OT_BOOL, 0, N_("after reverting, change state to paused")}, + {"force", VSH_OT_BOOL, 0, N_("try harder on risky reverts")}, {NULL, 0, 0, NULL} }; @@ -13582,11 +13583,19 @@ cmdDomainSnapshotRevert(vshControl *ctl, const vshCmd *cmd) const char *name = NULL; virDomainSnapshotPtr snapshot = NULL; unsigned int flags = 0; + bool force = false; + int result; if (vshCommandOptBool(cmd, "running")) flags |= VIR_DOMAIN_SNAPSHOT_REVERT_RUNNING; if (vshCommandOptBool(cmd, "paused")) flags |= VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED; + /* We want virsh snapshot-revert --force to work even when talking + * to older servers that did the unsafe revert by default but + * reject the flag, so we probe without the flag, and only use it + * when the error says it will make a difference. */ + if (vshCommandOptBool(cmd, "force")) + force = true; if (!vshConnectionUsability(ctl, ctl->conn)) goto cleanup; @@ -13602,7 +13611,15 @@ cmdDomainSnapshotRevert(vshControl *ctl, const vshCmd *cmd) if (snapshot == NULL) goto cleanup; - if (virDomainRevertToSnapshot(snapshot, flags) < 0) + result = virDomainRevertToSnapshot(snapshot, flags); + if (result < 0 && force && + last_error->code == VIR_ERR_SNAPSHOT_REVERT_RISKY) { + flags |= VIR_DOMAIN_SNAPSHOT_REVERT_FORCE; + virFreeError(last_error); + last_error = NULL; + result = virDomainRevertToSnapshot(snapshot, flags); + } + if (result < 0) goto cleanup; ret = true; diff --git a/tools/virsh.pod b/tools/virsh.pod index 7c91d75..dd60a64 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -2002,6 +2002,7 @@ Using I<--security-info> will also include security sensitive information. Output the name of the parent snapshot for the given I<snapshot>, if any. =item B<snapshot-revert> I<domain> I<snapshot> [{I<--running> | I<--paused>}] +[I<--force>] Revert the given domain to the snapshot specified by I<snapshot>. Be aware that this is a destructive action; any changes in the domain since the last @@ -2017,6 +2018,22 @@ I<--running> or I<--paused> flag will perform additional state changes transient domains cannot be inactive, it is required to use one of these flags when reverting to a disk snapshot of a transient domain. +There are two cases where a snapshot revert involves extra risk, which +requires the use of I<--force> to proceed. One is the case of a +snapshot that lacks full domain information for reverting +configuration (such as snapshots created prior to libvirt 0.9.5); +since libvirt cannot prove that the current configuration matches what +was in use at the time of the snapshot, supplying I<--force> assures +libvirt that the snapshot is compatible with the current configuration +(and if it is not, the domain will likely fail to run). The other is +the case of reverting from a running domain to an active state where a +new hypervisor has to be created rather than reusing the existing +hypervisor, because it implies drawbacks such as breaking any existing +VNC or Spice connections; this condition happens with an active +snapshot that uses a provably incompatible configuration, as well as +with an inactive snapshot that is combined with the I<--start> or +I<--pause> flag. + =item B<snapshot-delete> I<domain> I<snapshot> [I<--metadata>] [{I<--children> | I<--children-only>}] -- 1.7.4.4

On 09/30/2011 02:52 PM, Eric Blake wrote:
Although reverting to a snapshot is a form of data loss, this is normally expected. However, there are two cases where additional surprises (failure to run the reverted state, or a break in connectivity to the domain) can come into play. Requiring extra acknowledgment in these cases will make it less likely that someone can get into an unrecoverable state due to a default revert.
Also create a new error code, so users can distinguish when forcing would make a difference, rather than having to blindly request force.
* include/libvirt/libvirt.h.in (VIR_DOMAIN_SNAPSHOT_REVERT_FORCE): New flag. * src/libvirt.c (virDomainRevertToSnapshot): Document it. * include/libvirt/virterror.h (VIR_ERR_SNAPSHOT_REVERT_RISKY): New error value. * src/util/virterror.c (virErrorMsg): Implement it. * tools/virsh.c (cmdDomainSnapshotRevert): Add --force to virsh. * tools/virsh.pod (snapshot-revert): Document it. --- include/libvirt/libvirt.h.in | 1 + include/libvirt/virterror.h | 2 ++ src/libvirt.c | 22 ++++++++++++++++++++++ src/util/virterror.c | 6 ++++++ tools/virsh.c | 19 ++++++++++++++++++- tools/virsh.pod | 17 +++++++++++++++++ 6 files changed, 66 insertions(+), 1 deletions(-)
diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 3c7f278..7403a9a 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -2736,6 +2736,7 @@ virDomainSnapshotPtr virDomainSnapshotGetParent(virDomainSnapshotPtr snapshot, typedef enum { VIR_DOMAIN_SNAPSHOT_REVERT_RUNNING = 1<< 0, /* Run after revert */ VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED = 1<< 1, /* Pause after revert */ + VIR_DOMAIN_SNAPSHOT_REVERT_FORCE = 1<< 2, /* Allow risky reverts */ } virDomainSnapshotRevertFlags;
/* Revert the domain to a point-in-time snapshot. The diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h index 0aae622..0c98014 100644 --- a/include/libvirt/virterror.h +++ b/include/libvirt/virterror.h @@ -237,6 +237,8 @@ typedef enum { the given driver */ VIR_ERR_STORAGE_PROBE_FAILED = 75, /* storage pool proble failed */ VIR_ERR_STORAGE_POOL_BUILT = 76, /* storage pool already built */ + VIR_ERR_SNAPSHOT_REVERT_RISKY = 77, /* force was not requested for a + risky domain snapshot revert */ } virErrorNumber;
/** diff --git a/src/libvirt.c b/src/libvirt.c index 2b2f6be..c2aabf7 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -16373,6 +16373,28 @@ error: * into an inactive state, so transient domains require the use of one * of these two flags. * + * Reverting to any snapshot discards all configuration changes made since + * the last snapshot. Additionally, reverting to a snapshot from a running + * domain is a form of data loss, since it discards whatever is in the + * guest's RAM at the time. However, the very nature of keeping snapshots + * implies the intent to roll back state, so no additional confirmation is + * normally required for these effects. + * + * However, there are two particular situations where reverting will + * be refused by default, and where @flags must include + * VIR_DOMAIN_SNAPSHOT_REVERT_FORCE to acknowledge the risks. 1) Any + * attempt to revert to a snapshot that lacks the metadata to perform + * ABI compatibility checks (generally the case for snapshots that + * lack a full<domain> when listed by virDomainSnapshotGetXMLDesc(), + * such as those created prior to libvirt 0.9.5). 2) Any attempt to + * revert a running domain to an active state that requires starting a + * new hypervisor instance rather than reusing the existing hypervisor + * (since this would terminate all connections to the domain, such as + * such as VNC or Spice graphics) - this condition arises from active + * snapshots that are provably ABI incomaptible, as well as from + * inactive snapshots with a request to start the domain after the + * revert. + * * Returns 0 if the creation is successful, -1 on error. */ int diff --git a/src/util/virterror.c b/src/util/virterror.c index 26c4981..5006fa2 100644 --- a/src/util/virterror.c +++ b/src/util/virterror.c @@ -1204,6 +1204,12 @@ virErrorMsg(virErrorNumber error, const char *info) else errmsg = _("argument unsupported: %s"); break; + case VIR_ERR_SNAPSHOT_REVERT_RISKY: + if (info == NULL) + errmsg = _("revert requires force"); + else + errmsg = _("revert requires force: %s"); + break; } return (errmsg); } diff --git a/tools/virsh.c b/tools/virsh.c index 655378c..4e79325 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -13571,6 +13571,7 @@ static const vshCmdOptDef opts_snapshot_revert[] = { {"snapshotname", VSH_OT_DATA, VSH_OFLAG_REQ, N_("snapshot name")}, {"running", VSH_OT_BOOL, 0, N_("after reverting, change state to running")}, {"paused", VSH_OT_BOOL, 0, N_("after reverting, change state to paused")}, + {"force", VSH_OT_BOOL, 0, N_("try harder on risky reverts")}, {NULL, 0, 0, NULL} };
@@ -13582,11 +13583,19 @@ cmdDomainSnapshotRevert(vshControl *ctl, const vshCmd *cmd) const char *name = NULL; virDomainSnapshotPtr snapshot = NULL; unsigned int flags = 0; + bool force = false; + int result;
if (vshCommandOptBool(cmd, "running")) flags |= VIR_DOMAIN_SNAPSHOT_REVERT_RUNNING; if (vshCommandOptBool(cmd, "paused")) flags |= VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED; + /* We want virsh snapshot-revert --force to work even when talking + * to older servers that did the unsafe revert by default but + * reject the flag, so we probe without the flag, and only use it + * when the error says it will make a difference. */ + if (vshCommandOptBool(cmd, "force")) + force = true;
if (!vshConnectionUsability(ctl, ctl->conn)) goto cleanup; @@ -13602,7 +13611,15 @@ cmdDomainSnapshotRevert(vshControl *ctl, const vshCmd *cmd) if (snapshot == NULL) goto cleanup;
- if (virDomainRevertToSnapshot(snapshot, flags)< 0) + result = virDomainRevertToSnapshot(snapshot, flags); + if (result< 0&& force&& + last_error->code == VIR_ERR_SNAPSHOT_REVERT_RISKY) { + flags |= VIR_DOMAIN_SNAPSHOT_REVERT_FORCE;
Are you not adding the FORCE flag the first time to allow for better interoperability with older libvirtd? That's the only question I had. It all looks fine - ACK.
+ virFreeError(last_error); + last_error = NULL; + result = virDomainRevertToSnapshot(snapshot, flags); + } + if (result< 0) goto cleanup;
ret = true; diff --git a/tools/virsh.pod b/tools/virsh.pod index 7c91d75..dd60a64 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -2002,6 +2002,7 @@ Using I<--security-info> will also include security sensitive information. Output the name of the parent snapshot for the given I<snapshot>, if any.
=item B<snapshot-revert> I<domain> I<snapshot> [{I<--running> | I<--paused>}] +[I<--force>]
Revert the given domain to the snapshot specified by I<snapshot>. Be aware that this is a destructive action; any changes in the domain since the last @@ -2017,6 +2018,22 @@ I<--running> or I<--paused> flag will perform additional state changes transient domains cannot be inactive, it is required to use one of these flags when reverting to a disk snapshot of a transient domain.
+There are two cases where a snapshot revert involves extra risk, which +requires the use of I<--force> to proceed. One is the case of a +snapshot that lacks full domain information for reverting +configuration (such as snapshots created prior to libvirt 0.9.5); +since libvirt cannot prove that the current configuration matches what +was in use at the time of the snapshot, supplying I<--force> assures +libvirt that the snapshot is compatible with the current configuration +(and if it is not, the domain will likely fail to run). The other is +the case of reverting from a running domain to an active state where a +new hypervisor has to be created rather than reusing the existing +hypervisor, because it implies drawbacks such as breaking any existing +VNC or Spice connections; this condition happens with an active +snapshot that uses a provably incompatible configuration, as well as +with an inactive snapshot that is combined with the I<--start> or +I<--pause> flag. + =item B<snapshot-delete> I<domain> I<snapshot> [I<--metadata>] [{I<--children> | I<--children-only>}]

On 10/04/2011 01:24 PM, Laine Stump wrote:
- if (virDomainRevertToSnapshot(snapshot, flags)< 0) + result = virDomainRevertToSnapshot(snapshot, flags); + if (result< 0&& force&& + last_error->code == VIR_ERR_SNAPSHOT_REVERT_RISKY) { + flags |= VIR_DOMAIN_SNAPSHOT_REVERT_FORCE;
Are you not adding the FORCE flag the first time to allow for better interoperability with older libvirtd?
Exactly. For example, libvirt 0.9.6 does not understand FORCE (and using it would error out with unrecognized flag bit), but just happens to revert to a snapshot created by 0.9.4 on the first call with flags == 0. Libvirt 0.9.7, in the same situation, will error out with the new error value when flags == 0, but with the new error, so that you know you can safely add FORCE. One other consideration is that 0.9.6 understands the new flag VIR_DOMAIN_SNAPSHOT_REVERT_RUNNING, which 0.9.4 did not understand, so blindly looking for the 'unknown flag bit' error is not unique enough to be the indication that we could remove the --force bit and retry, because that still won't resolve on an 0.9.4 server. My choice of --force handling thus seems to be the one most likely to work for 0.9.4, 0.9.6, and 0.9.7, so that the user can blindly type 'virsh snapshot-revert dom old-snapshot --force' and attempt the revert regardless of server version. Conversely, I still wanted 'virsh snapshot-revert' _without_ --force will work in as many situations as the server supports, and that if a server rejects the operation because force is required, the error message will be specific enough to mention that. This is a good thing, so that users don't have to blindly add '--force' and expose themselves to unnecessary risk; rather, they should only add --force after an explicit message says what force would solve, and deciding that the risk of forcing for that particular case is worth it.
That's the only question I had. It all looks fine - ACK.
I'll wait for the 2/2 ack before applying this, then. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

Implements the documentation for snapshot revert vs. force. Part of the patch tightens existing behavior (previously, reverting to an old snapshot without <domain> was blindly attempted, now it requires force), while part of it relaxes behavior (previously, it was not possible to revert an active domain to an ABI-incompatible active snapshot, now force allows this transition). * src/qemu/qemu_driver.c (qemuDomainRevertToSnapshot): Check for risky situations, and allow force to get past them. --- src/qemu/qemu_driver.c | 47 +++++++++++++++++++++++++++++++++++++---------- 1 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 5110102..efd60a7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -9753,7 +9753,8 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, * 7. paused -> inactive: EVENT_STOPPED * 8. paused -> running: EVENT_RESUMED * 9. paused -> paused: none - * Also, several transitions occur even if we fail partway through. + * Also, several transitions occur even if we fail partway through, + * and use of FORCE can cause multiple transitions. */ qemuDriverLock(driver); @@ -9789,6 +9790,24 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, "yet")); goto cleanup; } + if (!(flags & VIR_DOMAIN_SNAPSHOT_REVERT_FORCE)) { + if (!snap->def->dom) { + qemuReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY, + _("snapshot lacks domain '%s' rollback details"), + snap->def->name); + goto cleanup; + } + if (virDomainObjIsActive(vm) && + !(snap->def->state == VIR_DOMAIN_RUNNING + || snap->def->state == VIR_DOMAIN_PAUSED) && + (flags & (VIR_DOMAIN_SNAPSHOT_REVERT_RUNNING | + VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED))) { + qemuReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY, + _("must respawn qemu to start inactive snapshot")); + goto cleanup; + } + } + if (vm->current_snapshot) { vm->current_snapshot->def->current = false; @@ -9818,11 +9837,6 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, VIR_FREE(xml); if (!config) goto cleanup; - } else { - /* XXX Fail if VIR_DOMAIN_REVERT_FORCE is not set, rather than - * blindly hoping for the best. */ - VIR_WARN("snapshot is lacking rollback information for domain '%s'", - snap->def->name); } if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_MODIFY) < 0) @@ -9843,10 +9857,22 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, /* Transitions 5, 6, 8, 9 */ /* Check for ABI compatibility. */ if (config && !virDomainDefCheckABIStability(vm->def, config)) { - /* XXX Add VIR_DOMAIN_REVERT_FORCE to permit killing - * and restarting a new qemu, since loadvm monitor - * command won't work. */ - goto endjob; + if (!(flags & VIR_DOMAIN_SNAPSHOT_REVERT_FORCE)) { + /* Alter existing error to give correct category. */ + virErrorPtr err = virGetLastError(); + err->code = VIR_ERR_SNAPSHOT_REVERT_RISKY; + goto endjob; + } + qemuProcessStop(driver, vm, 0, + VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT); + virDomainAuditStop(vm, "from-snapshot"); + detail = VIR_DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT; + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_STOPPED, + detail); + if (event) + qemuDomainEventQueue(driver, event); + goto load; } priv = vm->privateData; @@ -9882,6 +9908,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, virDomainObjAssignDef(vm, config, false); } else { /* Transitions 2, 3 */ + load: was_stopped = true; if (config) virDomainObjAssignDef(vm, config, false); -- 1.7.4.4

On 09/30/2011 02:52 PM, Eric Blake wrote:
Implements the documentation for snapshot revert vs. force.
Part of the patch tightens existing behavior (previously, reverting to an old snapshot without<domain> was blindly attempted, now it requires force), while part of it relaxes behavior (previously, it was not possible to revert an active domain to an ABI-incompatible active snapshot, now force allows this transition).
* src/qemu/qemu_driver.c (qemuDomainRevertToSnapshot): Check for risky situations, and allow force to get past them. --- src/qemu/qemu_driver.c | 47 +++++++++++++++++++++++++++++++++++++---------- 1 files changed, 37 insertions(+), 10 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 5110102..efd60a7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -9753,7 +9753,8 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, * 7. paused -> inactive: EVENT_STOPPED * 8. paused -> running: EVENT_RESUMED * 9. paused -> paused: none - * Also, several transitions occur even if we fail partway through. + * Also, several transitions occur even if we fail partway through, + * and use of FORCE can cause multiple transitions. */
qemuDriverLock(driver); @@ -9789,6 +9790,24 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, "yet")); goto cleanup; } + if (!(flags& VIR_DOMAIN_SNAPSHOT_REVERT_FORCE)) { + if (!snap->def->dom) { + qemuReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY, + _("snapshot lacks domain '%s' rollback details"), + snap->def->name); + goto cleanup; + } + if (virDomainObjIsActive(vm)&& + !(snap->def->state == VIR_DOMAIN_RUNNING + || snap->def->state == VIR_DOMAIN_PAUSED)&& + (flags& (VIR_DOMAIN_SNAPSHOT_REVERT_RUNNING | + VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED))) { + qemuReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY, + _("must respawn qemu to start inactive snapshot")); + goto cleanup; + } + } +
if (vm->current_snapshot) { vm->current_snapshot->def->current = false; @@ -9818,11 +9837,6 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, VIR_FREE(xml); if (!config) goto cleanup; - } else { - /* XXX Fail if VIR_DOMAIN_REVERT_FORCE is not set, rather than - * blindly hoping for the best. */ - VIR_WARN("snapshot is lacking rollback information for domain '%s'", - snap->def->name); }
if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_MODIFY)< 0) @@ -9843,10 +9857,22 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, /* Transitions 5, 6, 8, 9 */ /* Check for ABI compatibility. */ if (config&& !virDomainDefCheckABIStability(vm->def, config)) { - /* XXX Add VIR_DOMAIN_REVERT_FORCE to permit killing - * and restarting a new qemu, since loadvm monitor - * command won't work. */ - goto endjob; + if (!(flags& VIR_DOMAIN_SNAPSHOT_REVERT_FORCE)) { + /* Alter existing error to give correct category. */ + virErrorPtr err = virGetLastError(); + err->code = VIR_ERR_SNAPSHOT_REVERT_RISKY; + goto endjob; + } + qemuProcessStop(driver, vm, 0, + VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT); + virDomainAuditStop(vm, "from-snapshot"); + detail = VIR_DOMAIN_EVENT_STOPPED_FROM_SNAPSHOT; + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_STOPPED, + detail); + if (event) + qemuDomainEventQueue(driver, event); + goto load; }
priv = vm->privateData; @@ -9882,6 +9908,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, virDomainObjAssignDef(vm, config, false); } else { /* Transitions 2, 3 */ + load: was_stopped = true; if (config) virDomainObjAssignDef(vm, config, false);
ACK.

On 10/04/2011 01:26 PM, Laine Stump wrote:
On 09/30/2011 02:52 PM, Eric Blake wrote:
Implements the documentation for snapshot revert vs. force.
Part of the patch tightens existing behavior (previously, reverting to an old snapshot without<domain> was blindly attempted, now it requires force), while part of it relaxes behavior (previously, it was not possible to revert an active domain to an ABI-incompatible active snapshot, now force allows this transition).
* src/qemu/qemu_driver.c (qemuDomainRevertToSnapshot): Check for risky situations, and allow force to get past them.
ACK.
Before pushing this, I'm running some sanity tests. So far, this test sequence (adjusted to the fixed code) shows where force helps with older snapshots (I'll send separate email for showing how force helps active ABI-incompatible snapshots): Test 1: $ virsh snapshot-create-as dom snap # offline domain with one qcow2 disk $ virsh edit dom # add a second qcow2 disk $ virsh snapshot-revert dom snap # offline revert doesn't need force $ virsh dumpxml dom # sure enough, second disk is gone $ virsh edit dom # add a second qcow2 disk $ virsh snapshot-dumpxml dom snap > snap.xml $ virsh snapshot-delete --metadata dom snap $ sed -i '\|<domain |,\|</domain>| d' snap.xml $ virsh snapshot-create --redefine fedora_15-32 snap.xml # the delete --metadata/--redefine is necessary, so that libvirt # won't reuse <domain> from the prior definition $ virsh snapshot-revert dom snap # simulates reverting to 0.9.4 snapshot error: revert requires force: snapshot 'snap' lacks domain 'dom' rollback info $ virsh snapshot-revert dom snap --force error: internal error Child process (/usr/bin/qemu-img snapshot -a snap /path/to/second.qcow2) status unexpected: exit status 1 # See why we shouldn't have allowed blind revert? :) $ virsh edit dom # remove that second disk $ virsh snapshot-revert dom snap --force # now that we match expected state, the revert works as desired And here's what I have to squash in for test 1 to succeed as planned. diff --git i/src/qemu/qemu_driver.c w/src/qemu/qemu_driver.c index 1a171cf..07c4fd4 100644 --- i/src/qemu/qemu_driver.c +++ w/src/qemu/qemu_driver.c @@ -9649,7 +9649,8 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, virDomainDefPtr config = NULL; virCheckFlags(VIR_DOMAIN_SNAPSHOT_REVERT_RUNNING | - VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED, -1); + VIR_DOMAIN_SNAPSHOT_REVERT_PAUSED | + VIR_DOMAIN_SNAPSHOT_REVERT_FORCE, -1); /* We have the following transitions, which create the following events: * 1. inactive -> inactive: none @@ -9701,8 +9702,8 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, if (!(flags & VIR_DOMAIN_SNAPSHOT_REVERT_FORCE)) { if (!snap->def->dom) { qemuReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY, - _("snapshot lacks domain '%s' rollback details"), - snap->def->name); + _("snapshot '%s' lacks domain '%s' rollback info"), + snap->def->name, vm->def->name); goto cleanup; } if (virDomainObjIsActive(vm) && diff --git i/src/qemu/qemu_domain.c w/src/qemu/qemu_domain.c index 65f721a..dda53f3 100644 --- i/src/qemu/qemu_domain.c +++ w/src/qemu/qemu_domain.c @@ -1441,7 +1441,7 @@ qemuDomainSnapshotForEachQcow2(struct qemud_driver *driver, if (virRun(qemuimgarg, NULL) < 0) { if (try_all) { VIR_WARN("skipping snapshot action on %s", - vm->def->disks[i]->info.alias); + vm->def->disks[i]->dst); skipped = true; continue; } -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On 10/04/2011 04:02 PM, Eric Blake wrote:
Before pushing this, I'm running some sanity tests. So far, this test sequence (adjusted to the fixed code) shows where force helps with older snapshots (I'll send separate email for showing how force helps active ABI-incompatible snapshots):
Test 1: $ virsh snapshot-create-as dom snap # offline domain with one qcow2 disk $ virsh edit dom # add a second qcow2 disk $ virsh snapshot-revert dom snap # offline revert doesn't need force $ virsh dumpxml dom # sure enough, second disk is gone
Whoops, this part of the test didn't quite work out, either. I need to revert to the snapshot <domain> prior to determining the list of disks to iterate over, so that we avoid calling qemu-img snapshot -a on the disk image that was not part of the snapshot. Likewise, snapshot-delete should call qemu-img snapshot -d only on the disk images involved in the snapshot.
And here's what I have to squash in for test 1 to succeed as planned.
Rather than squash in the fixed qemu-img iteration unreviewed into this ACK'd patch, I'll submit it as a separate patch. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On 10/04/2011 04:02 PM, Eric Blake wrote:
* src/qemu/qemu_driver.c (qemuDomainRevertToSnapshot): Check for risky situations, and allow force to get past them.
ACK.
Before pushing this, I'm running some sanity tests. So far, this test sequence (adjusted to the fixed code) shows where force helps with older snapshots (I'll send separate email for showing how force helps active ABI-incompatible snapshots):
Test 2: $ virsh define dom # domain with qcow2 disk $ virsh start dom $ virsh snapshot-create-as dom snap $ virsh shutdown dom # and wait for it to work $ virsh edit dom # and add a second disk $ virsh start dom $ virt-manager # and open a window on the guest $ virsh snapshot-revert dom snap error: unsupported configuration: Target domain disk count 1 does not match source 2 # Error was expected(*), cannot do live revert across ABI break $ virsh snapshot-revert dom snap --force # --force let things work, and virt-manager display bounced due to # need to start a new qemu process $ virsh dumpxml dom # confirm only one disk present $ virsh snapshot-revert dom snap # revert happens without complaint and without display bounce (*)That was the message shown before squashing in this patch; after squashing in this patch, the message properly mentions "force": error: revert requires force: Target domain disk count 1 does not match source 2 diff --git i/src/qemu/qemu_driver.c w/src/qemu/qemu_driver.c index 55398f6..2e6f3e4 100644 --- i/src/qemu/qemu_driver.c +++ w/src/qemu/qemu_driver.c @@ -9771,12 +9771,16 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, /* Transitions 5, 6, 8, 9 */ /* Check for ABI compatibility. */ if (config && !virDomainDefCheckABIStability(vm->def, config)) { + virErrorPtr err = virGetLastError(); + if (!(flags & VIR_DOMAIN_SNAPSHOT_REVERT_FORCE)) { - /* Alter existing error to give correct category. */ - virErrorPtr err = virGetLastError(); - err->code = VIR_ERR_SNAPSHOT_REVERT_RISKY; + /* Re-spawn error using correct category. */ + if (err->code == VIR_ERR_CONFIG_UNSUPPORTED) + qemuReportError(VIR_ERR_SNAPSHOT_REVERT_RISKY, "%s", + err->str2); goto endjob; } + virResetError(err); qemuProcessStop(driver, vm, 0, VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT); virDomainAuditStop(vm, "from-snapshot"); -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On 10/04/2011 04:02 PM, Eric Blake wrote:
* src/qemu/qemu_driver.c (qemuDomainRevertToSnapshot): Check for risky situations, and allow force to get past them.
ACK.
Before pushing this, I'm running some sanity tests. So far, this test sequence (adjusted to the fixed code) shows where force helps with older snapshots (I'll send separate email for showing how force helps active ABI-incompatible snapshots):
Test 3: $ virsh define dom # domain with qcow2 disk $ virsh snapshot-create-as dom snap $ virsh edit dom # and add a second disk $ virsh start dom $ virsh snapshot-revert dom snap # revert succeeds and domain is offline $ virsh dumpxml dom # dom is back to one disk $ virsh snapshot-revert dom snap --running # revert succeeds, and dom boots $ virsh shutdown dom # and wait for it to work $ virsh edit dom # and add a second disk $ virsh start dom $ virt-manager # and open a window on the guest $ virsh snapshot-revert dom snap --running error: revert requires force: must respawn qemu to start inactive snapshot # Error was expected $ virsh snapshot-revert dom snap --running --force # revert succeeds, and virt-manager display bounces $ virsh dumpxml dom # dom is back to one disk And now that I've covered all three cases where I used the new VIR_ERR_SNAPSHOT_REVERT_RISKY error code in qemu_driver.c, I'm comfortable enough with my testing to go ahead and push all three ACK'd patches. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

Once we know which set of disks belong to a snapshot, reverting or deleting that snapshot should visit just those disks, rather than also visiting disks that were hot-plugged in the meantime or skipping disks that were hot-unplugged in the meantime. * src/qemu/qemu_domain.c (qemuDomainSnapshotForEachQcow2): Use snapshot domain details when available. Avoid NULL deref. --- Detected while actually testing patch 2/2 in the series. This fixes the issue, but is worth a separate review before I push the series. src/qemu/qemu_domain.c | 24 ++++++++++++++++-------- 1 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 65f721a..85bebd6 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1402,6 +1402,14 @@ qemuDomainSnapshotForEachQcow2(struct qemud_driver *driver, const char *qemuimgarg[] = { NULL, "snapshot", NULL, NULL, NULL, NULL }; int i; bool skipped = false; + virDomainDefPtr def; + + /* Prefer action on the disks in use at the time the snapshot was + * created; but fall back to current definition if dealing with a + * snapshot created prior to libvirt 0.9.5. */ + def = snap->def->dom; + if (!def) + def = vm->def; qemuimgarg[0] = qemuFindQemuImgBinary(driver); if (qemuimgarg[0] == NULL) { @@ -1412,36 +1420,36 @@ qemuDomainSnapshotForEachQcow2(struct qemud_driver *driver, qemuimgarg[2] = op; qemuimgarg[3] = snap->def->name; - for (i = 0; i < vm->def->ndisks; i++) { + for (i = 0; i < def->ndisks; i++) { /* FIXME: we also need to handle LVM here */ /* FIXME: if we fail halfway through this loop, we are in an * inconsistent state. I'm not quite sure what to do about that */ - if (vm->def->disks[i]->device == VIR_DOMAIN_DISK_DEVICE_DISK) { - if (!vm->def->disks[i]->driverType || - STRNEQ(vm->def->disks[i]->driverType, "qcow2")) { + if (def->disks[i]->device == VIR_DOMAIN_DISK_DEVICE_DISK) { + if (!def->disks[i]->driverType || + STRNEQ(def->disks[i]->driverType, "qcow2")) { if (try_all) { /* Continue on even in the face of error, since other * disks in this VM may have the same snapshot name. */ VIR_WARN("skipping snapshot action on %s", - vm->def->disks[i]->dst); + def->disks[i]->dst); skipped = true; continue; } qemuReportError(VIR_ERR_OPERATION_INVALID, _("Disk device '%s' does not support" " snapshotting"), - vm->def->disks[i]->dst); + def->disks[i]->dst); return -1; } - qemuimgarg[4] = vm->def->disks[i]->src; + qemuimgarg[4] = def->disks[i]->src; if (virRun(qemuimgarg, NULL) < 0) { if (try_all) { VIR_WARN("skipping snapshot action on %s", - vm->def->disks[i]->info.alias); + def->disks[i]->dst); skipped = true; continue; } -- 1.7.4.4

On 10/04/2011 07:39 PM, Eric Blake wrote:
Once we know which set of disks belong to a snapshot, reverting or deleting that snapshot should visit just those disks, rather than also visiting disks that were hot-plugged in the meantime or skipping disks that were hot-unplugged in the meantime.
* src/qemu/qemu_domain.c (qemuDomainSnapshotForEachQcow2): Use snapshot domain details when available. Avoid NULL deref. ---
Detected while actually testing patch 2/2 in the series. This fixes the issue, but is worth a separate review before I push the series.
src/qemu/qemu_domain.c | 24 ++++++++++++++++-------- 1 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 65f721a..85bebd6 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1402,6 +1402,14 @@ qemuDomainSnapshotForEachQcow2(struct qemud_driver *driver, const char *qemuimgarg[] = { NULL, "snapshot", NULL, NULL, NULL, NULL }; int i; bool skipped = false; + virDomainDefPtr def; + + /* Prefer action on the disks in use at the time the snapshot was + * created; but fall back to current definition if dealing with a + * snapshot created prior to libvirt 0.9.5. */ + def = snap->def->dom; + if (!def) + def = vm->def;
qemuimgarg[0] = qemuFindQemuImgBinary(driver); if (qemuimgarg[0] == NULL) { @@ -1412,36 +1420,36 @@ qemuDomainSnapshotForEachQcow2(struct qemud_driver *driver, qemuimgarg[2] = op; qemuimgarg[3] = snap->def->name;
- for (i = 0; i< vm->def->ndisks; i++) { + for (i = 0; i< def->ndisks; i++) { /* FIXME: we also need to handle LVM here */ /* FIXME: if we fail halfway through this loop, we are in an * inconsistent state. I'm not quite sure what to do about that */ - if (vm->def->disks[i]->device == VIR_DOMAIN_DISK_DEVICE_DISK) { - if (!vm->def->disks[i]->driverType || - STRNEQ(vm->def->disks[i]->driverType, "qcow2")) { + if (def->disks[i]->device == VIR_DOMAIN_DISK_DEVICE_DISK) { + if (!def->disks[i]->driverType || + STRNEQ(def->disks[i]->driverType, "qcow2")) { if (try_all) { /* Continue on even in the face of error, since other * disks in this VM may have the same snapshot name. */ VIR_WARN("skipping snapshot action on %s", - vm->def->disks[i]->dst); + def->disks[i]->dst); skipped = true; continue; } qemuReportError(VIR_ERR_OPERATION_INVALID, _("Disk device '%s' does not support" " snapshotting"), - vm->def->disks[i]->dst); + def->disks[i]->dst); return -1; }
- qemuimgarg[4] = vm->def->disks[i]->src; + qemuimgarg[4] = def->disks[i]->src;
if (virRun(qemuimgarg, NULL)< 0) { if (try_all) { VIR_WARN("skipping snapshot action on %s", - vm->def->disks[i]->info.alias); + def->disks[i]->dst); skipped = true; continue; }
ACK.
participants (2)
-
Eric Blake
-
Laine Stump