On Thu, Feb 06, 2014 at 06:23:43PM +0100, Jiri Denemark wrote:
On Thu, Feb 06, 2014 at 17:33:14 +0100, Martin Kletzander wrote:
> A couple of codepaths shared the same code, which can be moved out into
> a function, and in one of those places, qemuMigrationConfirmPhase(), the
> domain was resumed even if it wasn't running before the migration
> started.
>
> Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1057407
>
> Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
> ---
> src/qemu/qemu_domain.h | 3 +-
> src/qemu/qemu_migration.c | 117 +++++++++++++++++++++++++---------------------
> 2 files changed, 66 insertions(+), 54 deletions(-)
>
> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index 6a92351..84624f9 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -1,7 +1,7 @@
> /*
> * qemu_domain.h: QEMU domain private state
> *
> - * Copyright (C) 2006-2013 Red Hat, Inc.
> + * Copyright (C) 2006-2014 Red Hat, Inc.
> * Copyright (C) 2006 Daniel P. Berrange
> *
> * This library is free software; you can redistribute it and/or
> @@ -161,6 +161,7 @@ struct _qemuDomainObjPrivate {
> char *origname;
> int nbdPort; /* Port used for migration with NBD */
> unsigned short migrationPort;
> + int preMigrationState;
>
> virChrdevsPtr devs;
>
> diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
> index 407fb70..1a29efa 100644
> --- a/src/qemu/qemu_migration.c
> +++ b/src/qemu/qemu_migration.c
> @@ -1075,6 +1075,53 @@ error:
> return NULL;
> }
>
> +static void
> +qemuDomainObjStorePreMigrationState(virDomainObjPtr vm)
> +{
> + qemuDomainObjPrivatePtr priv = vm->privateData;
> + priv->preMigrationState = virDomainObjGetState(vm, NULL);
> +
> + VIR_DEBUG("Storing pre-migration state=%d domain=%p",
> + priv->preMigrationState, vm);
> +}
We are in the qemu_migration.c file, so this function and the one below
should use the qemuMigration prefix.
I did find one exception just now, but I'm not using that as an argument;
I changed these to qemuMigration{S,Res}toreDomainState().
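To be concrete, the renamed helpers would be declared roughly like this
(a sketch only; nothing but the names changes, the bodies stay as in the
quoted hunks):

    /* Remembers the current domain state in priv->preMigrationState */
    static void
    qemuMigrationStoreDomainState(virDomainObjPtr vm);

    /* Returns true if the domain was resumed, false otherwise */
    static bool
    qemuMigrationRestoreDomainState(virConnectPtr conn, virDomainObjPtr vm);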
> +
> +/* Returns true if the domain was resumed, false otherwise */
> +static bool
> +qemuDomainObjRestorePreMigrationState(virConnectPtr conn, virDomainObjPtr vm)
> +{
> + virQEMUDriverPtr driver = conn->privateData;
> + qemuDomainObjPrivatePtr priv = vm->privateData;
> + int state = virDomainObjGetState(vm, NULL);
> + bool ret = false;
> +
> + VIR_DEBUG("driver=%p, vm=%p, pre-mig-state=%d, state=%d",
> + driver, vm, priv->preMigrationState, state);
> +
> + if (state == VIR_DOMAIN_PAUSED &&
> + priv->preMigrationState == VIR_DOMAIN_RUNNING) {
> + /* This is basically the only restore possibility that's safe
> + * and we should attemt to do */
> +
Changed this to attempt.
> + VIR_DEBUG("Restoring pre-migration state due to
migration error");
> +
> + /* we got here through some sort of failure; start the domain again */
> + if (qemuProcessStartCPUs(driver, vm, conn,
> + VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
> + QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
> + /* Hm, we already know we are in error here. We don't want to
> + * overwrite the previous error, though, so we just throw something
> + * to the logs and hope for the best */
> + VIR_ERROR(_("Failed to resume guest %s after failure"),
vm->def->name);
> + goto cleanup;
> + }
> + ret = true;
> + }
> +
> + cleanup:
> + priv->preMigrationState = VIR_DOMAIN_NOSTATE;
> + return ret;
> +}
> +
> /**
> * qemuMigrationStartNBDServer:
> * @driver: qemu driver
> @@ -1952,8 +1999,8 @@ cleanup:
>
>
> /* The caller is supposed to lock the vm and start a migration job. */
> -static char
> -*qemuMigrationBeginPhase(virQEMUDriverPtr driver,
> +static char *
> +qemuMigrationBeginPhase(virQEMUDriverPtr driver,
Heh, it took me a while to see what you did :-) However, the following
lines are not indented correctly now...
This change shouldn't be in this patch anyway, and there are more cleanups
to do, so I left it out and will post it together with some other cleanups
soon.
> virDomainObjPtr vm,
> const char *xmlin,
> const char *dname,
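Just to illustrate the re-indentation that would go with it (a sketch only,
since the cleanup is dropped from this patch): with the '*' moved up, the
parameter lines shift one column left to stay aligned with the opening
parenthesis:

    static char *
    qemuMigrationBeginPhase(virQEMUDriverPtr driver,
                            virDomainObjPtr vm,
                            const char *xmlin,
                            const char *dname,
                            ...)    /* remaining parameters and body unchanged */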
> @@ -2075,6 +2122,8 @@ qemuMigrationBegin(virConnectPtr conn,
> asyncJob = QEMU_ASYNC_JOB_NONE;
> }
>
> + qemuDomainObjStorePreMigrationState(vm);
> +
> if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) {
> virReportError(VIR_ERR_OPERATION_INVALID,
> "%s", _("domain is not running"));
> @@ -2744,22 +2793,12 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver,
> /* cancel any outstanding NBD jobs */
> qemuMigrationCancelDriveMirror(mig, driver, vm);
>
> - /* run 'cont' on the destination, which allows migration on qemu
> - * >= 0.10.6 to work properly. This isn't strictly necessary on
> - * older qemu's, but it also doesn't hurt anything there
> - */
> - if (qemuProcessStartCPUs(driver, vm, conn,
> - VIR_DOMAIN_RUNNING_MIGRATED,
> - QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
> - if (virGetLastError() == NULL)
> - virReportError(VIR_ERR_INTERNAL_ERROR,
> - "%s", _("resume operation
failed"));
> - goto cleanup;
> + if (qemuDomainObjRestorePreMigrationState(conn, vm)) {
> + event = virDomainEventLifecycleNewFromObj(vm,
> + VIR_DOMAIN_EVENT_RESUMED,
> + VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
> }
>
> - event = virDomainEventLifecycleNewFromObj(vm,
> - VIR_DOMAIN_EVENT_RESUMED,
> - VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
> if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) {
> VIR_WARN("Failed to save status on vm %s", vm->def->name);
> goto cleanup;
> @@ -4065,7 +4104,6 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
> {
> virObjectEventPtr event = NULL;
> int ret = -1;
> - int resume = 0;
> virErrorPtr orig_err = NULL;
> virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
> bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);
> @@ -4085,7 +4123,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
> if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def))
> goto endjob;
>
> - resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
> + qemuDomainObjStorePreMigrationState(vm);
>
> if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) {
> ret = doPeer2PeerMigrate(driver, conn, vm, xmlin,
> @@ -4112,25 +4150,13 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
> VIR_DOMAIN_EVENT_STOPPED,
> VIR_DOMAIN_EVENT_STOPPED_MIGRATED);
> }
> - resume = 0;
>
> endjob:
> if (ret < 0)
> orig_err = virSaveLastError();
>
> - if (resume && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
> - /* we got here through some sort of failure; start the domain again */
> - if (qemuProcessStartCPUs(driver, vm, conn,
> - VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
> - QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
> - /* Hm, we already know we are in error here. We don't want to
> - * overwrite the previous error, though, so we just throw something
> - * to the logs and hope for the best
> - */
> - VIR_ERROR(_("Failed to resume guest %s after failure"),
> - vm->def->name);
> - }
> -
> + if (!v3proto &&
> + qemuDomainObjRestorePreMigrationState(conn, vm)) {
You could also call qemuDomainObjRestorePreMigrationState unconditionally,
since you always reset preMigrationState, which makes it safe to call the
function multiple times. But it does not really matter.
I changed it to an unconditional call. I wanted to do that at first
(hence the carefulness inside that function), but ended up calling it
conditionally because I didn't want to explain that :)
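With the rename from above, the endjob part then looks roughly like this
(a sketch, nothing else changed):

    endjob:
        if (ret < 0)
            orig_err = virSaveLastError();

        /* The helper only resumes a domain that is paused now but was
         * running before the migration started, and it always resets
         * priv->preMigrationState, so calling it unconditionally (even
         * more than once) is harmless. */
        if (qemuMigrationRestoreDomainState(conn, vm)) {
            event = virDomainEventLifecycleNewFromObj(vm,
                                                      VIR_DOMAIN_EVENT_RESUMED,
                                                      VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
        }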
> event = virDomainEventLifecycleNewFromObj(vm,
> VIR_DOMAIN_EVENT_RESUMED,
> VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
> @@ -4179,7 +4205,6 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
> {
> virObjectEventPtr event = NULL;
> int ret = -1;
> - bool resume;
> bool hasrefs;
>
> /* If we didn't start the job in the begin phase, start it now. */
> @@ -4194,32 +4219,18 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
> virCloseCallbacksUnset(driver->closeCallbacks, vm,
> qemuMigrationCleanup);
>
> - resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
> ret = doNativeMigrate(driver, vm, uri, cookiein, cookieinlen,
> cookieout, cookieoutlen,
> flags, resource, NULL, graphicsuri);
>
> - if (ret < 0 && resume &&
> - virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
> - /* we got here through some sort of failure; start the domain again */
> - if (qemuProcessStartCPUs(driver, vm, conn,
> - VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
> - QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
> - /* Hm, we already know we are in error here. We don't want to
> - * overwrite the previous error, though, so we just throw something
> - * to the logs and hope for the best
> - */
> - VIR_ERROR(_("Failed to resume guest %s after failure"),
> - vm->def->name);
> + if (ret < 0) {
> + if (qemuDomainObjRestorePreMigrationState(conn, vm)) {
> + event = virDomainEventLifecycleNewFromObj(vm,
> + VIR_DOMAIN_EVENT_RESUMED,
> + VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
> }
> -
> - event = virDomainEventLifecycleNewFromObj(vm,
> - VIR_DOMAIN_EVENT_RESUMED,
> - VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
> - }
> -
> - if (ret < 0)
> goto endjob;
> + }
>
> qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE);
>
ACK with the nits addressed.
Pushed with the nits fixed.
Jirka
Martin