[libvirt] [PATCH V6] support offline migration

original migration did not aware of offline case so, add code to support offline migration quietly (did not disturb original migration) by pass VIR_MIGRATE_OFFLINE flag to migration APIs if the domain is really inactive, and migration process will not puzzeled by domain offline and exit unexpectly. these changes did not take care of disk images the domain required, for disk images could be transfered by other APIs as suggested. so, the migration result is just make domain definition alive at target side.

Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---
 include/libvirt/libvirt.h.in |    1 +
 src/qemu/qemu_driver.c       |    8 ++++++
 src/qemu/qemu_migration.c    |   55 ++++++++++++++++++++++++++++++++++++-----
 src/qemu/qemu_migration.h    |    3 +-
 tools/virsh-domain.c         |    6 ++++
 5 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
index cfe5047..77df2ab 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -995,6 +995,7 @@ typedef enum {
                                               * whole migration process; this will be used automatically
                                               * when supported */
     VIR_MIGRATE_UNSAFE            = (1 << 9), /* force migration even if it is considered unsafe */
+    VIR_MIGRATE_OFFLINE           = (1 << 10), /* offline migrate */
 } virDomainMigrateFlags;

 /* Domain migration. */
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index b12d9bc..e70a5cc 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -9641,6 +9641,8 @@ qemuDomainMigrateBegin3(virDomainPtr domain,
     }

     if (!virDomainObjIsActive(vm)) {
+        if (flags & VIR_MIGRATE_OFFLINE)
+            goto offline;
         virReportError(VIR_ERR_OPERATION_INVALID,
                        "%s", _("domain is not running"));
         goto endjob;
@@ -9653,6 +9655,7 @@ qemuDomainMigrateBegin3(virDomainPtr domain,
     if (qemuDomainCheckEjectableMedia(driver, vm, asyncJob) < 0)
         goto endjob;

+offline:
     if (!(xml = qemuMigrationBegin(driver, vm, xmlin, dname,
                                    cookieout, cookieoutlen,
                                    flags)))
@@ -9888,6 +9891,11 @@ qemuDomainMigrateConfirm3(virDomainPtr domain,
         goto cleanup;
     }

+    if (flags & VIR_MIGRATE_OFFLINE) {
+        ret = 0;
+        goto cleanup;
+    }
+
     if (!qemuMigrationJobIsActive(vm, QEMU_ASYNC_JOB_MIGRATION_OUT))
         goto cleanup;

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 1b21ef6..a54a26e 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -70,6 +70,7 @@ enum qemuMigrationCookieFlags {
     QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS,
     QEMU_MIGRATION_COOKIE_FLAG_LOCKSTATE,
     QEMU_MIGRATION_COOKIE_FLAG_PERSISTENT,
+    QEMU_MIGRATION_COOKIE_FLAG_OFFLINE,

     QEMU_MIGRATION_COOKIE_FLAG_LAST
 };
@@ -77,12 +78,13 @@ enum qemuMigrationCookieFlags {
 VIR_ENUM_DECL(qemuMigrationCookieFlag);
 VIR_ENUM_IMPL(qemuMigrationCookieFlag,
               QEMU_MIGRATION_COOKIE_FLAG_LAST,
-              "graphics", "lockstate", "persistent");
+              "graphics", "lockstate", "persistent", "offline");

 enum qemuMigrationCookieFeatures {
     QEMU_MIGRATION_COOKIE_GRAPHICS   = (1 << QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS),
     QEMU_MIGRATION_COOKIE_LOCKSTATE  = (1 << QEMU_MIGRATION_COOKIE_FLAG_LOCKSTATE),
     QEMU_MIGRATION_COOKIE_PERSISTENT = (1 << QEMU_MIGRATION_COOKIE_FLAG_PERSISTENT),
+    QEMU_MIGRATION_COOKIE_OFFLINE    = (1 << QEMU_MIGRATION_COOKIE_FLAG_OFFLINE),
 };

 typedef struct _qemuMigrationCookieGraphics qemuMigrationCookieGraphics;
@@ -439,6 +441,11 @@ qemuMigrationCookieXMLFormat(struct qemud_driver *driver,
         virBufferAdjustIndent(buf, -2);
     }

+    if (mig->flags & QEMU_MIGRATION_COOKIE_OFFLINE) {
+        virBufferAsprintf(buf, "  <offline>\n");
+        virBufferAddLit(buf, "  </offline>\n");
+    }
+
     virBufferAddLit(buf, "</qemu-migration>\n");
     return 0;
 }
@@ -662,6 +669,12 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig,
         VIR_FREE(nodes);
     }

+    if ((flags & QEMU_MIGRATION_COOKIE_OFFLINE)) {
+        if (virXPathBoolean("count(./offline) > 0", ctxt)) {
+            mig->flags |= QEMU_MIGRATION_COOKIE_OFFLINE;
+        }
+    }
+
     return 0;

 error:
@@ -721,6 +734,10 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig,
         qemuMigrationCookieAddPersistent(mig, dom) < 0)
         return -1;

+    if (flags & QEMU_MIGRATION_COOKIE_OFFLINE) {
+        mig->flags |= QEMU_MIGRATION_COOKIE_OFFLINE;
+    }
+
     if (!(*cookieout = qemuMigrationCookieXMLFormatStr(driver, mig)))
         return -1;

@@ -1151,6 +1168,13 @@ char *qemuMigrationBegin(struct qemud_driver *driver,
                                 QEMU_MIGRATION_COOKIE_LOCKSTATE) < 0)
         goto cleanup;

+    if (flags & VIR_MIGRATE_OFFLINE) {
+        if (qemuMigrationBakeCookie(mig, driver, vm,
+                                    cookieout, cookieoutlen,
+                                    QEMU_MIGRATION_COOKIE_OFFLINE) < 0)
+            goto cleanup;
+    }
+
     if (xmlin) {
         if (!(def = virDomainDefParseString(driver->caps, xmlin,
                                             QEMU_EXPECTED_VIRT_TYPES,
@@ -1314,6 +1338,15 @@ qemuMigrationPrepareAny(struct qemud_driver *driver,
             goto endjob;
     }

+    if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen,
+                                       QEMU_MIGRATION_COOKIE_OFFLINE)))
+        return ret;
+
+    if (mig->flags & QEMU_MIGRATION_COOKIE_OFFLINE) {
+        ret = 0;
+        goto cleanup;
+    }
+
     /* Start the QEMU daemon, with the same command-line arguments plus
      * -incoming $migrateFrom
      */
@@ -1856,7 +1889,8 @@ qemuMigrationRun(struct qemud_driver *driver,
                        virLockManagerPluginGetName(driver->lockManager));
         return -1;
     }
-
+    if (flags & VIR_MIGRATE_OFFLINE)
+        return 0;
     if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen,
                                        QEMU_MIGRATION_COOKIE_GRAPHICS)))
         goto cleanup;
@@ -2372,6 +2406,8 @@ static int doPeer2PeerMigrate3(struct qemud_driver *driver,
         qemuDomainObjExitRemoteWithDriver(driver, vm);
     }
     VIR_FREE(dom_xml);
+    if (flags & VIR_MIGRATE_OFFLINE)
+        goto cleanup;
     if (ret == -1)
         goto cleanup;

@@ -2477,7 +2513,7 @@ finish:
                  vm->def->name);

 cleanup:
-    if (ddomain) {
+    if (ddomain || (flags & VIR_MIGRATE_OFFLINE)) {
         virObjectUnref(ddomain);
         ret = 0;
     } else {
@@ -2554,7 +2590,7 @@ static int doPeer2PeerMigrate(struct qemud_driver *driver,
     }

     /* domain may have been stopped while we were talking to remote daemon */
-    if (!virDomainObjIsActive(vm)) {
+    if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) {
         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                        _("guest unexpectedly quit"));
         goto cleanup;
@@ -2617,7 +2653,7 @@ qemuMigrationPerformJob(struct qemud_driver *driver,
     if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_OUT) < 0)
         goto cleanup;

-    if (!virDomainObjIsActive(vm)) {
+    if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) {
         virReportError(VIR_ERR_OPERATION_INVALID,
                        "%s", _("domain is not running"));
         goto endjob;
@@ -2941,6 +2977,8 @@ qemuMigrationFinish(struct qemud_driver *driver,
      */
     if (retcode == 0) {
         if (!virDomainObjIsActive(vm)) {
+            if (flags & VIR_MIGRATE_OFFLINE)
+                goto offline;
             virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                            _("guest unexpectedly quit"));
             goto endjob;
@@ -3038,7 +3076,7 @@ qemuMigrationFinish(struct qemud_driver *driver,
                 goto endjob;
             }
         }
-
+offline:
         dom = virGetDomain (dconn, vm->def->name, vm->def->uuid);

         event = virDomainEventNewFromObj(vm,
@@ -3120,7 +3158,10 @@ int qemuMigrationConfirm(struct qemud_driver *driver,

     if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0)))
         return -1;
-
+    if (flags & VIR_MIGRATE_OFFLINE) {
+        rv = 0;
+        goto cleanup;
+    }
     /* Did the migration go as planned? If yes, kill off the
      * domain object, but if no, resume CPUs
      */
diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h
index 1740204..2bcaea0 100644
--- a/src/qemu/qemu_migration.h
+++ b/src/qemu/qemu_migration.h
@@ -36,7 +36,8 @@
      VIR_MIGRATE_NON_SHARED_DISK |              \
      VIR_MIGRATE_NON_SHARED_INC |               \
      VIR_MIGRATE_CHANGE_PROTECTION |            \
-     VIR_MIGRATE_UNSAFE)
+     VIR_MIGRATE_UNSAFE |                       \
+     VIR_MIGRATE_OFFLINE)

 enum qemuMigrationJobPhase {
     QEMU_MIGRATION_PHASE_NONE = 0,
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index 4684466..ec25043 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -6525,6 +6525,7 @@ static const vshCmdOptDef opts_migrate[] = {
     {"dname", VSH_OT_DATA, 0, N_("rename to new name during migration (if supported)")},
     {"timeout", VSH_OT_INT, 0, N_("force guest to suspend if live migration exceeds timeout (in seconds)")},
     {"xml", VSH_OT_STRING, 0, N_("filename containing updated XML for the target")},
+    {"offline", VSH_OT_BOOL, 0, N_("for offline migration")},
     {NULL, 0, 0, NULL}
 };

@@ -6591,6 +6592,11 @@ doMigrate(void *opaque)
     if (vshCommandOptBool(cmd, "unsafe"))
         flags |= VIR_MIGRATE_UNSAFE;

+    if (vshCommandOptBool(cmd, "offline")) {
+        if (!virDomainIsActive(dom))
+            flags |= VIR_MIGRATE_OFFLINE;
+    }
+
     if (xmlfile &&
         virFileReadAll(xmlfile, 8192, &xml) < 0) {
         vshError(ctl, _("file '%s' doesn't exist"), xmlfile);
--
1.7.2.5
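[For readers following along: a minimal caller of the new flag could look like the sketch below. Only virDomainMigrateToURI and the patch's VIR_MIGRATE_OFFLINE are real; the helper name is illustrative and error reporting is trimmed.]

    #include <libvirt/libvirt.h>

    /* Sketch: copy an inactive domain's definition to another host.
     * VIR_MIGRATE_OFFLINE exists only with this patch applied; disk
     * images are NOT moved and must be transferred separately, as the
     * commit message notes. */
    static int
    migrate_offline_definition(virConnectPtr conn, const char *name,
                               const char *desturi)
    {
        virDomainPtr dom;
        int ret = -1;

        if (!(dom = virDomainLookupByName(conn, name)))
            return -1;

        if (virDomainMigrateToURI(dom, desturi, VIR_MIGRATE_OFFLINE,
                                  NULL, 0) == 0)
            ret = 0;

        virDomainFree(dom);
        return ret;
    }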

On 09/10/2012 08:20 PM, liguang wrote:
original migration did not aware of offline case so, add code to support offline migration quietly (did not disturb original migration) by pass VIR_MIGRATE_OFFLINE flag to migration APIs if the domain is really inactive, and migration process will not puzzeled by domain
s/puzzeled/puzzled/
offline and exit unexpectly.
s/unexpectly/unexpectedly/
these changes did not take care of disk images the domain required, for disk images could be transfered
s/transfered/transferred/
by other APIs as suggested. so, the migration result is just make domain definition alive at target side.
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com> --- include/libvirt/libvirt.h.in | 1 + src/qemu/qemu_driver.c | 8 ++++++ src/qemu/qemu_migration.c | 55 ++++++++++++++++++++++++++++++++++++----- src/qemu/qemu_migration.h | 3 +- tools/virsh-domain.c | 6 ++++ 5 files changed, 65 insertions(+), 8 deletions(-)
diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index cfe5047..77df2ab 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -995,6 +995,7 @@ typedef enum { * whole migration process; this will be used automatically * when supported */ VIR_MIGRATE_UNSAFE = (1 << 9), /* force migration even if it is considered unsafe */ + VIR_MIGRATE_OFFLINE = (1 << 10), /* offline migrate */
Do we really need a new user-visible flag, or can we make this work automatically without having to involve the user? Making this work transparently might involve adding a new driver feature bit (see qemu_driver.c:qemudSupportsFeature for how we made v3 migration transparent).

On the other hand, what happens if we do keep this as a user-visible flag? Should 'virsh migrate --offline' silently ignore the flag if the guest is online, or should it error out stating that the guest is running and not offline?

Also, I think we NEED to error out if the guest is offline but the --persistent flag is not set; that is, an offline migration only makes sense if the persistent flag has been requested, but I think that 'virsh migrate --persistent' should automatically be smart enough to do an offline migration.

You either need to take care of migrating storage if the user does 'virsh migrate [whatever-we-decide-for-offline] --copy-storage-*', or else explicitly reject attempts to migrate storage in parallel with an offline migration.
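[A sketch of the stricter virsh-side policy described above -- not the posted patch: check_offline_flags is an invented helper that errors out instead of silently ignoring the flag, and insists on --persistent. It assumes the patched libvirt.h.in for VIR_MIGRATE_OFFLINE.]

    #include <libvirt/libvirt.h>

    /* Reject --offline for a running guest and require --persistent
     * (VIR_MIGRATE_PERSIST_DEST) alongside it. */
    static int
    check_offline_flags(virDomainPtr dom, unsigned long *flags)
    {
        int active = virDomainIsActive(dom);

        if (active < 0)
            return -1;   /* state lookup failed */
        if (active)
            return -1;   /* guest is running: error out, don't ignore */
        if (!(*flags & VIR_MIGRATE_PERSIST_DEST))
            return -1;   /* offline only makes sense with --persistent */

        *flags |= VIR_MIGRATE_OFFLINE;
        return 0;
    }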
@@ -439,6 +441,11 @@ qemuMigrationCookieXMLFormat(struct qemud_driver *driver, virBufferAdjustIndent(buf, -2); }
+    if (mig->flags & QEMU_MIGRATION_COOKIE_OFFLINE) {
+        virBufferAsprintf(buf, "  <offline>\n");
+        virBufferAddLit(buf, "  </offline>\n");
Collapse these two lines into the simpler one-liner:

    virBufferAddLit(buf, "  <offline/>\n");

--
Eric Blake   eblake@redhat.com   +1-919-301-3266
Libvirt virtualization library http://libvirt.org
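[With that change, the quoted addition would shrink to something like the following -- keeping the patch's surrounding condition; dropping the now-unneeded braces is a guess at the final form:]

    +    if (mig->flags & QEMU_MIGRATION_COOKIE_OFFLINE)
    +        virBufferAddLit(buf, "  <offline/>\n");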

On Tue, Sep 11, 2012 at 08:30:08AM -0600, Eric Blake wrote:
On 09/10/2012 08:20 PM, liguang wrote:
original migration did not aware of offline case so, add code to support offline migration quietly (did not disturb original migration) by pass VIR_MIGRATE_OFFLINE flag to migration APIs if the domain is really inactive, and migration process will not puzzeled by domain
s/puzzeled/puzzled/
offline and exit unexpectly.
s/unexpectly/unexpectedly/
these changes did not take care of disk images the domain required, for disk images could be transfered
s/transfered/transferred/
by other APIs as suggested. so, the migration result is just make domain definition alive at target side.
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---
 include/libvirt/libvirt.h.in |    1 +
 src/qemu/qemu_driver.c       |    8 ++++++
 src/qemu/qemu_migration.c    |   55 ++++++++++++++++++++++++++++++++++++-----
 src/qemu/qemu_migration.h    |    3 +-
 tools/virsh-domain.c         |    6 ++++
 5 files changed, 65 insertions(+), 8 deletions(-)
diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
index cfe5047..77df2ab 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -995,6 +995,7 @@ typedef enum {
                                               * whole migration process; this will be used automatically
                                               * when supported */
     VIR_MIGRATE_UNSAFE            = (1 << 9), /* force migration even if it is considered unsafe */
+    VIR_MIGRATE_OFFLINE           = (1 << 10), /* offline migrate */
Do we really need a new user-visible flag, or can we make this work automatically without having to involve the user?

On the other hand, what happens if we do keep this as a user-visible flag? Should 'virsh migrate --offline' silently ignore the flag if the guest is online, or should it error out stating that the guest is running and not offline?
Also, I think we NEED to error out if the guest is offline but the --persistent flag is not set; that is, an offline migration only makes sense if the persistent flag has been requested, but I think that 'virsh migrate --persistent' should automatically be smart enough to do an offline migration.
No we must not do that. If a guest has shut off we cannot assume that the user / app wants to copy it across to the other host. e.g. consider this scenario:

  1. admin a: check if guestfoo is running
  2. admin b: check if guestfoo is running
  3. admin a: migrate guestfoo barhost
  4. admin b: migrate guestfoo wizzhost

IMHO step 4 should fail unless the admin explicitly requested that they want to copy across the offline config.
You either need to take care of migrating storage if the user does 'virsh migrate [whatever-we-decide-for-offline] --copy-storage-*', or else explicitly reject attempts to migrate storage in parallel with an offline migration.
Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 09/11/2012 08:35 AM, Daniel P. Berrange wrote:
Do we really need a new user-visible flag, or can we make this work automatically without having to involve the user?

On the other hand, what happens if we do keep this as a user-visible flag? Should 'virsh migrate --offline' silently ignore the flag if the guest is online, or should it error out stating that the guest is running and not offline?
Also, I think we NEED to error out if the guest is offline but the --persistent flag is not set; that is, an offline migration only makes sense if the persistent flag has been requested, but I think that 'virsh migrate --persistent' should automatically be smart enough to do an offline migration.
No we must not do that. If a guest has shut off we cannot assume that the user / app wants to copy it across to the other host. e.g. consider this scenario:
  1. admin a: check if guestfoo is running
  2. admin b: check if guestfoo is running
  3. admin a: migrate guestfoo barhost
  4. admin b: migrate guestfoo wizzhost
IMHO step 4 should fail unless the admin explicitly requested that they want to copy across the offline config.
Good point - the new flag is necessary, and must be user-visible. At which point, do we argue that use of the MIGRATE_OFFLINE flag automatically implies MIGRATE_PERSISTENT, or should it be an error unless the user explicitly requests both flags?

--
Eric Blake   eblake@redhat.com   +1-919-301-3266
Libvirt virtualization library http://libvirt.org

On Tue, Sep 11, 2012 at 09:33:32AM -0600, Eric Blake wrote:
On 09/11/2012 08:35 AM, Daniel P. Berrange wrote:
Do we really need a new user-visible flag, or can we make this work automatically without having to involve the user?

On the other hand, what happens if we do keep this as a user-visible flag? Should 'virsh migrate --offline' silently ignore the flag if the guest is online, or should it error out stating that the guest is running and not offline?
Also, I think we NEED to error out if the guest is offline but the --persistent flag is not set; that is, an offline migration only makes sense if the persistent flag has been requested, but I think that 'virsh migrate --persistent' should automatically be smart enough to do an offline migration.
No we must not do that. If a guest has shut off we cannot assume that the user / app wants to copy it across to the other host. e.g. consider this scenario:
  1. admin a: check if guestfoo is running
  2. admin b: check if guestfoo is running
  3. admin a: migrate guestfoo barhost
  4. admin b: migrate guestfoo wizzhost
IMHO step 4 should fail unless the admin explicitly requested that they want to copy across the offline config.
Good point - the new flag is necessary, and must be user-visible. At which point, do we argue that use of the MIGRATE_OFFLINE flag automatically implies MIGRATE_PERSISTENT, or should it be an error unless the user explicitly requests both flags?
It sort of depends what you consider the semantics of MIGRATE_OFFLINE to be. You can consider it to be a flag indicating that the guest must be in the shutoff state currently, or you can consider it to be a permission flag to indicate that migration is allowed if the guest is offline. The difference here is subtle - basically it comes down to whether you raise an error if OFFLINE is set and the guest is currently running.

In the case where you consider it a permission flag (which is what is currently implemented), then we must mandate use of the PERSISTENT flag, otherwise you get weird semantics, e.g. the persistent config would not be copied if running, but would be copied if shut off.

If you consider it a state flag, then I don't think it matters so much, though I would still prefer to see the MIGRATE_PERSISTENT flag specified explicitly. It would let you deal with a case where you have a shutoff guest, and you migrate the storage (using the appropriate flag) but do not migrate the config.

Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
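[Expressed as code, the two readings amount to different checks on the source side. A sketch using the patch's identifiers; the placement in qemuMigrationBegin and the error strings are assumptions:]

    /* (a) state flag: OFFLINE asserts the guest is shut off right now */
    if ((flags & VIR_MIGRATE_OFFLINE) && virDomainObjIsActive(vm)) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("VIR_MIGRATE_OFFLINE set but domain is running"));
        goto cleanup;
    }

    /* (b) permission flag: OFFLINE merely lets an inactive guest proceed,
     * so PERSIST_DEST must be mandated to avoid the split semantics above */
    if ((flags & VIR_MIGRATE_OFFLINE) && !(flags & VIR_MIGRATE_PERSIST_DEST)) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("offline migration requires VIR_MIGRATE_PERSIST_DEST"));
        goto cleanup;
    }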

On Tue, 2012-09-11 at 18:00 +0100, Daniel P. Berrange wrote:
Good point - the new flag is necessary, and must be user-visible. At which point, do we argue that use of the MIGRATE_OFFLINE flag automatically implies MIGRATE_PERSISTENT, or should it be an error unless the user explicitly requests both flags?
It sort of depends what you consider the semantics of MIGRATE_OFFLINE to be. You can consider it to be a flag indicating that the guest must be in the shutoff state currently, or you can consider it to be a permission flag to indicate that migration is allowed if the guest is offline. The difference here is subtle - basically it comes down to whether you raise an error if OFFLINE is set and the guest is currently running.
In the case where you consider it a permission flag (which is what is currently implemented), then we must mandate use of the PERSISTENT flag, otherwise you get weird semantics, e.g. the persistent config would not be copied if running, but would be copied if shut off.
If you consider it a state flag, then I don't think it matters so much, though I would still prefer to see the MIGRATE_PERSISTENT flag specified explicitly. It would let you deal with a case where you have a shutoff guest, and you migrate the storage (using the appropriate flag) but do not migrate the config.
Yes, implicit flags really lead to misunderstanding.
Daniel
--
liguang   lig.fnst@cn.fujitsu.com
FNST linux kernel team

On Tue, 2012-09-11 at 08:30 -0600, Eric Blake wrote:
You either need to take care of migrating storage if the user does 'virsh migrate [whatever-we-decide-for-offline] --copy-storage-*', or else explicitly reject attempts to migrate storage in parallel with an offline migration.
Right, I'll reject --copy-storage-* and error out.
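[Such a rejection could be a fragment along these lines -- placement and message are assumptions; VIR_MIGRATE_NON_SHARED_DISK/_INC are the existing flags behind --copy-storage-all/--copy-storage-inc:]

    if ((flags & VIR_MIGRATE_OFFLINE) &&
        (flags & (VIR_MIGRATE_NON_SHARED_DISK |
                  VIR_MIGRATE_NON_SHARED_INC))) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("offline migration cannot handle non-shared storage"));
        goto cleanup;
    }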
@@ -439,6 +441,11 @@ qemuMigrationCookieXMLFormat(struct qemud_driver *driver,
         virBufferAdjustIndent(buf, -2);
     }
+    if (mig->flags & QEMU_MIGRATION_COOKIE_OFFLINE) {
+        virBufferAsprintf(buf, "  <offline>\n");
+        virBufferAddLit(buf, "  </offline>\n");
Collapse these two lines into the simpler one-liner:
virBufferAddLit(buf, " <offline/>\n");
OK, I'll collapse them into one. Thanks!

--
liguang   lig.fnst@cn.fujitsu.com
FNST linux kernel team

On Tue, Sep 11, 2012 at 10:20:19AM +0800, liguang wrote:
original migration did not aware of offline case so, add code to support offline migration quietly (did not disturb original migration) by pass VIR_MIGRATE_OFFLINE flag to migration APIs if the domain is really inactive, and migration process will not puzzeled by domain offline and exit unexpectly. these changes did not take care of disk images the domain required, for disk images could be transfered by other APIs as suggested. so, the migration result is just make domain definition alive at target side.
I'm not entirely happy with the semantics wrt the PERSIST_DEST flag - this is basically doing persistence without having the flag specified.

Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (3)
- Daniel P. Berrange
- Eric Blake
- liguang