[libvirt] [PATCH 0/3] Add some new features into migration

This patch set adds three new features into migration: 1. Cancel migration if user presses Ctrl-C when migration is in progress 2. show migration's progress 3. auto-suspend the guest OS at timeout, where the migration will complete offline Hu Tao (1): Cancel migration if user presses Ctrl-C when migration is in progress Wen Congyang (2): show mig progress force guest suspend at timeout tools/virsh.c | 211 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- tools/virsh.pod | 7 ++- 2 files changed, 206 insertions(+), 12 deletions(-)

From: Hu Tao <hutao@cn.fujitsu.com> Date: Tue, 11 Jan 2011 15:01:24 +0800 Subject: [PATCH 1/3] Cancel migration if user presses Ctrl-C when migration is in progress While migration is in progress and virsh is waiting for its completion, user may want to terminate the progress by pressing Ctrl-C. But virsh just exits on user's Ctrl-C leaving migration in background that user isn't even aware of. It's not reasonable. This patch changes the behaviour for migration. For other commands Ctrl-C still terminates virsh itself. --- tools/virsh.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 116 insertions(+), 11 deletions(-) diff --git a/tools/virsh.c b/tools/virsh.c index aba7945..aba4fd1 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -31,6 +31,7 @@ #include <sys/stat.h> #include <inttypes.h> #include <signal.h> +#include <poll.h> #include <libxml/parser.h> #include <libxml/tree.h> @@ -54,6 +55,7 @@ #include "files.h" #include "../daemon/event.h" #include "configmake.h" +#include "threads.h" static char *progname; @@ -492,6 +494,15 @@ out: last_error = NULL; } +static bool intCatched = FALSE; + +static void vshCatchInt(int sig ATTRIBUTE_UNUSED, + siginfo_t *siginfo ATTRIBUTE_UNUSED, + void *context ATTRIBUTE_UNUSED) +{ + intCatched = TRUE; +} + /* * Detection of disconnections and automatic reconnection support */ @@ -3409,24 +3420,38 @@ static const vshCmdOptDef opts_migrate[] = { {NULL, 0, 0, NULL} }; -static int -cmdMigrate (vshControl *ctl, const vshCmd *cmd) +static void +doMigrate (void *opaque) { + char ret = '1'; virDomainPtr dom = NULL; const char *desturi; const char *migrateuri; const char *dname; - int flags = 0, found, ret = FALSE; + int flags = 0, found; + sigset_t sigmask, oldsigmask; + struct { + vshControl *ctl; + vshCmd *cmd; + int writefd; + } *data = opaque; + vshControl *ctl = data->ctl; + vshCmd *cmd = data->cmd; + + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGINT); + if (pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask) < 0) + goto out_sig; if (!vshConnectionUsability (ctl, ctl->conn)) - return FALSE; + goto out; if (!(dom = vshCommandOptDomain (ctl, cmd, NULL))) - return FALSE; + goto out; desturi = vshCommandOptString (cmd, "desturi", &found); if (!found) - goto done; + goto out; migrateuri = vshCommandOptString (cmd, "migrateuri", NULL); @@ -3460,29 +3485,109 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (migrateuri != NULL) { vshError(ctl, "%s", _("migrate: Unexpected migrateuri for peer2peer/direct migration")); - goto done; + goto out; } if (virDomainMigrateToURI (dom, desturi, flags, dname, 0) == 0) - ret = TRUE; + ret = '0'; } else { /* For traditional live migration, connect to the destination host directly. */ virConnectPtr dconn = NULL; virDomainPtr ddom = NULL; dconn = virConnectOpenAuth (desturi, virConnectAuthPtrDefault, 0); - if (!dconn) goto done; + if (!dconn) goto out; ddom = virDomainMigrate (dom, dconn, flags, dname, migrateuri, 0); if (ddom) { virDomainFree(ddom); - ret = TRUE; + ret = '0'; } virConnectClose (dconn); } - done: +out: + pthread_sigmask(SIG_BLOCK, &oldsigmask, NULL); +out_sig: if (dom) virDomainFree (dom); + ignore_value(safewrite(data->writefd, &ret, sizeof(ret))); +} + +static int +cmdMigrate (vshControl *ctl, const vshCmd *cmd) +{ + virDomainPtr dom = NULL; + int p[2] = {-1, -1}; + int ret = -1; + virThread workerThread; + struct pollfd pollfd; + char retchar; + struct sigaction sig_action; + struct sigaction old_sig_action; + + struct { + vshControl *ctl; + const vshCmd *cmd; + int writefd; + } data; + + if (!(dom = vshCommandOptDomain (ctl, cmd, NULL))) + return FALSE; + + if (pipe(p) < 0) + goto cleanup; + + data.ctl = ctl; + data.cmd = cmd; + data.writefd = p[1]; + + if (virThreadCreate(&workerThread, + true, + doMigrate, + &data) < 0) + goto cleanup; + + intCatched = FALSE; + sig_action.sa_sigaction = vshCatchInt; + sig_action.sa_flags = SA_SIGINFO; + sigemptyset(&sig_action.sa_mask); + sigaction(SIGINT, &sig_action, &old_sig_action); + + pollfd.fd = p[0]; + pollfd.events = POLLIN; + pollfd.revents = 0; + + ret = poll(&pollfd, 1, -1); + if (ret > 0) { + if (saferead(p[0], &retchar, sizeof(retchar)) > 0) { + if (retchar == '0') + ret = TRUE; + else + ret = FALSE; + } else + ret = FALSE; + } else if (ret < 0) { + if (errno == EINTR && intCatched) { + virDomainAbortJob(dom); + ret = FALSE; + intCatched = FALSE; + } + } else { + /* timed out */ + ret = FALSE; + } + + sigaction(SIGINT, &old_sig_action, NULL); + + virThreadJoin(&workerThread); + +cleanup: + if (dom) + virDomainFree(dom); + if (p[0] != -1) { + close(p[0]); + close(p[1]); + } return ret; } -- 1.7.1

On 01/23/2011 07:20 PM, Wen Congyang wrote:
From: Hu Tao <hutao@cn.fujitsu.com> Date: Tue, 11 Jan 2011 15:01:24 +0800 Subject: [PATCH 1/3] Cancel migration if user presses Ctrl-C when migration is in progress
While migration is in progress and virsh is waiting for its completion, user may want to terminate the progress by pressing Ctrl-C. But virsh just exits on user's Ctrl-C leaving migration in background that user isn't even aware of. It's not reasonable.
This patch changes the behaviour for migration. For other commands Ctrl-C still terminates virsh itself.
+static bool intCatched = FALSE;
s/Catched/Caught/g Don't mix FALSE and bool (either int and FALSE, or bool and false; with the caveat that int and TRUE/FALSE are historical baggage in this file and we are gradually trying to convert them over to bool and true/false).
+ +static void vshCatchInt(int sig ATTRIBUTE_UNUSED, + siginfo_t *siginfo ATTRIBUTE_UNUSED, + void *context ATTRIBUTE_UNUSED) +{ + intCatched = TRUE;
Furthermore, POSIX states that variables used to communicate between signal handlers and other threads should be [volatile] sig_atomic_t, rather than bool (why? because on some platforms where bool is a byte and smaller than a machine word, it may result in a read-modify-write cycle that could interfere with neighboring bytes, when contrasted with a machine word write of using a sig_atomic_t). So use of bool for this one variable is non-portable to begin with.
-static int -cmdMigrate (vshControl *ctl, const vshCmd *cmd) +static void +doMigrate (void *opaque) { + char ret = '1'; virDomainPtr dom = NULL; const char *desturi; const char *migrateuri; const char *dname; - int flags = 0, found, ret = FALSE; + int flags = 0, found; + sigset_t sigmask, oldsigmask; + struct { + vshControl *ctl; + vshCmd *cmd; + int writefd; + } *data = opaque;
Rather than declaring the struct inline, let's typedef it in advance, so that both caller and consumer share the same struct declaration (that way, if we ever have to add to the struct, we only have to do it in one place).
+ vshControl *ctl = data->ctl; + vshCmd *cmd = data->cmd; + + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGINT); + if (pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask) < 0) + goto out_sig;
Mingw lacks pthread_sigmask, and gnulib doesn't have it (yet). This may cause some compilation portability problems to mingw, but I can help with that (and it can be a followup patch - I won't make it a condition for getting this much-needed improvement in).
+static int +cmdMigrate (vshControl *ctl, const vshCmd *cmd) +{
+ + if (!(dom = vshCommandOptDomain (ctl, cmd, NULL)))
Style - new code should use vshCommandOptDomain(ctl, without a space.
+ + intCatched = FALSE; + sig_action.sa_sigaction = vshCatchInt; + sig_action.sa_flags = SA_SIGINFO; + sigemptyset(&sig_action.sa_mask); + sigaction(SIGINT, &sig_action, &old_sig_action); + + pollfd.fd = p[0]; + pollfd.events = POLLIN; + pollfd.revents = 0; + + ret = poll(&pollfd, 1, -1); + if (ret > 0) { + if (saferead(p[0], &retchar, sizeof(retchar)) > 0) { + if (retchar == '0') + ret = TRUE; + else + ret = FALSE; + } else + ret = FALSE; + } else if (ret < 0) { + if (errno == EINTR && intCatched) { + virDomainAbortJob(dom); + ret = FALSE; + intCatched = FALSE; + }
Don't you need a retry loop here, if you get EINTR but an interrupt was not marked as caught?
+ } else { + /* timed out */ + ret = FALSE; + } + + sigaction(SIGINT, &old_sig_action, NULL); + + virThreadJoin(&workerThread); + +cleanup: + if (dom) + virDomainFree(dom); + if (p[0] != -1) { + close(p[0]); + close(p[1]);
'make syntax-check' should have caught these violations (if it didn't, then that's something that we should fix). This should be: cleanup: virDomainFree(dom); VIR_FORCE_CLOSE(p[0]); VIR_FORCE_CLOSE(p[1]); which avoids issues with double-close, and which avoids useless if statements on free-like functions. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

At 01/25/2011 08:37 AM, Eric Blake Write:
On 01/23/2011 07:20 PM, Wen Congyang wrote:
From: Hu Tao <hutao@cn.fujitsu.com> Date: Tue, 11 Jan 2011 15:01:24 +0800 Subject: [PATCH 1/3] Cancel migration if user presses Ctrl-C when migration is in progress
While migration is in progress and virsh is waiting for its completion, user may want to terminate the progress by pressing Ctrl-C. But virsh just exits on user's Ctrl-C leaving migration in background that user isn't even aware of. It's not reasonable.
This patch changes the behaviour for migration. For other commands Ctrl-C still terminates virsh itself.
+static bool intCatched = FALSE;
s/Catched/Caught/g
Don't mix FALSE and bool (either int and FALSE, or bool and false; with the caveat that int and TRUE/FALSE are historical baggage in this file and we are gradually trying to convert them over to bool and true/false).
+ +static void vshCatchInt(int sig ATTRIBUTE_UNUSED, + siginfo_t *siginfo ATTRIBUTE_UNUSED, + void *context ATTRIBUTE_UNUSED) +{ + intCatched = TRUE;
Furthermore, POSIX states that variables used to communicate between signal handlers and other threads should be [volatile] sig_atomic_t, rather than bool (why? because on some platforms where bool is a byte and smaller than a machine word, it may result in a read-modify-write cycle that could interfere with neighboring bytes, when contrasted with a machine word write of using a sig_atomic_t). So use of bool for this one variable is non-portable to begin with.
-static int -cmdMigrate (vshControl *ctl, const vshCmd *cmd) +static void +doMigrate (void *opaque) { + char ret = '1'; virDomainPtr dom = NULL; const char *desturi; const char *migrateuri; const char *dname; - int flags = 0, found, ret = FALSE; + int flags = 0, found; + sigset_t sigmask, oldsigmask; + struct { + vshControl *ctl; + vshCmd *cmd; + int writefd; + } *data = opaque;
Rather than declaring the struct inline, let's typedef it in advance, so that both caller and consumer share the same struct declaration (that way, if we ever have to add to the struct, we only have to do it in one place).
+ vshControl *ctl = data->ctl; + vshCmd *cmd = data->cmd; + + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGINT); + if (pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask) < 0) + goto out_sig;
Mingw lacks pthread_sigmask, and gnulib doesn't have it (yet). This may cause some compilation portability problems to mingw, but I can help with that (and it can be a followup patch - I won't make it a condition for getting this much-needed improvement in).
+static int +cmdMigrate (vshControl *ctl, const vshCmd *cmd) +{
+ + if (!(dom = vshCommandOptDomain (ctl, cmd, NULL)))
Style - new code should use vshCommandOptDomain(ctl, without a space.
+ + intCatched = FALSE; + sig_action.sa_sigaction = vshCatchInt; + sig_action.sa_flags = SA_SIGINFO; + sigemptyset(&sig_action.sa_mask); + sigaction(SIGINT, &sig_action, &old_sig_action); + + pollfd.fd = p[0]; + pollfd.events = POLLIN; + pollfd.revents = 0; + + ret = poll(&pollfd, 1, -1); + if (ret > 0) { + if (saferead(p[0], &retchar, sizeof(retchar)) > 0) { + if (retchar == '0') + ret = TRUE; + else + ret = FALSE; + } else + ret = FALSE; + } else if (ret < 0) { + if (errno == EINTR && intCatched) { + virDomainAbortJob(dom); + ret = FALSE; + intCatched = FALSE; + }
Don't you need a retry loop here, if you get EINTR but an interrupt was not marked as caught?
+ } else { + /* timed out */ + ret = FALSE; + } + + sigaction(SIGINT, &old_sig_action, NULL); + + virThreadJoin(&workerThread); + +cleanup: + if (dom) + virDomainFree(dom); + if (p[0] != -1) { + close(p[0]); + close(p[1]);
'make syntax-check' should have caught these violations (if it didn't, then that's something that we should fix). This should be:
unfortunately, 'make syntax-check' does not catch these violations.
cleanup: virDomainFree(dom); VIR_FORCE_CLOSE(p[0]); VIR_FORCE_CLOSE(p[1]);
which avoids issues with double-close, and which avoids useless if statements on free-like functions.

On 01/25/2011 12:05 AM, Wen Congyang wrote:
+cleanup:
+ if (dom) + virDomainFree(dom);
My bad for thinking this was a violation of the useless-if-before-free rule; virDomainFree is _not_ free-like: it returns an int, and issues an error if domain is NULL.
+ if (p[0] != -1) { + close(p[0]); + close(p[1]);
'make syntax-check' should have caught these violations (if it didn't, then that's something that we should fix). This should be: unfortunately, 'make syntax-check' does not catch these violations.
Oops. Stupid typo on my part, back in commit f1fe9671. Full patch coming up, once I fix the remaining violations that the typo fix caught. Oh, and I should probably add a rule to prohibit popen while I'm at it (virCommand is better, but I thought of it because pclose is problematic). diff --git i/cfg.mk w/cfg.mk index 066fa3d..10eaab2 100644 --- i/cfg.mk +++ w/cfg.mk @@ -241,7 +241,7 @@ sc_avoid_write: # Avoid functions that can lead to double-close bugs. sc_prohibit_close: - @prohibit='\<[f]close *\(' \ + @prohibit='\<[f]?close *\(' \ halt='use VIR_{FORCE_}[F]CLOSE instead of [f]close' \ $(_sc_search_regexp) @prohibit='\<fdopen *\(' \ -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> --- tools/virsh.c | 78 +++++++++++++++++++++++++++++++++++++++++++----------- tools/virsh.pod | 4 ++- 2 files changed, 65 insertions(+), 17 deletions(-) diff --git a/tools/virsh.c b/tools/virsh.c index aba4fd1..ee8d769 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -3413,6 +3413,7 @@ static const vshCmdOptDef opts_migrate[] = { {"suspend", VSH_OT_BOOL, 0, N_("do not restart the domain on the destination host")}, {"copy-storage-all", VSH_OT_BOOL, 0, N_("migration with non-shared storage with full disk copy")}, {"copy-storage-inc", VSH_OT_BOOL, 0, N_("migration with non-shared storage with incremental copy (same base image shared between source and destination)")}, + {"verbose", VSH_OT_BOOL, 0, N_("display the progress of migration")}, {"domain", VSH_OT_DATA, VSH_OFLAG_REQ, N_("domain name, id or uuid")}, {"desturi", VSH_OT_DATA, VSH_OFLAG_REQ, N_("connection URI of the destination host as seen from the client(normal migration) or source(p2p migration)")}, {"migrateuri", VSH_OT_DATA, 0, N_("migration URI, usually can be omitted")}, @@ -3513,6 +3514,28 @@ out_sig: ignore_value(safewrite(data->writefd, &ret, sizeof(ret))); } +static void +print_job_progress(unsigned long long remaining, unsigned long long total) +{ + int progress; + + if (total == 0) + /* migration has not been started */ + return; + + if (remaining == 0) { + /* migration has completed */ + progress = 100; + } else if (remaining * 100 / total == 0) { + /* migration has not completed, do not print [100 %] */ + progress = 99; + } else { + progress = 100 - remaining * 100 / total; + } + + fprintf(stderr, "\rMigration: [%3d %%]", progress); +} + static int cmdMigrate (vshControl *ctl, const vshCmd *cmd) { @@ -3524,6 +3547,9 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) char retchar; struct sigaction sig_action; struct sigaction old_sig_action; + virDomainJobInfo jobinfo; + bool verbose = FALSE; + sigset_t sigmask, oldsigmask; struct { vshControl *ctl; @@ -3534,6 +3560,9 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain (ctl, cmd, NULL))) return FALSE; + if (vshCommandOptBool (cmd, "verbose")) + verbose = TRUE; + if (pipe(p) < 0) goto cleanup; @@ -3556,25 +3585,42 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) pollfd.fd = p[0]; pollfd.events = POLLIN; pollfd.revents = 0; + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGINT); - ret = poll(&pollfd, 1, -1); - if (ret > 0) { - if (saferead(p[0], &retchar, sizeof(retchar)) > 0) { - if (retchar == '0') - ret = TRUE; - else + while (1) { + ret = poll(&pollfd, 1, 500); + if (ret > 0) { + if (saferead(p[0], &retchar, sizeof(retchar)) > 0) { + if (retchar == '0') { + ret = TRUE; + if (verbose) { + /* print [100 %] */ + print_job_progress(0, 1); + } + } else + ret = FALSE; + } else ret = FALSE; - } else - ret = FALSE; - } else if (ret < 0) { - if (errno == EINTR && intCatched) { - virDomainAbortJob(dom); - ret = FALSE; - intCatched = FALSE; + break; + } + + if (ret < 0) { + if (errno == EINTR && intCatched) { + virDomainAbortJob(dom); + ret = FALSE; + intCatched = FALSE; + break; + } + } + + if (verbose) { + pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask); + ret = virDomainGetJobInfo(dom, &jobinfo); + pthread_sigmask(SIG_BLOCK, &oldsigmask, NULL); + if (ret == 0) + print_job_progress(jobinfo.dataRemaining, jobinfo.dataTotal); } - } else { - /* timed out */ - ret = FALSE; } sigaction(SIGINT, &old_sig_action, NULL); diff --git a/tools/virsh.pod b/tools/virsh.pod index 1f15fef..ce7e2e7 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -489,7 +489,8 @@ type attribute for the <domain> element of XML. =item B<migrate> optional I<--live> I<--p2p> I<--direct> I<--tunnelled> I<--persistent> I<--undefinesource> I<--suspend> I<--copy-storage-all> -I<--copy-storage-inc> I<domain-id> I<desturi> I<migrateuri> I<dname> +I<--copy-storage-inc> I<--verbose> I<domain-id> I<desturi> I<migrateuri> +I<dname> Migrate domain to another host. Add I<--live> for live migration; I<--p2p> for peer-2-peer migration; I<--direct> for direct migration; or I<--tunnelled> @@ -499,6 +500,7 @@ and I<--suspend> leaves the domain paused on the destination host. I<--copy-storage-all> indicates migration with non-shared storage with full disk copy, I<--copy-storage-inc> indicates migration with non-shared storage with incremental copy (same base image shared between source and destination). +I<--verbose> displays the progress of migration. The I<desturi> is the connection URI of the destination host, and I<migrateuri> is the migration URI, which usually can be omitted. -- 1.7.1

On 01/23/2011 07:20 PM, Wen Congyang wrote: s/mig/migration/ in the subject line, so that the log is a bit more legible
+static void +print_job_progress(unsigned long long remaining, unsigned long long total) +{ + int progress; + + if (total == 0) + /* migration has not been started */ + return; + + if (remaining == 0) { + /* migration has completed */ + progress = 100; + } else if (remaining * 100 / total == 0) {
Is overflow a risk here, or will total never exceed UINT64_MAX/100?
+ /* migration has not completed, do not print [100 %] */ + progress = 99; + } else { + progress = 100 - remaining * 100 / total; + } + + fprintf(stderr, "\rMigration: [%3d %%]", progress);
Are we guaranteed that this function always gets called with a 100% total as the last thing to be printed? Or if you have a really fast migration, can the thread that calls this leave output stuck at 99% because things completed between the last output and when the monitor thread goes away?
+} + static int cmdMigrate (vshControl *ctl, const vshCmd *cmd) { @@ -3524,6 +3547,9 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) char retchar; struct sigaction sig_action; struct sigaction old_sig_action; + virDomainJobInfo jobinfo; + bool verbose = FALSE;
s/FALSE/false/
+ sigset_t sigmask, oldsigmask;
struct { vshControl *ctl; @@ -3534,6 +3560,9 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain (ctl, cmd, NULL))) return FALSE;
+ if (vshCommandOptBool (cmd, "verbose")) + verbose = TRUE;
s/TRUE/true/
+ while (1) {
+ if (ret < 0) { + if (errno == EINTR && intCatched) {
s/intCatched/intCaught/ from patch 1, and I'm still not sure that you are properly handling EINTR when your flag reports that no SIGINT was caught.
+++ b/tools/virsh.pod @@ -489,7 +489,8 @@ type attribute for the <domain> element of XML.
=item B<migrate> optional I<--live> I<--p2p> I<--direct> I<--tunnelled> I<--persistent> I<--undefinesource> I<--suspend> I<--copy-storage-all> -I<--copy-storage-inc> I<domain-id> I<desturi> I<migrateuri> I<dname> +I<--copy-storage-inc> I<--verbose> I<domain-id> I<desturi> I<migrateuri> +I<dname>
Thanks for the doc update! Too many patches forget that. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

If the memory of guest OS is changed constantly, the live migration can not be ended ever for ever. We can use the command 'virsh migrate-setmaxdowntime' to control the live migration. But the value of maxdowntime is diffcult to calculate because it depends on the transfer speed of network and constantly changing memroy size. We need a easy way to control the live migration. This patch adds the support of forcing guest to suspend at timeout. With this patch, when we migrate the guest OS, we can specify a timeout. If the live migration timeouts, auto-suspend the guest OS, where the migration will complete offline. Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> --- tools/virsh.c | 38 ++++++++++++++++++++++++++++++++++++++ tools/virsh.pod | 5 ++++- 2 files changed, 42 insertions(+), 1 deletions(-) diff --git a/tools/virsh.c b/tools/virsh.c index ee8d769..ddce47e 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -3418,6 +3418,7 @@ static const vshCmdOptDef opts_migrate[] = { {"desturi", VSH_OT_DATA, VSH_OFLAG_REQ, N_("connection URI of the destination host as seen from the client(normal migration) or source(p2p migration)")}, {"migrateuri", VSH_OT_DATA, 0, N_("migration URI, usually can be omitted")}, {"dname", VSH_OT_DATA, 0, N_("rename to new name during migration (if supported)")}, + {"timeout", VSH_OT_INT, 0, N_("force guest to suspend if live migration exceeds timeout (in seconds)")}, {NULL, 0, 0, NULL} }; @@ -3544,12 +3545,16 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) int ret = -1; virThread workerThread; struct pollfd pollfd; + int found; char retchar; struct sigaction sig_action; struct sigaction old_sig_action; virDomainJobInfo jobinfo; bool verbose = FALSE; sigset_t sigmask, oldsigmask; + int timeout; + struct timeval start, curr; + bool live_flag = FALSE; struct { vshControl *ctl; @@ -3563,6 +3568,29 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (vshCommandOptBool (cmd, "verbose")) verbose = TRUE; + if (vshCommandOptBool (cmd, "live")) + live_flag = TRUE; + timeout = vshCommandOptInt(cmd, "timeout", &found); + if (found) { + if (! live_flag) { + vshError(ctl, "%s", _("migrate: Unexpected timeout for offline migration")); + goto cleanup; + } + + if (timeout < 1) { + vshError(ctl, "%s", _("migrate: Invalid timeout")); + goto cleanup; + } + + /* Ensure that we can multiply by 1000 without overflowing. */ + if (timeout > INT_MAX / 1000) { + vshError(ctl, "%s", _("migrate: Timeout is too big")); + goto cleanup; + } + } else { + timeout = 0; + } + if (pipe(p) < 0) goto cleanup; @@ -3588,6 +3616,7 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) sigemptyset(&sigmask); sigaddset(&sigmask, SIGINT); + GETTIMEOFDAY(&start); while (1) { ret = poll(&pollfd, 1, 500); if (ret > 0) { @@ -3614,6 +3643,15 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) } } + GETTIMEOFDAY(&curr); + if ( timeout && ((int)(curr.tv_sec - start.tv_sec) * 1000 + \ + (int)(curr.tv_usec - start.tv_usec) / 1000) > timeout * 1000 ) { + /* suspend the domain when migration timeouts. */ + vshDebug(ctl, 5, "suspend the domain when migration timeouts\n"); + virDomainSuspend(dom); + timeout = 0; + } + if (verbose) { pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask); ret = virDomainGetJobInfo(dom, &jobinfo); diff --git a/tools/virsh.pod b/tools/virsh.pod index ce7e2e7..1c9a3d1 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -490,7 +490,7 @@ type attribute for the <domain> element of XML. =item B<migrate> optional I<--live> I<--p2p> I<--direct> I<--tunnelled> I<--persistent> I<--undefinesource> I<--suspend> I<--copy-storage-all> I<--copy-storage-inc> I<--verbose> I<domain-id> I<desturi> I<migrateuri> -I<dname> +I<dname> I<--timeout> Migrate domain to another host. Add I<--live> for live migration; I<--p2p> for peer-2-peer migration; I<--direct> for direct migration; or I<--tunnelled> @@ -507,6 +507,9 @@ I<migrateuri> is the migration URI, which usually can be omitted. I<dname> is used for renaming the domain to new name during migration, which also usually can be omitted. +I<--timeout> forces guest to suspend when live migration exceeds timeout, and +then the migration will complete offline. It can only be used with I<--live>. + B<Note>: The I<desturi> parameter for normal migration and peer2peer migration has different semantics: -- 1.7.1

Hi Eric, This updated series addresses problems you pointed out. Thanks for review. This patch set adds three new features into migration: 1. Cancel migration if user presses Ctrl-C when migration is in progress 2. show migration's progress 3. auto-suspend the guest OS at timeout, where the migration will complete offline Hu Tao (1): Cancel migration if user presses Ctrl-C when migration is in progress Wen Congyang (1): Show migration progress. Force guest suspend at timeout tools/virsh.c | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- tools/virsh.pod | 7 ++- 2 files changed, 207 insertions(+), 12 deletions(-) -- 1.7.3.1

While migration is in progress and virsh is waiting for its completion, user may want to terminate the progress by pressing Ctrl-C. But virsh just exits on user's Ctrl-C leaving migration in background that user isn't even aware of. It's not reasonable. This patch changes the behaviour for migration. For other commands Ctrl-C still terminates virsh itself. --- tools/virsh.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 115 insertions(+), 11 deletions(-) diff --git a/tools/virsh.c b/tools/virsh.c index aba7945..a9d9110 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -31,6 +31,7 @@ #include <sys/stat.h> #include <inttypes.h> #include <signal.h> +#include <poll.h> #include <libxml/parser.h> #include <libxml/tree.h> @@ -54,6 +55,7 @@ #include "files.h" #include "../daemon/event.h" #include "configmake.h" +#include "threads.h" static char *progname; @@ -492,6 +494,15 @@ out: last_error = NULL; } +static volatile sig_atomic_t intCaught = 0; + +static void vshCatchInt(int sig ATTRIBUTE_UNUSED, + siginfo_t *siginfo ATTRIBUTE_UNUSED, + void *context ATTRIBUTE_UNUSED) +{ + intCaught = 1; +} + /* * Detection of disconnections and automatic reconnection support */ @@ -3409,24 +3420,40 @@ static const vshCmdOptDef opts_migrate[] = { {NULL, 0, 0, NULL} }; -static int -cmdMigrate (vshControl *ctl, const vshCmd *cmd) +typedef struct __vshCtrlData { + vshControl *ctl; + const vshCmd *cmd; + int writefd; +} vshCtrlData; + +static void +doMigrate (void *opaque) { + char ret = '1'; virDomainPtr dom = NULL; const char *desturi; const char *migrateuri; const char *dname; - int flags = 0, found, ret = FALSE; + int flags = 0, found; + sigset_t sigmask, oldsigmask; + vshCtrlData *data = opaque; + vshControl *ctl = data->ctl; + const vshCmd *cmd = data->cmd; + + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGINT); + if (pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask) < 0) + goto out_sig; if (!vshConnectionUsability (ctl, ctl->conn)) - return FALSE; + goto out; if (!(dom = vshCommandOptDomain (ctl, cmd, NULL))) - return FALSE; + goto out; desturi = vshCommandOptString (cmd, "desturi", &found); if (!found) - goto done; + goto out; migrateuri = vshCommandOptString (cmd, "migrateuri", NULL); @@ -3460,29 +3487,106 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (migrateuri != NULL) { vshError(ctl, "%s", _("migrate: Unexpected migrateuri for peer2peer/direct migration")); - goto done; + goto out; } if (virDomainMigrateToURI (dom, desturi, flags, dname, 0) == 0) - ret = TRUE; + ret = '0'; } else { /* For traditional live migration, connect to the destination host directly. */ virConnectPtr dconn = NULL; virDomainPtr ddom = NULL; dconn = virConnectOpenAuth (desturi, virConnectAuthPtrDefault, 0); - if (!dconn) goto done; + if (!dconn) goto out; ddom = virDomainMigrate (dom, dconn, flags, dname, migrateuri, 0); if (ddom) { virDomainFree(ddom); - ret = TRUE; + ret = '0'; } virConnectClose (dconn); } - done: +out: + pthread_sigmask(SIG_SETMASK, &oldsigmask, NULL); +out_sig: if (dom) virDomainFree (dom); + ignore_value(safewrite(data->writefd, &ret, sizeof(ret))); +} + +static int +cmdMigrate (vshControl *ctl, const vshCmd *cmd) +{ + virDomainPtr dom = NULL; + int p[2] = {-1, -1}; + int ret = -1; + virThread workerThread; + struct pollfd pollfd; + char retchar; + struct sigaction sig_action; + struct sigaction old_sig_action; + + vshCtrlData data; + + if (!(dom = vshCommandOptDomain(ctl, cmd, NULL))) + return FALSE; + + if (pipe(p) < 0) + goto cleanup; + + data.ctl = ctl; + data.cmd = cmd; + data.writefd = p[1]; + + if (virThreadCreate(&workerThread, + true, + doMigrate, + &data) < 0) + goto cleanup; + + intCaught = 0; + sig_action.sa_sigaction = vshCatchInt; + sig_action.sa_flags = SA_SIGINFO; + sigemptyset(&sig_action.sa_mask); + sigaction(SIGINT, &sig_action, &old_sig_action); + + pollfd.fd = p[0]; + pollfd.events = POLLIN; + pollfd.revents = 0; + +repoll: + ret = poll(&pollfd, 1, -1); + if (ret > 0) { + if (saferead(p[0], &retchar, sizeof(retchar)) > 0) { + if (retchar == '0') + ret = TRUE; + else + ret = FALSE; + } else + ret = FALSE; + } else if (ret < 0) { + if (errno == EINTR) { + if (intCaught) { + virDomainAbortJob(dom); + ret = FALSE; + intCaught = 0; + } else + goto repoll; + } + } else { + /* timed out */ + ret = FALSE; + } + + sigaction(SIGINT, &old_sig_action, NULL); + + virThreadJoin(&workerThread); + +cleanup: + virDomainFree(dom); + VIR_FORCE_CLOSE(p[0]); + VIR_FORCE_CLOSE(p[1]); return ret; } -- 1.7.3.1 -- Thanks, Hu Tao

from: Wen Congyang <wency@cn.fujitsu.com> If the memory of guest OS is changed constantly, the live migration can not be ended ever for ever. We can use the command 'virsh migrate-setmaxdowntime' to control the live migration. But the value of maxdowntime is diffcult to calculate because it depends on the transfer speed of network and constantly changing memroy size. We need a easy way to control the live migration. This patch adds the support of forcing guest to suspend at timeout. With this patch, when we migrate the guest OS, we can specify a timeout. If the live migration timeouts, auto-suspend the guest OS, where the migration will complete offline. --- tools/virsh.c | 38 ++++++++++++++++++++++++++++++++++++++ tools/virsh.pod | 5 ++++- 2 files changed, 42 insertions(+), 1 deletions(-) diff --git a/tools/virsh.c b/tools/virsh.c index 356d8d5..82159c0 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -3418,6 +3418,7 @@ static const vshCmdOptDef opts_migrate[] = { {"desturi", VSH_OT_DATA, VSH_OFLAG_REQ, N_("connection URI of the destination host as seen from the client(normal migration) or source(p2p migration)")}, {"migrateuri", VSH_OT_DATA, 0, N_("migration URI, usually can be omitted")}, {"dname", VSH_OT_DATA, 0, N_("rename to new name during migration (if supported)")}, + {"timeout", VSH_OT_INT, 0, N_("force guest to suspend if live migration exceeds timeout (in seconds)")}, {NULL, 0, 0, NULL} }; @@ -3548,12 +3549,16 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) int ret = -1; virThread workerThread; struct pollfd pollfd; + int found; char retchar; struct sigaction sig_action; struct sigaction old_sig_action; virDomainJobInfo jobinfo; bool verbose = false; sigset_t sigmask, oldsigmask; + int timeout; + struct timeval start, curr; + bool live_flag = false; vshCtrlData data; @@ -3563,6 +3568,29 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (vshCommandOptBool (cmd, "verbose")) verbose = true; + if (vshCommandOptBool (cmd, "live")) + live_flag = TRUE; + timeout = vshCommandOptInt(cmd, "timeout", &found); + if (found) { + if (! live_flag) { + vshError(ctl, "%s", _("migrate: Unexpected timeout for offline migration")); + goto cleanup; + } + + if (timeout < 1) { + vshError(ctl, "%s", _("migrate: Invalid timeout")); + goto cleanup; + } + + /* Ensure that we can multiply by 1000 without overflowing. */ + if (timeout > INT_MAX / 1000) { + vshError(ctl, "%s", _("migrate: Timeout is too big")); + goto cleanup; + } + } else { + timeout = 0; + } + if (pipe(p) < 0) goto cleanup; @@ -3588,6 +3616,7 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) sigemptyset(&sigmask); sigaddset(&sigmask, SIGINT); + GETTIMEOFDAY(&start); while (1) { repoll: ret = poll(&pollfd, 1, 500); @@ -3618,6 +3647,15 @@ repoll: break; } + GETTIMEOFDAY(&curr); + if ( timeout && ((int)(curr.tv_sec - start.tv_sec) * 1000 + \ + (int)(curr.tv_usec - start.tv_usec) / 1000) > timeout * 1000 ) { + /* suspend the domain when migration timeouts. */ + vshDebug(ctl, 5, "suspend the domain when migration timeouts\n"); + virDomainSuspend(dom); + timeout = 0; + } + if (verbose) { pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask); ret = virDomainGetJobInfo(dom, &jobinfo); diff --git a/tools/virsh.pod b/tools/virsh.pod index ce7e2e7..1c9a3d1 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -490,7 +490,7 @@ type attribute for the <domain> element of XML. =item B<migrate> optional I<--live> I<--p2p> I<--direct> I<--tunnelled> I<--persistent> I<--undefinesource> I<--suspend> I<--copy-storage-all> I<--copy-storage-inc> I<--verbose> I<domain-id> I<desturi> I<migrateuri> -I<dname> +I<dname> I<--timeout> Migrate domain to another host. Add I<--live> for live migration; I<--p2p> for peer-2-peer migration; I<--direct> for direct migration; or I<--tunnelled> @@ -507,6 +507,9 @@ I<migrateuri> is the migration URI, which usually can be omitted. I<dname> is used for renaming the domain to new name during migration, which also usually can be omitted. +I<--timeout> forces guest to suspend when live migration exceeds timeout, and +then the migration will complete offline. It can only be used with I<--live>. + B<Note>: The I<desturi> parameter for normal migration and peer2peer migration has different semantics: -- 1.7.3.1 -- Thanks, Hu Tao

from: Wen Congyang <wency@cn.fujitsu.com> Show migration progress if `migrate --verbose'. --- tools/virsh.c | 86 ++++++++++++++++++++++++++++++++++++++++++------------ tools/virsh.pod | 4 ++- 2 files changed, 70 insertions(+), 20 deletions(-) diff --git a/tools/virsh.c b/tools/virsh.c index a9d9110..356d8d5 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -3413,6 +3413,7 @@ static const vshCmdOptDef opts_migrate[] = { {"suspend", VSH_OT_BOOL, 0, N_("do not restart the domain on the destination host")}, {"copy-storage-all", VSH_OT_BOOL, 0, N_("migration with non-shared storage with full disk copy")}, {"copy-storage-inc", VSH_OT_BOOL, 0, N_("migration with non-shared storage with incremental copy (same base image shared between source and destination)")}, + {"verbose", VSH_OT_BOOL, 0, N_("display the progress of migration")}, {"domain", VSH_OT_DATA, VSH_OFLAG_REQ, N_("domain name, id or uuid")}, {"desturi", VSH_OT_DATA, VSH_OFLAG_REQ, N_("connection URI of the destination host as seen from the client(normal migration) or source(p2p migration)")}, {"migrateuri", VSH_OT_DATA, 0, N_("migration URI, usually can be omitted")}, @@ -3515,6 +3516,30 @@ out_sig: ignore_value(safewrite(data->writefd, &ret, sizeof(ret))); } +static void +print_job_progress(unsigned long long remaining, unsigned long long total) +{ + int progress; + + if (total == 0) + /* migration has not been started */ + return; + + if (remaining == 0) { + /* migration has completed */ + progress = 100; + } else { + /* use float to avoid overflow */ + progress = (int)(100.0 - remaining * 100.0 / total); + if (progress >= 100) { + /* migration has not completed, do not print [100 %] */ + progress = 99; + } + } + + fprintf(stderr, "\rMigration: [%3d %%]", progress); +} + static int cmdMigrate (vshControl *ctl, const vshCmd *cmd) { @@ -3526,12 +3551,18 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) char retchar; struct sigaction sig_action; struct sigaction old_sig_action; + virDomainJobInfo jobinfo; + bool verbose = false; + sigset_t sigmask, oldsigmask; vshCtrlData data; if (!(dom = vshCommandOptDomain(ctl, cmd, NULL))) return FALSE; + if (vshCommandOptBool (cmd, "verbose")) + verbose = true; + if (pipe(p) < 0) goto cleanup; @@ -3554,29 +3585,46 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) pollfd.fd = p[0]; pollfd.events = POLLIN; pollfd.revents = 0; + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGINT); + while (1) { repoll: - ret = poll(&pollfd, 1, -1); - if (ret > 0) { - if (saferead(p[0], &retchar, sizeof(retchar)) > 0) { - if (retchar == '0') - ret = TRUE; - else - ret = FALSE; - } else - ret = FALSE; - } else if (ret < 0) { - if (errno == EINTR) { - if (intCaught) { - virDomainAbortJob(dom); - ret = FALSE; - intCaught = 0; + ret = poll(&pollfd, 1, 500); + if (ret > 0) { + if (saferead(p[0], &retchar, sizeof(retchar)) > 0) { + if (retchar == '0') { + ret = TRUE; + if (verbose) { + /* print [100 %] */ + print_job_progress(0, 1); + } + } else + ret = FALSE; } else - goto repoll; + ret = FALSE; + break; + } + + if (ret < 0) { + if (errno == EINTR) { + if (intCaught) { + virDomainAbortJob(dom); + ret = FALSE; + intCaught = 0; + } else + goto repoll; + } + break; + } + + if (verbose) { + pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask); + ret = virDomainGetJobInfo(dom, &jobinfo); + pthread_sigmask(SIG_SETMASK, &oldsigmask, NULL); + if (ret == 0) + print_job_progress(jobinfo.dataRemaining, jobinfo.dataTotal); } - } else { - /* timed out */ - ret = FALSE; } sigaction(SIGINT, &old_sig_action, NULL); diff --git a/tools/virsh.pod b/tools/virsh.pod index 1f15fef..ce7e2e7 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -489,7 +489,8 @@ type attribute for the <domain> element of XML. =item B<migrate> optional I<--live> I<--p2p> I<--direct> I<--tunnelled> I<--persistent> I<--undefinesource> I<--suspend> I<--copy-storage-all> -I<--copy-storage-inc> I<domain-id> I<desturi> I<migrateuri> I<dname> +I<--copy-storage-inc> I<--verbose> I<domain-id> I<desturi> I<migrateuri> +I<dname> Migrate domain to another host. Add I<--live> for live migration; I<--p2p> for peer-2-peer migration; I<--direct> for direct migration; or I<--tunnelled> @@ -499,6 +500,7 @@ and I<--suspend> leaves the domain paused on the destination host. I<--copy-storage-all> indicates migration with non-shared storage with full disk copy, I<--copy-storage-inc> indicates migration with non-shared storage with incremental copy (same base image shared between source and destination). +I<--verbose> displays the progress of migration. The I<desturi> is the connection URI of the destination host, and I<migrateuri> is the migration URI, which usually can be omitted. -- 1.7.3.1 -- Thanks, Hu Tao

On Tue, Jan 25, 2011 at 06:14:11PM +0800, Hu Tao wrote:
Hi Eric,
This updated series addresses problems you pointed out. Thanks for review.
This patch set adds three new features into migration: 1. Cancel migration if user presses Ctrl-C when migration is in progress 2. show migration's progress 3. auto-suspend the guest OS at timeout, where the migration will complete offline
Hu Tao (1): Cancel migration if user presses Ctrl-C when migration is in progress
Wen Congyang (1): Show migration progress. Force guest suspend at timeout
tools/virsh.c | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- tools/virsh.pod | 7 ++- 2 files changed, 207 insertions(+), 12 deletions(-)
ACK to whole series Daniel

On 01/26/2011 05:30 AM, Daniel P. Berrange wrote:
This patch set adds three new features into migration: 1. Cancel migration if user presses Ctrl-C when migration is in progress 2. show migration's progress 3. auto-suspend the guest OS at timeout, where the migration will complete offline
Hu Tao (1): Cancel migration if user presses Ctrl-C when migration is in progress
Wen Congyang (1): Show migration progress. Force guest suspend at timeout
tools/virsh.c | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- tools/virsh.pod | 7 ++- 2 files changed, 207 insertions(+), 12 deletions(-)
ACK to whole series
Now pushed. And thanks to both of you for persisting through our repeated reviews - it's taken a while and several false starts, but the end result is nice! -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org
participants (4)
-
Daniel P. Berrange
-
Eric Blake
-
Hu Tao
-
Wen Congyang