[libvirt] [Patch v3 0/2] Add new feature into live migration

If the memory of guest OS is changed constantly, the live migration can not be ended ever for ever. We can use the command 'virsh migrate-setmaxdowntime' to control the live migration. But the value of maxdowntime is diffcult to calculate because it depends on the transfer speed of network and constantly changing memroy size. We need a easy way to control the live migration. This patch set add the support of auto cold migration fallback on timeout. With this patch set, when we migrate the guest OS, we can specify a timeout. If the live migration timeouts, the migration will fallback to cold migration. Test of this patchset on Linux: Env: a. The size of guest OS's memory: 1GB b. The transfer speed of network: about 100Mb/s c. The size of constantly changing memory: more than 900MB 1. migrate without timeout # virsh migrate --live RHEL6RC qemu+ssh://<dest IP>/system tcp://<dest IP>:49152 The migration does not end after 12 hours. 2. migrate with timeout(30 minutes): # date Fri Dec 17 15:35:24 CST 2010 # virsh migrate --live --timeout 1800 RHEL6RC qemu+ssh://<dest IP>/system tcp:<dest IP>:49152 # date Fri Dec 17 16:05:51 CST 2010 v3: - use the existing virEventXXXTimeout() APT to implement timer v2: - implement timer for Windows - implement dynamic timers Wen Congyang (2): timer impl auto cold migration fallback at timeout tools/Makefile.am | 1 + tools/timer.c | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++++ tools/timer.h | 38 ++++++++++++ tools/virsh.c | 69 +++++++++++++++++++++ 4 files changed, 278 insertions(+), 0 deletions(-) create mode 100644 tools/timer.c create mode 100644 tools/timer.h

* tools/timer.c tools/timer.h: timer implementation * tools/virsh.c: Initialize timer * tools/Makefile.am: build timer Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> --- tools/Makefile.am | 1 + tools/timer.c | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++++ tools/timer.h | 38 ++++++++++++ tools/virsh.c | 3 + 4 files changed, 212 insertions(+), 0 deletions(-) create mode 100644 tools/timer.c create mode 100644 tools/timer.h diff --git a/tools/Makefile.am b/tools/Makefile.am index 8a5fb52..506265d 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -38,6 +38,7 @@ virt-pki-validate.1: virt-pki-validate virsh_SOURCES = \ console.c console.h \ + timer.c timer.h \ ../daemon/event.c ../daemon/event.h \ virsh.c diff --git a/tools/timer.c b/tools/timer.c new file mode 100644 index 0000000..88c42bb --- /dev/null +++ b/tools/timer.c @@ -0,0 +1,170 @@ +/* + * timer.c: timer functions + * + * Copyright (C) 2010 Fujitsu Limited + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Wen Congyang <wency@cn.fujitsu.com> + */ + +#include <config.h> + +#include "daemon/event.h" +#include "event.h" +#include "memory.h" +#include "threads.h" +#include "timer.h" +#include "virterror_internal.h" + +#define VIR_FROM_THIS VIR_FROM_NONE + +struct virTimer { + int timer_id; + + int frequency; + virTimerCallback function; + void *opaque; +}; + +/* use timerFunc to prevent the user know timer id. */ +static void timerFunc(int timer_id ATTRIBUTE_UNUSED, void *opaque) +{ + virTimerPtr timer = (virTimerPtr)opaque; + timer->function(timer->opaque); +} + +virTimerPtr virNewTimer(virTimerCallback callback, void *opaque) +{ + virTimerPtr timer = NULL; + + if (VIR_ALLOC(timer) < 0) { + virReportOOMError(); + return NULL; + } + + timer->timer_id = -1; + timer->function = callback; + timer->opaque = opaque; + timer->frequency = -1; + + return timer; +} + +int virAddTimer(virTimerPtr timer, int expire_time) +{ + int ret; + + if ((ret = virEventAddTimeout(expire_time, timerFunc, + timer, NULL)) < 0) { + return -1; + } + timer->timer_id = ret; + return 0; +} + +int virModTimer(virTimerPtr timer, int expire_time) +{ + if (timer->timer_id == -1) + return virAddTimer(timer, expire_time); + + virEventUpdateTimeout(timer->timer_id, expire_time); + return 0; +} + +int virDelTimer(virTimerPtr timer) +{ + if (timer->timer_id == -1) + return 0; + + if (virEventRemoveTimeout(timer->timer_id) < 0) + return -1; + + timer->timer_id = -1; + return 0; +} + +void virFreeTimer(virTimerPtr timer) +{ + VIR_FREE(timer); +} + +static int timer_initialized = 0; +static virThread timer_thread; +static bool timer_running = false; +static bool timer_quit = false; +static virMutex timer_lock; + +static void timerThreadFunc(void *opaque ATTRIBUTE_UNUSED) +{ + while(!timer_quit) { + virEventRunOnce(); + } +} + +int virStartTimer(void) +{ + int ret = -1; + + virMutexLock(&timer_lock); + if (timer_running) { + ret = 0; + goto cleanup; + } + + timer_quit = false; + if (virThreadCreate(&timer_thread, true, timerThreadFunc, NULL) < 0) + goto cleanup; + + timer_running = true; + ret = 0; + +cleanup: + virMutexUnlock(&timer_lock); + return ret; +} + +int virStopTimer(void) +{ + int ret = -1; + virMutexLock(&timer_lock); + if (!timer_running) { + ret = 0; + goto cleanup; + } + + timer_quit = true; + virEventInterrupt(); + virThreadJoin(&timer_thread); + timer_running = false; + +cleanup: + virMutexUnlock(&timer_lock); + return ret; +} + +int virTimerInitialize(void) +{ + if (timer_initialized) + return 0; + + if (virMutexInit(&timer_lock) < 0) + return -1; + + timer_initialized = 1; + timer_running = false; + timer_quit = false; + return 0; +} diff --git a/tools/timer.h b/tools/timer.h new file mode 100644 index 0000000..eba08ec --- /dev/null +++ b/tools/timer.h @@ -0,0 +1,38 @@ +/* + * timer.h: structure and entry points for timer support + * + * Copyright (C) 2010 Fujitsu Limited + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Wen Congyang <wency@cn.fujitsu.com> + */ + +#ifndef __VIR_TIMER_H__ +# define __VIR_TIMER_H__ + +typedef struct virTimer virTimer; +typedef virTimer *virTimerPtr; +typedef void (*virTimerCallback)(void *); + +extern virTimerPtr virNewTimer(virTimerCallback, void *); +extern int virAddTimer(virTimerPtr, int); +extern int virModTimer(virTimerPtr, int); +extern int virDelTimer(virTimerPtr); +extern void virFreeTimer(virTimerPtr); +extern int virStartTimer(void); +extern int virStopTimer(void); +extern int virTimerInitialize(void); +#endif /* __VIR_TIMER_H__ */ diff --git a/tools/virsh.c b/tools/virsh.c index 4e37f2d..cbde085 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -54,6 +54,7 @@ #include "files.h" #include "../daemon/event.h" #include "configmake.h" +#include "timer.h" static char *progname; @@ -11298,6 +11299,8 @@ vshInit(vshControl *ctl) virEventRemoveTimeoutImpl); virEventInit(); + virTimerInitialize(); + ctl->conn = virConnectOpenAuth(ctl->name, virConnectAuthPtrDefault, ctl->readonly ? VIR_CONNECT_RO : 0); -- 1.7.1

On Fri, Dec 17, 2010 at 04:49:26PM +0800, Wen Congyang wrote:
* tools/timer.c tools/timer.h: timer implementation * tools/virsh.c: Initialize timer * tools/Makefile.am: build timer
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
--- tools/Makefile.am | 1 + tools/timer.c | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++++ tools/timer.h | 38 ++++++++++++ tools/virsh.c | 3 + 4 files changed, 212 insertions(+), 0 deletions(-) create mode 100644 tools/timer.c create mode 100644 tools/timer.h
diff --git a/tools/Makefile.am b/tools/Makefile.am index 8a5fb52..506265d 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -38,6 +38,7 @@ virt-pki-validate.1: virt-pki-validate
virsh_SOURCES = \ console.c console.h \ + timer.c timer.h \ ../daemon/event.c ../daemon/event.h \ virsh.c
diff --git a/tools/timer.c b/tools/timer.c new file mode 100644 index 0000000..88c42bb --- /dev/null +++ b/tools/timer.c @@ -0,0 +1,170 @@ +/* + * timer.c: timer functions + * + * Copyright (C) 2010 Fujitsu Limited + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Wen Congyang <wency@cn.fujitsu.com> + */ + +#include <config.h> + +#include "daemon/event.h" +#include "event.h" +#include "memory.h" +#include "threads.h" +#include "timer.h" +#include "virterror_internal.h" + +#define VIR_FROM_THIS VIR_FROM_NONE + +struct virTimer { + int timer_id; + + int frequency; + virTimerCallback function; + void *opaque; +}; + +/* use timerFunc to prevent the user know timer id. */ +static void timerFunc(int timer_id ATTRIBUTE_UNUSED, void *opaque) +{ + virTimerPtr timer = (virTimerPtr)opaque; + timer->function(timer->opaque); +} + +virTimerPtr virNewTimer(virTimerCallback callback, void *opaque) +{ + virTimerPtr timer = NULL; + + if (VIR_ALLOC(timer) < 0) { + virReportOOMError(); + return NULL; + } + + timer->timer_id = -1; + timer->function = callback; + timer->opaque = opaque; + timer->frequency = -1; + + return timer; +} + +int virAddTimer(virTimerPtr timer, int expire_time) +{ + int ret; + + if ((ret = virEventAddTimeout(expire_time, timerFunc, + timer, NULL)) < 0) { + return -1; + } + timer->timer_id = ret; + return 0; +} + +int virModTimer(virTimerPtr timer, int expire_time) +{ + if (timer->timer_id == -1) + return virAddTimer(timer, expire_time); + + virEventUpdateTimeout(timer->timer_id, expire_time); + return 0; +} + +int virDelTimer(virTimerPtr timer) +{ + if (timer->timer_id == -1) + return 0; + + if (virEventRemoveTimeout(timer->timer_id) < 0) + return -1; + + timer->timer_id = -1; + return 0; +} + +void virFreeTimer(virTimerPtr timer) +{ + VIR_FREE(timer); +} + +static int timer_initialized = 0; +static virThread timer_thread; +static bool timer_running = false; +static bool timer_quit = false; +static virMutex timer_lock; + +static void timerThreadFunc(void *opaque ATTRIBUTE_UNUSED) +{ + while(!timer_quit) { + virEventRunOnce(); + } +} + +int virStartTimer(void)
Bad name, not really start a timer. This function can be merged with virTimerInitialize().
+{ + int ret = -1; + + virMutexLock(&timer_lock); + if (timer_running) { + ret = 0; + goto cleanup; + } + + timer_quit = false; + if (virThreadCreate(&timer_thread, true, timerThreadFunc, NULL) < 0) + goto cleanup; + + timer_running = true; + ret = 0; + +cleanup: + virMutexUnlock(&timer_lock); + return ret; +} + +int virStopTimer(void) +{ + int ret = -1; + virMutexLock(&timer_lock); + if (!timer_running) { + ret = 0; + goto cleanup; + } + + timer_quit = true; + virEventInterrupt(); + virThreadJoin(&timer_thread); + timer_running = false; + +cleanup: + virMutexUnlock(&timer_lock); + return ret; +} + +int virTimerInitialize(void) +{ + if (timer_initialized) + return 0; + + if (virMutexInit(&timer_lock) < 0) + return -1; + + timer_initialized = 1; + timer_running = false; + timer_quit = false; + return 0; +} diff --git a/tools/timer.h b/tools/timer.h new file mode 100644 index 0000000..eba08ec --- /dev/null +++ b/tools/timer.h @@ -0,0 +1,38 @@ +/* + * timer.h: structure and entry points for timer support + * + * Copyright (C) 2010 Fujitsu Limited + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Wen Congyang <wency@cn.fujitsu.com> + */ + +#ifndef __VIR_TIMER_H__ +# define __VIR_TIMER_H__ + +typedef struct virTimer virTimer; +typedef virTimer *virTimerPtr; +typedef void (*virTimerCallback)(void *); + +extern virTimerPtr virNewTimer(virTimerCallback, void *); +extern int virAddTimer(virTimerPtr, int); +extern int virModTimer(virTimerPtr, int); +extern int virDelTimer(virTimerPtr); +extern void virFreeTimer(virTimerPtr); +extern int virStartTimer(void); +extern int virStopTimer(void); +extern int virTimerInitialize(void); +#endif /* __VIR_TIMER_H__ */ diff --git a/tools/virsh.c b/tools/virsh.c index 4e37f2d..cbde085 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -54,6 +54,7 @@ #include "files.h" #include "../daemon/event.h" #include "configmake.h" +#include "timer.h"
static char *progname;
@@ -11298,6 +11299,8 @@ vshInit(vshControl *ctl) virEventRemoveTimeoutImpl); virEventInit();
+ virTimerInitialize(); + ctl->conn = virConnectOpenAuth(ctl->name, virConnectAuthPtrDefault, ctl->readonly ? VIR_CONNECT_RO : 0); -- 1.7.1
changes to virsh.c should be in the second patch. -- Thanks, Hu Tao

On 12/17/2010 01:49 AM, Wen Congyang wrote:
* tools/timer.c tools/timer.h: timer implementation * tools/virsh.c: Initialize timer * tools/Makefile.am: build timer
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
+#define VIR_FROM_THIS VIR_FROM_NONE + +struct virTimer { + int timer_id; + + int frequency;
In which unit? milliseconds? Worse, this variable is unused.
+/* use timerFunc to prevent the user know timer id. */ +static void timerFunc(int timer_id ATTRIBUTE_UNUSED, void *opaque) +{ + virTimerPtr timer = (virTimerPtr)opaque;
This cast is not necessary (in general, in C you can go from void* to any other type without a cast).
+ timer->function(timer->opaque); +} + +virTimerPtr virNewTimer(virTimerCallback callback, void *opaque)
Would you mind adding some documentation to each public function, describing the parameters and return value? Right now you're only using it with virsh, but it may make sense to move it to src/util, and good documentation will ensure that we like the interface (or the act of documenting it may help you find ways to improve it).
+ +int virAddTimer(virTimerPtr timer, int expire_time) +{ + int ret; + + if ((ret = virEventAddTimeout(expire_time, timerFunc, + timer, NULL)) < 0) {
Is this function safe to call when the timer is already in use? If not, should you add some sanity checking?
+int virModTimer(virTimerPtr timer, int expire_time) +{ + if (timer->timer_id == -1) + return virAddTimer(timer, expire_time); + + virEventUpdateTimeout(timer->timer_id, expire_time); + return 0;
Do we need both Add and Update, or can we have a single SetTimeout interface that nukes the previous timeout value and replaces it with the new parameter (cf. the alarm() function in POSIX).
+} + +int virDelTimer(virTimerPtr timer) +{ + if (timer->timer_id == -1) + return 0; + + if (virEventRemoveTimeout(timer->timer_id) < 0) + return -1;
If you merge Add and Mod into one interface, then having a sentiel expire_time value of -1 would also serve as a way to disarm any existing timer, so you could also merge Del into that same interface.
+void virFreeTimer(virTimerPtr timer) +{
if (timer == NULL) return;
+ VIR_FREE(timer);
Also, modify cfg.mk to list this as a free-like function.
+} + +static int timer_initialized = 0; +static virThread timer_thread; +static bool timer_running = false; +static bool timer_quit = false;
Explicit zero-initialization is not required for file-scope variables (C guarantees it) - three instances.
+int virTimerInitialize(void) +{ + if (timer_initialized) + return 0; + + if (virMutexInit(&timer_lock) < 0) + return -1; + + timer_initialized = 1; + timer_running = false; + timer_quit = false;
At least two bad data races if we expose virTimerInitialize to a multi-threaded environment (and probably more): Thread 1 calls virTimerInitialize, calls virMutexInit, but gets swapped out before time_initialized is set to 1; thread 2 calls virTimerInitialize and tries to reinitialize the mutex, which results in undefined behavior. Thread 1 calls virTimerInitialize, sets timer_initialized to 1, but gets swapped out before timer_running is set to false; thread 2 calls virTimerInitialize, which returns, then starts a timer; thread 1 resumes and sets timer_running to false, nuking thread 2's timer. I'm thinking we need to teach src/util/threads.h about some guaranteed once-only wrappers around pthread_once and the Windows counterpart, and then use virOnce throughout our code base to guarantee race-free one-time initialization for things such as virMutexInit, so that we are immune to data races where two threads both try to initialize the same mutex (this affects more than just your patch); once you have race-free mutex initialization, you can then use that mutex to reliably avoid the remaining races. But for your particular use case, you are only using timers in the context of a single-threaded virsh, so it's something we can defer to later without impacting the utility of your patch.
+ +typedef struct virTimer virTimer;
Our convention is: typedef struct _virTimer virTimer; that is, the struct version has a leading _ if the type is intentionally opaque.
+typedef virTimer *virTimerPtr; +typedef void (*virTimerCallback)(void *); + +extern virTimerPtr virNewTimer(virTimerCallback, void *);
ATTRIBUTE_NONNULL(1)
+extern int virAddTimer(virTimerPtr, int);
ATTRIBUTE_NONNULL(1)
+extern int virModTimer(virTimerPtr, int);
ATTRIBUTE_NONNULL(1)
+extern int virDelTimer(virTimerPtr);
ATTRIBUTE_NONNULL(1)
+extern void virFreeTimer(virTimerPtr); +extern int virStartTimer(void); +extern int virStopTimer(void); +extern int virTimerInitialize(void);
Is the intent here that virTimerInitialize creates the timer resources, then you can use virStartTimer/virStopTimer in pairs at will to control whether timers are in effect, and finally virFreeTimer to remove all resources? I'm agreeing with Hu that this seems a bit complex, and you may be better served by merging virStartTimer into virTimerInitialize (a one-shot call to enable you to call virAddTimer/virFreeTimer at will), and delete virStopTimer (the one-time resource never needs to be reclaimed). See how we have virThreadInitialize, called once at initialization, with no other functions to control whether thread resources are in use. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

At 12/18/2010 06:39 AM, Eric Blake Write:
On 12/17/2010 01:49 AM, Wen Congyang wrote:
* tools/timer.c tools/timer.h: timer implementation * tools/virsh.c: Initialize timer * tools/Makefile.am: build timer
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
+#define VIR_FROM_THIS VIR_FROM_NONE + +struct virTimer { + int timer_id; + + int frequency;
In which unit? milliseconds? Worse, this variable is unused.
I forgot to remove this variable.
+/* use timerFunc to prevent the user know timer id. */ +static void timerFunc(int timer_id ATTRIBUTE_UNUSED, void *opaque) +{ + virTimerPtr timer = (virTimerPtr)opaque;
This cast is not necessary (in general, in C you can go from void* to any other type without a cast).
+ timer->function(timer->opaque); +} + +virTimerPtr virNewTimer(virTimerCallback callback, void *opaque)
Would you mind adding some documentation to each public function, describing the parameters and return value? Right now you're only using it with virsh, but it may make sense to move it to src/util, and good documentation will ensure that we like the interface (or the act of documenting it may help you find ways to improve it).
Ok, I will add some documentation to each public function. If the interface is moved to src/util, I think we should move daemon/event.c to src/util too, because we use the API virEventRunOnce().
+ +int virAddTimer(virTimerPtr timer, int expire_time) +{ + int ret; + + if ((ret = virEventAddTimeout(expire_time, timerFunc, + timer, NULL)) < 0) {
Is this function safe to call when the timer is already in use? If not, should you add some sanity checking?
+int virModTimer(virTimerPtr timer, int expire_time) +{ + if (timer->timer_id == -1) + return virAddTimer(timer, expire_time); + + virEventUpdateTimeout(timer->timer_id, expire_time); + return 0;
Do we need both Add and Update, or can we have a single SetTimeout interface that nukes the previous timeout value and replaces it with the
I will use one single virSetTimeout() to replace Add, Update, and Delete().
+} + +int virDelTimer(virTimerPtr timer) +{ + if (timer->timer_id == -1) + return 0; + + if (virEventRemoveTimeout(timer->timer_id) < 0) + return -1;
If you merge Add and Mod into one interface, then having a sentiel expire_time value of -1 would also serve as a way to disarm any existing timer, so you could also merge Del into that same interface.
If we should disarm any existing timer, we should save all existing timer... And the caller pass the timer to the single interface virSetTimeout(), so we should only disarm the specified timer.
+void virFreeTimer(virTimerPtr timer) +{
if (timer == NULL) return;
+ VIR_FREE(timer);
Also, modify cfg.mk to list this as a free-like function.
OK.
+} + +static int timer_initialized = 0; +static virThread timer_thread; +static bool timer_running = false; +static bool timer_quit = false;
Explicit zero-initialization is not required for file-scope variables (C guarantees it) - three instances.
+int virTimerInitialize(void) +{ + if (timer_initialized) + return 0; + + if (virMutexInit(&timer_lock) < 0) + return -1; + + timer_initialized = 1; + timer_running = false; + timer_quit = false;
At least two bad data races if we expose virTimerInitialize to a multi-threaded environment (and probably more):
Thread 1 calls virTimerInitialize, calls virMutexInit, but gets swapped out before time_initialized is set to 1; thread 2 calls virTimerInitialize and tries to reinitialize the mutex, which results in undefined behavior.
Thread 1 calls virTimerInitialize, sets timer_initialized to 1, but gets swapped out before timer_running is set to false; thread 2 calls virTimerInitialize, which returns, then starts a timer; thread 1 resumes and sets timer_running to false, nuking thread 2's timer.
I'm thinking we need to teach src/util/threads.h about some guaranteed once-only wrappers around pthread_once and the Windows counterpart, and then use virOnce throughout our code base to guarantee race-free one-time initialization for things such as virMutexInit, so that we are immune to data races where two threads both try to initialize the same mutex (this affects more than just your patch); once you have race-free mutex initialization, you can then use that mutex to reliably avoid the remaining races.
But for your particular use case, you are only using timers in the context of a single-threaded virsh, so it's something we can defer to later without impacting the utility of your patch.
+ +typedef struct virTimer virTimer;
Our convention is:
typedef struct _virTimer virTimer;
that is, the struct version has a leading _ if the type is intentionally opaque.
+typedef virTimer *virTimerPtr; +typedef void (*virTimerCallback)(void *); + +extern virTimerPtr virNewTimer(virTimerCallback, void *);
ATTRIBUTE_NONNULL(1)
+extern int virAddTimer(virTimerPtr, int);
ATTRIBUTE_NONNULL(1)
+extern int virModTimer(virTimerPtr, int);
ATTRIBUTE_NONNULL(1)
+extern int virDelTimer(virTimerPtr);
ATTRIBUTE_NONNULL(1)
+extern void virFreeTimer(virTimerPtr); +extern int virStartTimer(void); +extern int virStopTimer(void); +extern int virTimerInitialize(void);
Is the intent here that virTimerInitialize creates the timer resources, then you can use virStartTimer/virStopTimer in pairs at will to control whether timers are in effect, and finally virFreeTimer to remove all resources? I'm agreeing with Hu that this seems a bit complex, and you may be better served by merging virStartTimer into virTimerInitialize (a one-shot call to enable you to call virAddTimer/virFreeTimer at will), and delete virStopTimer (the one-time resource never needs to be reclaimed). See how we have virThreadInitialize, called once at initialization, with no other functions to control whether thread resources are in use.
OK. Thanks for your comment.

implement auto cold migration fallback at timeout Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> --- tools/virsh.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 66 insertions(+), 0 deletions(-) diff --git a/tools/virsh.c b/tools/virsh.c index cbde085..b95c8fe 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -3379,9 +3379,32 @@ static const vshCmdOptDef opts_migrate[] = { {"desturi", VSH_OT_DATA, VSH_OFLAG_REQ, N_("connection URI of the destination host")}, {"migrateuri", VSH_OT_DATA, 0, N_("migration URI, usually can be omitted")}, {"dname", VSH_OT_DATA, 0, N_("rename to new name during migration (if supported)")}, + {"timeout", VSH_OT_INT, 0, N_("auto cold migration fallback when live migration timeouts(in seconds)")}, {NULL, 0, 0, NULL} }; +static void migrateTimeoutHandler(void *data) +{ + virDomainPtr dom = (virDomainPtr)data; + virDomainInfo info; + unsigned int id; + + id = virDomainGetID(dom); + if (id == ((unsigned int)-1)) + return; + + /* The error reason has been reported in virDomainGetInfo() and + * virDomainSuspend() when it fails. So we do not check the return value. + */ + if (virDomainGetInfo(dom, &info) == 0) { + if (info.state == VIR_DOMAIN_SHUTOFF) + return; + + /* suspend the domain when migration timeouts. */ + virDomainSuspend(dom); + } +} + static int cmdMigrate (vshControl *ctl, const vshCmd *cmd) { @@ -3389,6 +3412,8 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) const char *desturi; const char *migrateuri; const char *dname; + long long timeout; + virTimerPtr migratetimer = NULL; int flags = 0, found, ret = FALSE; if (!vshConnectionUsability (ctl, ctl->conn)) @@ -3426,6 +3451,28 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (vshCommandOptBool (cmd, "copy-storage-inc")) flags |= VIR_MIGRATE_NON_SHARED_INC; + timeout = vshCommandOptLongLong(cmd, "timeout", &found); + if (found) { + if (! flags & VIR_MIGRATE_LIVE) { + vshError(ctl, "%s", _("migrate: Unexpected timeout for cold migration")); + goto done; + } + + if (timeout < 1) { + vshError(ctl, "%s", _("migrate: Invalid timeout")); + goto done; + } + + migratetimer = virNewTimer(migrateTimeoutHandler, (void *)dom); + if (!migratetimer) + goto done; + + if (virStartTimer() < 0) { + vshError(ctl, "%s", _("migrate: failed to start timer")); + goto done; + } + } + if ((flags & VIR_MIGRATE_PEER2PEER) || vshCommandOptBool (cmd, "direct")) { /* For peer2peer migration or direct migration we only expect one URI @@ -3436,6 +3483,13 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) goto done; } + if (migratetimer) { + if (virAddTimer(migratetimer, timeout * 1000) < 0) { + vshError(ctl, "%s", _("migrate: failed to add timer")); + goto done; + } + } + if (virDomainMigrateToURI (dom, desturi, flags, dname, 0) == 0) ret = TRUE; } else { @@ -3446,6 +3500,13 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) dconn = virConnectOpenAuth (desturi, virConnectAuthPtrDefault, 0); if (!dconn) goto done; + if (migratetimer) { + if (virAddTimer(migratetimer, timeout * 1000) < 0) { + vshError(ctl, "%s", _("migrate: failed to add timer")); + goto done; + } + } + ddom = virDomainMigrate (dom, dconn, flags, dname, migrateuri, 0); if (ddom) { virDomainFree(ddom); @@ -3455,6 +3516,11 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) } done: + if (migratetimer) { + virStopTimer(); + virDelTimer(migratetimer); + virFreeTimer(migratetimer); + } if (dom) virDomainFree (dom); return ret; } -- 1.7.1

On Fri, Dec 17, 2010 at 04:49:27PM +0800, Wen Congyang wrote:
implement auto cold migration fallback at timeout
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
--- tools/virsh.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 66 insertions(+), 0 deletions(-)
diff --git a/tools/virsh.c b/tools/virsh.c index cbde085..b95c8fe 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -3379,9 +3379,32 @@ static const vshCmdOptDef opts_migrate[] = { {"desturi", VSH_OT_DATA, VSH_OFLAG_REQ, N_("connection URI of the destination host")}, {"migrateuri", VSH_OT_DATA, 0, N_("migration URI, usually can be omitted")}, {"dname", VSH_OT_DATA, 0, N_("rename to new name during migration (if supported)")}, + {"timeout", VSH_OT_INT, 0, N_("auto cold migration fallback when live migration timeouts(in seconds)")}, {NULL, 0, 0, NULL} };
+static void migrateTimeoutHandler(void *data) +{ + virDomainPtr dom = (virDomainPtr)data; + virDomainInfo info; + unsigned int id; + + id = virDomainGetID(dom); + if (id == ((unsigned int)-1)) + return; + + /* The error reason has been reported in virDomainGetInfo() and + * virDomainSuspend() when it fails. So we do not check the return value. + */ + if (virDomainGetInfo(dom, &info) == 0) { + if (info.state == VIR_DOMAIN_SHUTOFF) + return; + + /* suspend the domain when migration timeouts. */ + virDomainSuspend(dom); + } +} + static int cmdMigrate (vshControl *ctl, const vshCmd *cmd) { @@ -3389,6 +3412,8 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) const char *desturi; const char *migrateuri; const char *dname; + long long timeout; + virTimerPtr migratetimer = NULL; int flags = 0, found, ret = FALSE;
if (!vshConnectionUsability (ctl, ctl->conn)) @@ -3426,6 +3451,28 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (vshCommandOptBool (cmd, "copy-storage-inc")) flags |= VIR_MIGRATE_NON_SHARED_INC;
+ timeout = vshCommandOptLongLong(cmd, "timeout", &found); + if (found) { + if (! flags & VIR_MIGRATE_LIVE) { + vshError(ctl, "%s", _("migrate: Unexpected timeout for cold migration")); + goto done; + } + + if (timeout < 1) { + vshError(ctl, "%s", _("migrate: Invalid timeout")); + goto done; + } + + migratetimer = virNewTimer(migrateTimeoutHandler, (void *)dom); + if (!migratetimer) + goto done; + + if (virStartTimer() < 0) {
I thinks it's better to call this function at virsh initialization stage and stop it when virsh ends, then at anytime in the lifetime of virsh we can call virAddTimer/virDelTimer to add/delete a timer.
+ vshError(ctl, "%s", _("migrate: failed to start timer")); + goto done; + } + } + if ((flags & VIR_MIGRATE_PEER2PEER) || vshCommandOptBool (cmd, "direct")) { /* For peer2peer migration or direct migration we only expect one URI @@ -3436,6 +3483,13 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) goto done; }
+ if (migratetimer) { + if (virAddTimer(migratetimer, timeout * 1000) < 0) { + vshError(ctl, "%s", _("migrate: failed to add timer")); + goto done; + } + } + if (virDomainMigrateToURI (dom, desturi, flags, dname, 0) == 0) ret = TRUE; } else { @@ -3446,6 +3500,13 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) dconn = virConnectOpenAuth (desturi, virConnectAuthPtrDefault, 0); if (!dconn) goto done;
+ if (migratetimer) { + if (virAddTimer(migratetimer, timeout * 1000) < 0) { + vshError(ctl, "%s", _("migrate: failed to add timer")); + goto done; + } + } + ddom = virDomainMigrate (dom, dconn, flags, dname, migrateuri, 0); if (ddom) { virDomainFree(ddom); @@ -3455,6 +3516,11 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) }
done: + if (migratetimer) { + virStopTimer();
See above.
+ virDelTimer(migratetimer); + virFreeTimer(migratetimer); + } if (dom) virDomainFree (dom); return ret; } -- 1.7.1
-- Thanks, Hu Tao

On 12/17/2010 01:49 AM, Wen Congyang wrote:
implement auto cold migration fallback at timeout
Maybe it's a language barrier issue thing, but I had to read this patch several times before I understood what you were getting at. Does this wording work any better? If a guest has not completed live migration before timeout, then auto-suspend the guest, where the migration will complete offline.
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
--- tools/virsh.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 66 insertions(+), 0 deletions(-)
Missing a change to tools/virsh.pod to document the new option (that wording should be more complete, in comparison to the one-line blurb for the 'virsh help migrate' output).
diff --git a/tools/virsh.c b/tools/virsh.c index cbde085..b95c8fe 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -3379,9 +3379,32 @@ static const vshCmdOptDef opts_migrate[] = { {"desturi", VSH_OT_DATA, VSH_OFLAG_REQ, N_("connection URI of the destination host")}, {"migrateuri", VSH_OT_DATA, 0, N_("migration URI, usually can be omitted")}, {"dname", VSH_OT_DATA, 0, N_("rename to new name during migration (if supported)")}, + {"timeout", VSH_OT_INT, 0, N_("auto cold migration fallback when live migration timeouts(in seconds)")},
Maybe: "force guest to suspend if live migration exceeds timeout (in seconds)"
{NULL, 0, 0, NULL} };
+static void migrateTimeoutHandler(void *data) +{
I'd float this helper function to live above the cmdMigrate definition; placing it here interrupts the flow between what options cmdMigrate takes and how it uses them.
+ virDomainPtr dom = (virDomainPtr)data;
Cast is not necessary.
cmdMigrate (vshControl *ctl, const vshCmd *cmd) { @@ -3389,6 +3412,8 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) const char *desturi; const char *migrateuri; const char *dname; + long long timeout; + virTimerPtr migratetimer = NULL; int flags = 0, found, ret = FALSE;
if (!vshConnectionUsability (ctl, ctl->conn)) @@ -3426,6 +3451,28 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (vshCommandOptBool (cmd, "copy-storage-inc")) flags |= VIR_MIGRATE_NON_SHARED_INC;
+ timeout = vshCommandOptLongLong(cmd, "timeout", &found);
Why long long? No one is going to set a timeout bigger than 4 billion seconds, so plain int would do just fine here. In particular, since virEventAddTimeout can only sleep up to int milliseconds, and you are already multiplying the user's timeout value (in seconds) by 1000, you already have to worry about overflow issues - if the user requests 5 million seconds (which overflows 4 billion milliseconds for the underlying timer max frequency), you must reject the user's value.
+ + migratetimer = virNewTimer(migrateTimeoutHandler, (void *)dom); + if (!migratetimer) + goto done; + + if (virStartTimer() < 0) {
Hmm - given your usage of starting a timer thread as long as any virTimerPtr object exists, maybe you're better off creating some sort of global ref-counting state under the hood of timer.c that tracks how many virTimer objects are currently set to have a time. The user should only have to call a single function to create their timer object, without having to make extra calls to feed your underlying implementation.
@@ -3455,6 +3516,11 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) }
done: + if (migratetimer) { + virStopTimer(); + virDelTimer(migratetimer);
Again, if multiple threads are managing timers, then the user shouldn't be able to kill the timer resources just because one thread no longer needs a timer. The user shouldn't have to call virStopTimer. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

At 12/18/2010 07:10 AM, Eric Blake Write:
On 12/17/2010 01:49 AM, Wen Congyang wrote:
implement auto cold migration fallback at timeout
Maybe it's a language barrier issue thing, but I had to read this patch several times before I understood what you were getting at. Does this wording work any better?
If a guest has not completed live migration before timeout, then auto-suspend the guest, where the migration will complete offline. Yes.
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
--- tools/virsh.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 66 insertions(+), 0 deletions(-)
Missing a change to tools/virsh.pod to document the new option (that wording should be more complete, in comparison to the one-line blurb for the 'virsh help migrate' output). I will add it.
diff --git a/tools/virsh.c b/tools/virsh.c index cbde085..b95c8fe 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -3379,9 +3379,32 @@ static const vshCmdOptDef opts_migrate[] = { {"desturi", VSH_OT_DATA, VSH_OFLAG_REQ, N_("connection URI of the destination host")}, {"migrateuri", VSH_OT_DATA, 0, N_("migration URI, usually can be omitted")}, {"dname", VSH_OT_DATA, 0, N_("rename to new name during migration (if supported)")}, + {"timeout", VSH_OT_INT, 0, N_("auto cold migration fallback when live migration timeouts(in seconds)")},
Maybe: "force guest to suspend if live migration exceeds timeout (in seconds)" OK
{NULL, 0, 0, NULL} };
+static void migrateTimeoutHandler(void *data) +{
I'd float this helper function to live above the cmdMigrate definition; placing it here interrupts the flow between what options cmdMigrate takes and how it uses them.
+ virDomainPtr dom = (virDomainPtr)data;
Cast is not necessary.
cmdMigrate (vshControl *ctl, const vshCmd *cmd) { @@ -3389,6 +3412,8 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) const char *desturi; const char *migrateuri; const char *dname; + long long timeout; + virTimerPtr migratetimer = NULL; int flags = 0, found, ret = FALSE;
if (!vshConnectionUsability (ctl, ctl->conn)) @@ -3426,6 +3451,28 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (vshCommandOptBool (cmd, "copy-storage-inc")) flags |= VIR_MIGRATE_NON_SHARED_INC;
+ timeout = vshCommandOptLongLong(cmd, "timeout", &found);
Why long long? No one is going to set a timeout bigger than 4 billion seconds, so plain int would do just fine here. In particular, since virEventAddTimeout can only sleep up to int milliseconds, and you are already multiplying the user's timeout value (in seconds) by 1000, you already have to worry about overflow issues - if the user requests 5 million seconds (which overflows 4 billion milliseconds for the underlying timer max frequency), you must reject the user's value.
+ + migratetimer = virNewTimer(migrateTimeoutHandler, (void *)dom); + if (!migratetimer) + goto done; + + if (virStartTimer() < 0) {
Hmm - given your usage of starting a timer thread as long as any virTimerPtr object exists, maybe you're better off creating some sort of global ref-counting state under the hood of timer.c that tracks how many virTimer objects are currently set to have a time. The user should only have to call a single function to create their timer object, without having to make extra calls to feed your underlying implementation.
@@ -3455,6 +3516,11 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) }
done: + if (migratetimer) { + virStopTimer(); + virDelTimer(migratetimer);
Again, if multiple threads are managing timers, then the user shouldn't be able to kill the timer resources just because one thread no longer needs a timer. The user shouldn't have to call virStopTimer.
Thanks for your comment.
participants (3)
-
Eric Blake
-
Hu Tao
-
Wen Congyang