When starting a migration with --timeout, we create a thread to call the
migration API and in parallel setup a timer for the timeout. The
description of --timeout says: "run action specified by --timeout-*
option (suspend by default) if live migration exceeds timeout", which is
not really the way this feature was implemented. Before live migration
starts we first need to contact the source to get the domain definition
and send it to the destination where a new QEMU process has to be
started. This can take some (unpredictably long) time while the timeout
timer is already running. If a very short timeout is set (which doesn't
really make sense, but it's allowed), we may even end up taking the
timeout action before the actual migration had a chance to start.
With this patch the timeout is started only after we get non-zero
dataTotal from virDomainGetJobInfo, which means the migration (of either
storage or memory) really started.
https://issues.redhat.com/browse/RHEL-41264
Signed-off-by: Jiri Denemark <jdenemar(a)redhat.com>
---
tools/virsh-domain.c | 55 ++++++++++++++++++++++++++++----------------
1 file changed, 35 insertions(+), 20 deletions(-)
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index e4923284af..546db955a9 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -4237,7 +4237,10 @@ typedef void (*jobWatchTimeoutFunc)(vshControl *ctl, virDomainPtr
dom,
struct virshWatchData {
vshControl *ctl;
virDomainPtr dom;
+ GMainContext *context;
jobWatchTimeoutFunc timeout_func;
+ int timeout_secs;
+ GSource *timeout_src;
void *opaque;
const char *label;
GIOChannel *stdin_ioc;
@@ -4259,6 +4262,20 @@ virshWatchTimeout(gpointer opaque)
}
+static void
+virshWatchSetTimeout(struct virshWatchData *data)
+{
+ vshDebug(data->ctl, VSH_ERR_DEBUG,
+ "watchJob: setting timeout of %d secs\n", data->timeout_secs);
+
+ data->timeout_src = g_timeout_source_new_seconds(data->timeout_secs);
+ g_source_set_callback(data->timeout_src,
+ virshWatchTimeout,
+ data, NULL);
+ g_source_attach(data->timeout_src, data->context);
+}
+
+
static gboolean
virshWatchProgress(gpointer opaque)
{
@@ -4290,10 +4307,17 @@ virshWatchProgress(gpointer opaque)
jobinfo.type == VIR_DOMAIN_JOB_UNBOUNDED)) {
vshTTYDisableInterrupt(data->ctl);
data->jobStarted = true;
+ vshDebug(data->ctl, VSH_ERR_DEBUG,
+ "watchJob: job started\n");
+ }
- if (!data->verbose) {
+ if (data->jobStarted) {
+ if (data->timeout_secs > 0 && !data->timeout_src) {
+ if (jobinfo.dataTotal > 0)
+ virshWatchSetTimeout(data);
+ } else if (!data->verbose) {
vshDebug(data->ctl, VSH_ERR_DEBUG,
- "watchJob: job started, disabling callback\n");
+ "watchJob: disabling callback\n");
return G_SOURCE_REMOVE;
}
}
@@ -4356,13 +4380,15 @@ virshWatchJob(vshControl *ctl,
struct sigaction sig_action;
struct sigaction old_sig_action;
#endif /* !WIN32 */
- g_autoptr(GSource) timeout_src = NULL;
g_autoptr(GSource) progress_src = NULL;
g_autoptr(GSource) stdin_src = NULL;
struct virshWatchData data = {
.ctl = ctl,
.dom = dom,
+ .context = g_main_loop_get_context(eventLoop),
.timeout_func = timeout_func,
+ .timeout_secs = timeout_secs,
+ .timeout_src = NULL,
.opaque = opaque,
.label = label,
.stdin_ioc = NULL,
@@ -4391,27 +4417,14 @@ virshWatchJob(vshControl *ctl,
g_source_set_callback(stdin_src,
(GSourceFunc)virshWatchInterrupt,
&data, NULL);
- g_source_attach(stdin_src,
- g_main_loop_get_context(eventLoop));
- }
-
- if (timeout_secs) {
- vshDebug(ctl, VSH_ERR_DEBUG,
- "watchJob: setting timeout of %d secs\n", timeout_secs);
- timeout_src = g_timeout_source_new_seconds(timeout_secs);
- g_source_set_callback(timeout_src,
- virshWatchTimeout,
- &data, NULL);
- g_source_attach(timeout_src,
- g_main_loop_get_context(eventLoop));
+ g_source_attach(stdin_src, data.context);
}
progress_src = g_timeout_source_new(500);
g_source_set_callback(progress_src,
virshWatchProgress,
&data, NULL);
- g_source_attach(progress_src,
- g_main_loop_get_context(eventLoop));
+ g_source_attach(progress_src, data.context);
g_main_loop_run(eventLoop);
@@ -4420,8 +4433,10 @@ virshWatchJob(vshControl *ctl,
if (*job_err == 0 && verbose) /* print [100 %] */
virshPrintJobProgress(label, 0, 1);
- if (timeout_src)
- g_source_destroy(timeout_src);
+ if (data.timeout_src) {
+ g_source_destroy(data.timeout_src);
+ g_source_unref(data.timeout_src);
+ }
g_source_destroy(progress_src);
if (stdin_src)
g_source_destroy(stdin_src);
--
2.47.0