Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=822839
For non-p2p migration, if the network goes down while a migration is in
progress, the virsh client hangs for a fairly long time. This patch adds
keepalive to virsh so that it can detect the state of the network
connection to the remote libvirtd, and aborts the migration job about
30 seconds after the connection is lost.
---
tools/virsh.c | 29 +++++++++++++++++++++++------
1 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/tools/virsh.c b/tools/virsh.c
index 5226bd8..9099328 100644
--- a/tools/virsh.c
+++ b/tools/virsh.c
@@ -414,13 +414,14 @@ typedef struct __vshCtrlData {
vshControl *ctl;
const vshCmd *cmd;
int writefd;
+ virConnectPtr dconn;
} vshCtrlData;
typedef void (*jobWatchTimeoutFunc) (vshControl *ctl, virDomainPtr dom,
void *opaque);
static bool
-vshWatchJob(vshControl *ctl,
+vshWatchJob(vshCtrlData *data,
virDomainPtr dom,
bool verbose,
int pipe_fd,
@@ -3277,6 +3278,7 @@ cmdSave(vshControl *ctl, const vshCmd *cmd)
data.ctl = ctl;
data.cmd = cmd;
data.writefd = p[1];
+ data.dconn = NULL;
if (virThreadCreate(&workerThread,
true,
@@ -3284,7 +3286,7 @@ cmdSave(vshControl *ctl, const vshCmd *cmd)
&data) < 0)
goto cleanup;
- ret = vshWatchJob(ctl, dom, verbose, p[0], 0, NULL, NULL, _("Save"));
+ ret = vshWatchJob(&data, dom, verbose, p[0], 0, NULL, NULL,
_("Save"));
virThreadJoin(&workerThread);
@@ -3584,6 +3586,7 @@ cmdManagedSave(vshControl *ctl, const vshCmd *cmd)
data.ctl = ctl;
data.cmd = cmd;
data.writefd = p[1];
+ data.dconn = NULL;
if (virThreadCreate(&workerThread,
true,
@@ -3591,7 +3594,7 @@ cmdManagedSave(vshControl *ctl, const vshCmd *cmd)
&data) < 0)
goto cleanup;
- ret = vshWatchJob(ctl, dom, verbose, p[0], 0,
+ ret = vshWatchJob(&data, dom, verbose, p[0], 0,
NULL, NULL, _("Managedsave"));
virThreadJoin(&workerThread);
@@ -4062,6 +4065,7 @@ cmdDump(vshControl *ctl, const vshCmd *cmd)
data.ctl = ctl;
data.cmd = cmd;
data.writefd = p[1];
+ data.dconn = NULL;
if (virThreadCreate(&workerThread,
true,
@@ -4069,7 +4073,7 @@ cmdDump(vshControl *ctl, const vshCmd *cmd)
&data) < 0)
goto cleanup;
- ret = vshWatchJob(ctl, dom, verbose, p[0], 0, NULL, NULL, _("Dump"));
+ ret = vshWatchJob(&data, dom, verbose, p[0], 0, NULL, NULL,
_("Dump"));
virThreadJoin(&workerThread);
@@ -7189,6 +7193,10 @@ doMigrate (void *opaque)
dconn = virConnectOpenAuth (desturi, virConnectAuthPtrDefault, 0);
if (!dconn) goto out;
+ data->dconn = dconn;
+ if (virConnectSetKeepAlive(dconn, 5, 5) < 0)
+ vshDebug(ctl, VSH_ERR_WARNING, "migrate: Failed to start
keepalive\n");
+
ddom = virDomainMigrate2(dom, dconn, xml, flags, dname, migrateuri, 0);
if (ddom) {
virDomainFree(ddom);
@@ -7244,7 +7252,7 @@ vshMigrationTimeout(vshControl *ctl,
}
static bool
-vshWatchJob(vshControl *ctl,
+vshWatchJob(vshCtrlData *data,
virDomainPtr dom,
bool verbose,
int pipe_fd,
@@ -7262,6 +7270,7 @@ vshWatchJob(vshControl *ctl,
char retchar;
bool functionReturn = false;
sigset_t sigmask, oldsigmask;
+ vshControl *ctl = data->ctl;
sigemptyset(&sigmask);
sigaddset(&sigmask, SIGINT);
@@ -7305,6 +7314,13 @@ repoll:
goto cleanup;
}
+ if (data->dconn && virConnectIsAlive(data->dconn) <= 0) {
+ virDomainAbortJob(dom);
+ vshError(ctl, "%s",
+ _("Lost connection to destination host"));
+ goto cleanup;
+ }
+
GETTIMEOFDAY(&curr);
if (timeout && (((int)(curr.tv_sec - start.tv_sec) * 1000 +
(int)(curr.tv_usec - start.tv_usec) / 1000) >
@@ -7378,13 +7394,14 @@ cmdMigrate(vshControl *ctl, const vshCmd *cmd)
data.ctl = ctl;
data.cmd = cmd;
data.writefd = p[1];
+ data.dconn = NULL;
if (virThreadCreate(&workerThread,
true,
doMigrate,
&data) < 0)
goto cleanup;
- functionReturn = vshWatchJob(ctl, dom, verbose, p[0], timeout,
+ functionReturn = vshWatchJob(&data, dom, verbose, p[0], timeout,
vshMigrationTimeout, NULL, _("Migration"));
virThreadJoin(&workerThread);
--
1.7.7.5