From e28cb2a03a670e4c0e7641f68f9d9f3accb00ae0 Mon Sep 17 00:00:00 2001
From: Yilu Lin <linyilu(a)huawei.com>
Date: Tue, 4 Aug 2020 02:42:00 -0400
Subject: [PATCH] fix vm split-brain when heartbeat stopped
Signed-off-by: Yilu Lin <linyilu(a)huawei.com>
If keepalive messages are lost in the finish step, the VM may end up in a
split-brain state. Shut down the source VM for protection.
---
src/qemu/qemu_migration.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 2c7bf34..b8296ba 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -4136,6 +4136,8 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver,
int cookieoutlen = 0;
int ret = -1;
virErrorPtr orig_err = NULL;
+ virErrorPtr finish_err = NULL;
+ bool living = true;
bool cancelled = true;
virStreamPtr st = NULL;
unsigned long destflags;
@@ -4394,7 +4396,16 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver,
* The lock manager plugins should take care of
* safety in this scenario.
*/
- cancelled = ddomain == NULL;
+ if (!cancelled && !ddomain)
+ finish_err = virSaveLastError();
+
+ if (finish_err && finish_err->message &&
+ strstr(finish_err->message, "received hangup / error event on
socket")) {
+ living = false;
+ VIR_ERROR(_("keepalive messages lost in finish step, shutdown src vm for
protection"));
+ } else {
+ cancelled = ddomain == NULL;
+ }
/* If finish3 set an error, and we don't have an earlier
* one we need to preserve it in case confirm3 overwrites
@@ -4427,10 +4438,17 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver,
virObjectUnref(ddomain);
ret = 0;
} else {
- ret = -1;
+ if (!living)
+ ret = 0;
+ else
+ ret = -1;
}
virObjectUnref(st);
+ if (finish_err) {
+ virSetError(finish_err);
+ virFreeError(finish_err);
+ }
virErrorRestore(&orig_err);
VIR_FREE(uri_out);
--
2.19.1