On 9/30/21 1:09 PM, Laurent Vivier wrote:
If we want to save a snapshot of a VM to a file, we used to follow
the
following steps:
1- stop the VM:
(qemu) stop
2- migrate the VM to a file:
(qemu) migrate "exec:cat > snapshot"
3- resume the VM:
(qemu) cont
After that we can restore the snapshot with:
qemu-system-x86_64 ... -incoming "exec:cat snapshot"
(qemu) cont
This is the basics of what libvirt does for a snapshot, and steps 1+2
are what it does for a "managedsave" (where it saves the snapshot to
disk and then terminates the qemu process, for later re-animation).
In those cases, it seems like this new parameter could work for us -
instead of explicitly pausing the guest prior to migrating it to disk,
we would set this new parameter to on, then directly migrate-to-disk
(relying on qemu to do the pause). Care will need to be taken to assure
that error recovery behaves the same though.
There are a couple of cases when libvirt apparently *doesn't* pause the
guest during the migrate-to-disk, both having to do with saving a
coredump of the guest. Since I really have no idea of how
common/important that is (or even if my assessment of the code is
correct), I'm Cc'ing this patch to libvir-list to make sure it catches
the attention of someone who knows the answers and implications.
But when failover is configured, it doesn't work anymore.
As the failover needs to ask the guest OS to unplug the card
the machine cannot be paused.
This patch introduces a new migration parameter, "pause-vm", that
asks the migration to pause the VM during the migration startup
phase after the the card is unplugged.
Once the migration is done, we only need to resume the VM with
"cont" and the card is plugged back:
1- set the parameter:
(qemu) migrate_set_parameter pause-vm on
2- migrate the VM to a file:
(qemu) migrate "exec:cat > snapshot"
The primary failover card (VFIO) is unplugged and the VM is paused.
3- resume the VM:
(qemu) cont
The VM restarts and the primary failover card is plugged back
The VM state sent in the migration stream is "paused", it means
when the snapshot is loaded or if the stream is sent to a destination
QEMU, the VM needs to be resumed manually.
Signed-off-by: Laurent Vivier <lvivier(a)redhat.com>
---
qapi/migration.json | 20 +++++++++++++++---
include/hw/virtio/virtio-net.h | 1 +
hw/net/virtio-net.c | 33 ++++++++++++++++++++++++++++++
migration/migration.c | 37 +++++++++++++++++++++++++++++++++-
monitor/hmp-cmds.c | 8 ++++++++
5 files changed, 95 insertions(+), 4 deletions(-)
diff --git a/qapi/migration.json b/qapi/migration.json
index 88f07baedd06..86284d96ad2a 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -743,6 +743,10 @@
# block device name if there is one, and to their node name
# otherwise. (Since 5.2)
#
+# @pause-vm: Pause the virtual machine before doing the migration.
+# This allows QEMU to unplug a card before doing the
+# migration as it depends on the guest kernel. (since 6.2)
+#
# Since: 2.4
##
{ 'enum': 'MigrationParameter',
@@ -758,7 +762,7 @@
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
'multifd-zlib-level' ,'multifd-zstd-level',
- 'block-bitmap-mapping' ] }
+ 'block-bitmap-mapping', 'pause-vm' ] }
##
# @MigrateSetParameters:
@@ -903,6 +907,10 @@
# block device name if there is one, and to their node name
# otherwise. (Since 5.2)
#
+# @pause-vm: Pause the virtual machine before doing the migration.
+# This allows QEMU to unplug a card before doing the
+# migration as it depends on the guest kernel. (since 6.2)
+#
# Since: 2.4
##
# TODO either fuse back into MigrationParameters, or make
@@ -934,7 +942,8 @@
'*multifd-compression': 'MultiFDCompression',
'*multifd-zlib-level': 'uint8',
'*multifd-zstd-level': 'uint8',
- '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+ '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
+ '*pause-vm': 'bool' } }
##
# @migrate-set-parameters:
@@ -1099,6 +1108,10 @@
# block device name if there is one, and to their node name
# otherwise. (Since 5.2)
#
+# @pause-vm: Pause the virtual machine before doing the migration.
+# This allows QEMU to unplug a card before doing the
+# migration as it depends on the guest kernel. (since 6.2)
+#
# Since: 2.4
##
{ 'struct': 'MigrationParameters',
@@ -1128,7 +1141,8 @@
'*multifd-compression': 'MultiFDCompression',
'*multifd-zlib-level': 'uint8',
'*multifd-zstd-level': 'uint8',
- '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+ '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
+ '*pause-vm': 'bool' } }
##
# @query-migrate-parameters:
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 824a69c23f06..a6c186e28b45 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -210,6 +210,7 @@ struct VirtIONet {
bool failover;
DeviceListener primary_listener;
Notifier migration_state;
+ VMChangeStateEntry *vm_state;
VirtioNetRssData rss_data;
struct NetRxPkt *rx_pkt;
struct EBPFRSSContext ebpf_rss;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index f205331dcf8c..c83364eac47b 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -39,6 +39,7 @@
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
@@ -3303,6 +3304,35 @@ static void virtio_net_migration_state_notifier(Notifier
*notifier, void *data)
virtio_net_handle_migration_primary(n, s);
}
+static void virtio_net_failover_restart_cb(void *opaque, bool running,
+ RunState state)
+{
+ DeviceState *dev;
+ VirtIONet *n = opaque;
+ Error *err = NULL;
+ PCIDevice *pdev;
+
+ if (!running) {
+ return;
+ }
+
+ dev = failover_find_primary_device(n);
+ if (!dev) {
+ return;
+ }
+
+ pdev = PCI_DEVICE(dev);
+ if (!pdev->partially_hotplugged) {
+ return;
+ }
+
+ if (!failover_replug_primary(n, dev, &err)) {
+ if (err) {
+ error_report_err(err);
+ }
+ }
+}
+
static bool failover_hide_primary_device(DeviceListener *listener,
QemuOpts *device_opts)
{
@@ -3360,6 +3390,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error
**errp)
device_listener_register(&n->primary_listener);
n->migration_state.notify = virtio_net_migration_state_notifier;
add_migration_state_change_notifier(&n->migration_state);
+ n->vm_state = qemu_add_vm_change_state_handler(
+ virtio_net_failover_restart_cb, n);
n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
}
@@ -3508,6 +3540,7 @@ static void virtio_net_device_unrealize(DeviceState *dev)
if (n->failover) {
device_listener_unregister(&n->primary_listener);
remove_migration_state_change_notifier(&n->migration_state);
+ qemu_del_vm_change_state_handler(n->vm_state);
}
max_queues = n->multiqueue ? n->max_queues : 1;
diff --git a/migration/migration.c b/migration/migration.c
index bb909781b7f5..9c96986d4abf 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -901,6 +901,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
s->parameters.block_bitmap_mapping);
}
+ params->has_pause_vm = true;
+ params->pause_vm = s->parameters.pause_vm;
+
return params;
}
@@ -1549,6 +1552,11 @@ static void migrate_params_test_apply(MigrateSetParameters
*params,
dest->has_block_bitmap_mapping = true;
dest->block_bitmap_mapping = params->block_bitmap_mapping;
}
+
+ if (params->has_pause_vm) {
+ dest->has_pause_vm = true;
+ dest->pause_vm = params->pause_vm;
+ }
}
static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
@@ -1671,6 +1679,10 @@ static void migrate_params_apply(MigrateSetParameters *params,
Error **errp)
QAPI_CLONE(BitmapMigrationNodeAliasList,
params->block_bitmap_mapping);
}
+
+ if (params->has_pause_vm) {
+ s->parameters.pause_vm = params->pause_vm;
+ }
}
void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
@@ -1718,6 +1730,12 @@ void qmp_migrate_start_postcopy(Error **errp)
" started");
return;
}
+
+ if (s->parameters.pause_vm) {
+ error_setg(errp, "Postcopy cannot be started if pause-vm is on");
+ return;
+ }
+
/*
* we don't error if migration has finished since that would be racy
* with issuing this command.
@@ -3734,13 +3752,27 @@ static void qemu_savevm_wait_unplug(MigrationState *s, int
old_state,
"failure");
}
}
-
migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
} else {
migrate_set_state(&s->state, old_state, new_state);
}
}
+/* stop the VM before starting the migration but after device unplug */
+static void pause_vm_after_unplug(MigrationState *s)
+{
+ if (s->parameters.pause_vm) {
+ qemu_mutex_lock_iothread();
+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+ s->vm_was_running = runstate_is_running();
+ if (vm_stop_force_state(RUN_STATE_PAUSED)) {
+ migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+ MIGRATION_STATUS_FAILED);
+ }
+ qemu_mutex_unlock_iothread();
+ }
+}
+
/*
* Master migration thread on the source VM.
* It drives the migration and pumps the data down the outgoing channel.
@@ -3790,6 +3822,8 @@ static void *migration_thread(void *opaque)
qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
+ pause_vm_after_unplug(s);
+
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
trace_migration_thread_setup_complete();
@@ -4265,6 +4299,7 @@ static void migration_instance_init(Object *obj)
params->has_announce_max = true;
params->has_announce_rounds = true;
params->has_announce_step = true;
+ params->has_pause_vm = true;
qemu_sem_init(&ms->postcopy_pause_sem, 0);
qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index b5e71d9e6f52..71bc8c15335b 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -513,6 +513,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
}
}
}
+
+ monitor_printf(mon, "%s: %s\n",
+ MigrationParameter_str(MIGRATION_PARAMETER_PAUSE_VM),
+ params->pause_vm ? "on" : "off");
}
qapi_free_MigrationParameters(params);
@@ -1399,6 +1403,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
error_setg(&err, "The block-bitmap-mapping parameter can only be set
"
"through QMP");
break;
+ case MIGRATION_PARAMETER_PAUSE_VM:
+ p->has_pause_vm = true;
+ visit_type_bool(v, param, &p->pause_vm, &err);
+ break;
default:
assert(0);
}