ping...
Can everyone help review this patch, please?
On 2023/12/7 20:52, tugy(a)chinatelecom.cn wrote:
> From: Guoyi Tu <tugy(a)chinatelecom.cn>
>
> Currently, libvirt creates a thread pool with only one thread to handle all
> qemu monitor events for virtual machines. If that thread gets stuck while
> handling a monitor EOF event, for example because it is unable to kill the
> virtual machine process or release resources, the events of other virtual
> machines will also be blocked, which leads to abnormal behavior of those
> virtual machines.
>
> For instance, when another virtual machine completes a shutdown operation
> and the monitor EOF event has been queued but remains unprocessed, and we
> immediately destroy and start the virtual machine again, then at a later
> time, when the EOF event gets processed, processMonitorEOFEvent() will kill
> the virtual machine that was just started.
>
> To address this issue, in processMonitorEOFEvent() we check whether
> the current virtual machine's id is equal to the one at the time
> the event was generated. If they do not match, we return immediately.
>
> Signed-off-by: Guoyi Tu <tugy(a)chinatelecom.cn>
> Signed-off-by: dengpengcheng <dengpc12(a)chinatelecom.cn>
> ---
> src/qemu/qemu_domain.c | 2 +-
> src/qemu/qemu_driver.c | 11 +++++++++--
> src/qemu/qemu_process.c | 2 +-
> 3 files changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 953808fcfe..435ee621df 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -11470,7 +11470,6 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
> case QEMU_PROCESS_EVENT_NETDEV_STREAM_DISCONNECTED:
> case QEMU_PROCESS_EVENT_NIC_RX_FILTER_CHANGED:
> case QEMU_PROCESS_EVENT_SERIAL_CHANGED:
> - case QEMU_PROCESS_EVENT_MONITOR_EOF:
> case QEMU_PROCESS_EVENT_GUEST_CRASHLOADED:
> g_free(event->data);
> break;
> @@ -11484,6 +11483,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
> case QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION:
> case QEMU_PROCESS_EVENT_RESET:
> case QEMU_PROCESS_EVENT_NBDKIT_EXITED:
> + case QEMU_PROCESS_EVENT_MONITOR_EOF:
> case QEMU_PROCESS_EVENT_LAST:
> break;
> }
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 64afae6450..cfc5b79657 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -3854,7 +3854,8 @@ processJobStatusChangeEvent(virDomainObj *vm,
>
> static void
> processMonitorEOFEvent(virQEMUDriver *driver,
> - virDomainObj *vm)
> + virDomainObj *vm,
> + int domid)
> {
> qemuDomainObjPrivate *priv = vm->privateData;
> int eventReason = VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN;
> @@ -3863,6 +3864,12 @@ processMonitorEOFEvent(virQEMUDriver *driver,
> unsigned int stopFlags = 0;
> virObjectEvent *event = NULL;
>
> + if (vm->def->id != domid) {
> + VIR_ERROR("Domain %s was restarted, ignoring EOF",
> + vm->def->name);
> + return;
> + }
> +
> if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0)
> return;
>
> @@ -4082,7 +4089,7 @@ static void qemuProcessEventHandler(void *data, void *opaque)
> processJobStatusChangeEvent(vm, processEvent->data);
> break;
> case QEMU_PROCESS_EVENT_MONITOR_EOF:
> - processMonitorEOFEvent(driver, vm);
> + processMonitorEOFEvent(driver, vm, GPOINTER_TO_INT(processEvent->data));
> break;
> case QEMU_PROCESS_EVENT_PR_DISCONNECT:
> processPRDisconnectEvent(vm);
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index fc05b4b24f..696d526a5d 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -316,7 +316,7 @@ qemuProcessHandleMonitorEOF(qemuMonitor *mon,
> }
>
> qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_MONITOR_EOF,
> - 0, 0, NULL);
> + 0, 0, GINT_TO_POINTER(vm->def->id));
>
> /* We don't want this EOF handler to be called over and over while the
> * thread is waiting for a job.