[PATCH 0/5] qemuProcessUpdateCPU: do not change 'fallback' for pSeries guests

Hi, Patch 5/5 contains a fix for [1]. The first 4 patches are g_auto() cleanups I made along the way while investigating the bug. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1660711 Daniel Henrique Barboza (5): cpu_conf.c: modernize virCPUDefCopyWithoutModel and virCPUDefCopy cpu_arm.c: modernize virCPUarmUpdate cpu_s390.c: modernize virCPUs390Update qemu_process.c: modernize qemuProcessUpdateCPU code path qemuProcessUpdateCPU: do not change 'fallback' to ALLOW for pSeries guests src/conf/cpu_conf.c | 22 +++++----------- src/cpu/cpu_arm.c | 14 ++++------ src/cpu/cpu_s390.c | 18 +++++-------- src/qemu/qemu_process.c | 58 +++++++++++++++++------------------------ 4 files changed, 43 insertions(+), 69 deletions(-) -- 2.26.2

Use automatic cleanup of variables. Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/conf/cpu_conf.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/conf/cpu_conf.c b/src/conf/cpu_conf.c index 07404c6fb0..c6d36e0cb5 100644 --- a/src/conf/cpu_conf.c +++ b/src/conf/cpu_conf.c @@ -227,7 +227,7 @@ virCPUDefStealModel(virCPUDefPtr dst, virCPUDefPtr virCPUDefCopyWithoutModel(const virCPUDef *cpu) { - virCPUDefPtr copy; + g_autoptr(virCPUDef) copy = NULL; if (!cpu) return NULL; @@ -246,42 +246,34 @@ virCPUDefCopyWithoutModel(const virCPUDef *cpu) if (cpu->cache) { if (VIR_ALLOC(copy->cache) < 0) - goto error; + return NULL; *copy->cache = *cpu->cache; } if (cpu->tsc) { if (VIR_ALLOC(copy->tsc) < 0) - goto error; + return NULL; *copy->tsc = *cpu->tsc; } - return copy; - - error: - virCPUDefFree(copy); - return NULL; + return g_steal_pointer(©); } virCPUDefPtr virCPUDefCopy(const virCPUDef *cpu) { - virCPUDefPtr copy; + g_autoptr(virCPUDef) copy = NULL; if (!(copy = virCPUDefCopyWithoutModel(cpu))) return NULL; if (virCPUDefCopyModel(copy, cpu, false) < 0) - goto error; - - return copy; + return NULL; - error: - virCPUDefFree(copy); - return NULL; + return g_steal_pointer(©); } -- 2.26.2

On Fri, May 22, 2020 at 16:56:16 -0300, Daniel Henrique Barboza wrote:
Use automatic cleanup of variables.
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/conf/cpu_conf.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-)
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>

Use automatic cleanup of variables. Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/cpu/cpu_arm.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/cpu/cpu_arm.c b/src/cpu/cpu_arm.c index 6f6c6a1479..cd4f720c95 100644 --- a/src/cpu/cpu_arm.c +++ b/src/cpu/cpu_arm.c @@ -415,8 +415,7 @@ static int virCPUarmUpdate(virCPUDefPtr guest, const virCPUDef *host) { - int ret = -1; - virCPUDefPtr updated = NULL; + g_autoptr(virCPUDef) updated = NULL; if (guest->mode != VIR_CPU_MODE_HOST_MODEL) return 0; @@ -424,24 +423,21 @@ virCPUarmUpdate(virCPUDefPtr guest, if (!host) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("unknown host CPU model")); - goto cleanup; + return -1; } if (!(updated = virCPUDefCopyWithoutModel(guest))) - goto cleanup; + return -1; updated->mode = VIR_CPU_MODE_CUSTOM; if (virCPUDefCopyModel(updated, host, true) < 0) - goto cleanup; + return -1; virCPUDefStealModel(guest, updated, false); guest->mode = VIR_CPU_MODE_CUSTOM; guest->match = VIR_CPU_MATCH_EXACT; - ret = 0; - cleanup: - virCPUDefFree(updated); - return ret; + return 0; } -- 2.26.2

On Fri, May 22, 2020 at 16:56:17 -0300, Daniel Henrique Barboza wrote:
Use automatic cleanup of variables.
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/cpu/cpu_arm.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-)
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>

Use automatic cleanup of variables. Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/cpu/cpu_s390.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/cpu/cpu_s390.c b/src/cpu/cpu_s390.c index dd030c5a11..c1c5686244 100644 --- a/src/cpu/cpu_s390.c +++ b/src/cpu/cpu_s390.c @@ -45,8 +45,7 @@ static int virCPUs390Update(virCPUDefPtr guest, const virCPUDef *host) { - virCPUDefPtr updated = NULL; - int ret = -1; + g_autoptr(virCPUDef) updated = NULL; size_t i; if (guest->mode == VIR_CPU_MODE_CUSTOM) { @@ -58,37 +57,34 @@ virCPUs390Update(virCPUDefPtr guest, virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("optional CPU features are not supported")); } - goto cleanup; + return -1; } if (!host) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("unknown host CPU model")); - goto cleanup; + return -1; } if (!(updated = virCPUDefCopyWithoutModel(guest))) - goto cleanup; + return -1; updated->mode = VIR_CPU_MODE_CUSTOM; if (virCPUDefCopyModel(updated, host, true) < 0) - goto cleanup; + return -1; for (i = 0; i < guest->nfeatures; i++) { if (virCPUDefUpdateFeature(updated, guest->features[i].name, guest->features[i].policy) < 0) - goto cleanup; + return -1; } virCPUDefStealModel(guest, updated, false); guest->mode = VIR_CPU_MODE_CUSTOM; guest->match = VIR_CPU_MATCH_EXACT; - ret = 0; - cleanup: - virCPUDefFree(updated); - return ret; + return 0; } -- 2.26.2

On Fri, May 22, 2020 at 16:56:18 -0300, Daniel Henrique Barboza wrote:
Use automatic cleanup of variables.
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/cpu/cpu_s390.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-)
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>

Use automatic cleanup on qemuProcessUpdateCPU and the functions called by it. Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/qemu/qemu_process.c | 50 ++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 45af8f810c..a1ef1d42b0 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4187,8 +4187,8 @@ qemuProcessFetchGuestCPU(virQEMUDriverPtr driver, virCPUDataPtr *disabled) { qemuDomainObjPrivatePtr priv = vm->privateData; - virCPUDataPtr dataEnabled = NULL; - virCPUDataPtr dataDisabled = NULL; + g_autoptr(virCPUData) dataEnabled = NULL; + g_autoptr(virCPUData) dataDisabled = NULL; bool generic; int rc; @@ -4201,7 +4201,7 @@ qemuProcessFetchGuestCPU(virQEMUDriverPtr driver, return 0; if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0) - goto error; + return -1; if (generic) { rc = qemuMonitorGetGuestCPU(priv->mon, @@ -4213,19 +4213,14 @@ qemuProcessFetchGuestCPU(virQEMUDriverPtr driver, } if (qemuDomainObjExitMonitor(driver, vm) < 0) - goto error; + return -1; if (rc == -1) - goto error; + return -1; - *enabled = dataEnabled; - *disabled = dataDisabled; + *enabled = g_steal_pointer(&dataEnabled); + *disabled = g_steal_pointer(&dataDisabled); return 0; - - error: - virCPUDataFree(dataEnabled); - virCPUDataFree(dataDisabled); - return -1; } @@ -4261,9 +4256,8 @@ qemuProcessUpdateLiveGuestCPU(virDomainObjPtr vm, { virDomainDefPtr def = vm->def; qemuDomainObjPrivatePtr priv = vm->privateData; - virCPUDefPtr orig = NULL; + g_autoptr(virCPUDef) orig = NULL; int rc; - int ret = -1; if (!enabled) return 0; @@ -4274,10 +4268,10 @@ qemuProcessUpdateLiveGuestCPU(virDomainObjPtr vm, return 0; if (!(orig = virCPUDefCopy(def->cpu))) - goto cleanup; + return -1; if ((rc = virCPUUpdateLive(def->os.arch, def->cpu, enabled, disabled)) < 0) { - goto cleanup; + return -1; } else if (rc == 0) { /* Store the original CPU in priv if QEMU changed it and we didn't * get the original CPU via migration, restore, or snapshot revert. @@ -4288,11 +4282,7 @@ qemuProcessUpdateLiveGuestCPU(virDomainObjPtr vm, def->cpu->check = VIR_CPU_CHECK_FULL; } - ret = 0; - - cleanup: - virCPUDefFree(orig); - return ret; + return 0; } @@ -4351,10 +4341,9 @@ qemuProcessUpdateCPU(virQEMUDriverPtr driver, virDomainObjPtr vm, qemuDomainAsyncJob asyncJob) { - virCPUDataPtr cpu = NULL; - virCPUDataPtr disabled = NULL; + g_autoptr(virCPUData) cpu = NULL; + g_autoptr(virCPUData) disabled = NULL; g_autoptr(virDomainCapsCPUModels) models = NULL; - int ret = -1; /* The host CPU model comes from host caps rather than QEMU caps so * fallback must be allowed no matter what the user specified in the XML. @@ -4362,21 +4351,16 @@ qemuProcessUpdateCPU(virQEMUDriverPtr driver, vm->def->cpu->fallback = VIR_CPU_FALLBACK_ALLOW; if (qemuProcessFetchGuestCPU(driver, vm, asyncJob, &cpu, &disabled) < 0) - goto cleanup; + return -1; if (qemuProcessUpdateLiveGuestCPU(vm, cpu, disabled) < 0) - goto cleanup; + return -1; if (qemuProcessFetchCPUDefinitions(driver, vm, asyncJob, &models) < 0 || virCPUTranslate(vm->def->os.arch, vm->def->cpu, models) < 0) - goto cleanup; - - ret = 0; + return -1; - cleanup: - virCPUDataFree(cpu); - virCPUDataFree(disabled); - return ret; + return 0; } -- 2.26.2

On Fri, May 22, 2020 at 16:56:19 -0300, Daniel Henrique Barboza wrote:
Use automatic cleanup on qemuProcessUpdateCPU and the functions called by it.
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/qemu/qemu_process.c | 50 ++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 33 deletions(-)
Reviewed-by: Jiri Denemark <jdenemar@redhat.com> I pushed patches 1-4.

Commit v3.10.0-182-g237f045d9a ("qemu: Ignore fallback CPU attribute on reconnect") forced CPU 'fallback' to ALLOW, regardless of user choice. This fixed a situation in which guests created with older Libvirt versions, which used CPU mode 'host-model' in runtime, would fail to launch in a newer Libvirt if the fallback was set to FORBID. This would lead to a scenario where the CPU was translated to 'host-model' to 'custom', but then the FORBID setting would make the translation process fail. This fix has a side effect for PSeries guests. PSeries can operate with 'host-model' in runtime due to specific PPC64 mechanics regarding compatibility mode. In fact, the update() implementation of the cpuDriverPPC64 driver is a NO-OP if CPU mode is 'host-model', and the driver does not implement translate(). The result is that PSeries guests aren't affected by the problem, but they are being affected by the fix - users are seeing 'fallback' mode being changed without necessity during daemon restart. All other cpuArchDrivers implements update() and changes guest mode to VIR_CPU_MODE_CUSTOM, meaning that PSeries is currently the only exception to this logic. Let's make it official. https://bugzilla.redhat.com/show_bug.cgi?id=1660711 CC: Jiri Denemark <jdenemar@redhat.com> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/qemu/qemu_process.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index a1ef1d42b0..fec1720f33 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4347,8 +4347,14 @@ qemuProcessUpdateCPU(virQEMUDriverPtr driver, /* The host CPU model comes from host caps rather than QEMU caps so * fallback must be allowed no matter what the user specified in the XML. + * + * Note: PSeries domains are able to run with host-model CPU by design, + * even on Libvirt newer than 2.3, never replacing host-model with + * custom in the virCPUUpdate() call prior to this function. It is not + * needed to change the user defined 'fallback' attribute in this case. */ - vm->def->cpu->fallback = VIR_CPU_FALLBACK_ALLOW; + if (!qemuDomainIsPSeries(vm->def)) + vm->def->cpu->fallback = VIR_CPU_FALLBACK_ALLOW; if (qemuProcessFetchGuestCPU(driver, vm, asyncJob, &cpu, &disabled) < 0) return -1; -- 2.26.2

On Fri, May 22, 2020 at 16:56:20 -0300, Daniel Henrique Barboza wrote:
Commit v3.10.0-182-g237f045d9a ("qemu: Ignore fallback CPU attribute on reconnect") forced CPU 'fallback' to ALLOW, regardless of user choice. This fixed a situation in which guests created with older Libvirt versions, which used CPU mode 'host-model' in runtime, would fail to launch in a newer Libvirt if the fallback was set to FORBID. This would lead to a scenario where the CPU was translated to 'host-model' to 'custom', but then the FORBID setting would make the translation process fail.
This fix has a side effect for PSeries guests. PSeries can operate with 'host-model' in runtime due to specific PPC64 mechanics regarding compatibility mode. In fact, the update() implementation of the cpuDriverPPC64 driver is a NO-OP if CPU mode is 'host-model', and the driver does not implement translate(). The result is that PSeries guests aren't affected by the problem, but they are being affected by the fix - users are seeing 'fallback' mode being changed without necessity during daemon restart.
All other cpuArchDrivers implements update() and changes guest mode to VIR_CPU_MODE_CUSTOM, meaning that PSeries is currently the only exception to this logic. Let's make it official.
https://bugzilla.redhat.com/show_bug.cgi?id=1660711
CC: Jiri Denemark <jdenemar@redhat.com> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/qemu/qemu_process.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index a1ef1d42b0..fec1720f33 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4347,8 +4347,14 @@ qemuProcessUpdateCPU(virQEMUDriverPtr driver,
/* The host CPU model comes from host caps rather than QEMU caps so * fallback must be allowed no matter what the user specified in the XML. + * + * Note: PSeries domains are able to run with host-model CPU by design, + * even on Libvirt newer than 2.3, never replacing host-model with + * custom in the virCPUUpdate() call prior to this function. It is not + * needed to change the user defined 'fallback' attribute in this case. */ - vm->def->cpu->fallback = VIR_CPU_FALLBACK_ALLOW; + if (!qemuDomainIsPSeries(vm->def)) + vm->def->cpu->fallback = VIR_CPU_FALLBACK_ALLOW;
if (qemuProcessFetchGuestCPU(driver, vm, asyncJob, &cpu, &disabled) < 0) return -1;
qemuProcessUpdateCPU should not be called at all in this case. The caller (qemuProcessRefreshCPU) is supposed to decide whether the guest CPU needs to be changed: /* If the domain with a host-model CPU was started by an old libvirt * (< 2.3) which didn't replace the CPU with a custom one, let's do it now * since the rest of our code does not really expect a host-model CPU in a * running domain. */ if (vm->def->cpu->mode == VIR_CPU_MODE_HOST_MODEL) { if (!(hostmig = virCPUCopyMigratable(host->arch, host))) return -1; if (!(cpu = virCPUDefCopyWithoutModel(hostmig)) || virCPUDefCopyModelFilter(cpu, hostmig, false, virQEMUCapsCPUFilterFeatures, &host->arch) < 0) return -1; if (virCPUUpdate(vm->def->os.arch, vm->def->cpu, cpu) < 0) return -1; if (qemuProcessUpdateCPU(driver, vm, QEMU_ASYNC_JOB_NONE) < 0) return -1; So better solution would be to move your new comment and check just after the check for VIR_CPU_MODE_HOST_MODEL: if (vm->def->cpu->mode == VIR_CPU_MODE_HOST_MODEL) { /* PSeries... */ if (qemuDomainIsPSeries(vm->def)) return 0; ... Jirka

On 5/25/20 7:38 AM, Jiri Denemark wrote:
On Fri, May 22, 2020 at 16:56:20 -0300, Daniel Henrique Barboza wrote:
Commit v3.10.0-182-g237f045d9a ("qemu: Ignore fallback CPU attribute on reconnect") forced CPU 'fallback' to ALLOW, regardless of user choice. This fixed a situation in which guests created with older Libvirt versions, which used CPU mode 'host-model' in runtime, would fail to launch in a newer Libvirt if the fallback was set to FORBID. This would lead to a scenario where the CPU was translated to 'host-model' to 'custom', but then the FORBID setting would make the translation process fail.
This fix has a side effect for PSeries guests. PSeries can operate with 'host-model' in runtime due to specific PPC64 mechanics regarding compatibility mode. In fact, the update() implementation of the cpuDriverPPC64 driver is a NO-OP if CPU mode is 'host-model', and the driver does not implement translate(). The result is that PSeries guests aren't affected by the problem, but they are being affected by the fix - users are seeing 'fallback' mode being changed without necessity during daemon restart.
All other cpuArchDrivers implements update() and changes guest mode to VIR_CPU_MODE_CUSTOM, meaning that PSeries is currently the only exception to this logic. Let's make it official.
https://bugzilla.redhat.com/show_bug.cgi?id=1660711
CC: Jiri Denemark <jdenemar@redhat.com> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/qemu/qemu_process.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index a1ef1d42b0..fec1720f33 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4347,8 +4347,14 @@ qemuProcessUpdateCPU(virQEMUDriverPtr driver,
/* The host CPU model comes from host caps rather than QEMU caps so * fallback must be allowed no matter what the user specified in the XML. + * + * Note: PSeries domains are able to run with host-model CPU by design, + * even on Libvirt newer than 2.3, never replacing host-model with + * custom in the virCPUUpdate() call prior to this function. It is not + * needed to change the user defined 'fallback' attribute in this case. */ - vm->def->cpu->fallback = VIR_CPU_FALLBACK_ALLOW; + if (!qemuDomainIsPSeries(vm->def)) + vm->def->cpu->fallback = VIR_CPU_FALLBACK_ALLOW;
if (qemuProcessFetchGuestCPU(driver, vm, asyncJob, &cpu, &disabled) < 0) return -1;
qemuProcessUpdateCPU should not be called at all in this case. The caller (qemuProcessRefreshCPU) is supposed to decide whether the guest CPU needs to be changed:
/* If the domain with a host-model CPU was started by an old libvirt * (< 2.3) which didn't replace the CPU with a custom one, let's do it now * since the rest of our code does not really expect a host-model CPU in a * running domain. */ if (vm->def->cpu->mode == VIR_CPU_MODE_HOST_MODEL) { if (!(hostmig = virCPUCopyMigratable(host->arch, host))) return -1;
if (!(cpu = virCPUDefCopyWithoutModel(hostmig)) || virCPUDefCopyModelFilter(cpu, hostmig, false, virQEMUCapsCPUFilterFeatures, &host->arch) < 0) return -1;
if (virCPUUpdate(vm->def->os.arch, vm->def->cpu, cpu) < 0) return -1;
if (qemuProcessUpdateCPU(driver, vm, QEMU_ASYNC_JOB_NONE) < 0) return -1;
So better solution would be to move your new comment and check just after the check for VIR_CPU_MODE_HOST_MODEL:
if (vm->def->cpu->mode == VIR_CPU_MODE_HOST_MODEL) { /* PSeries... */ if (qemuDomainIsPSeries(vm->def)) return 0;
Got it. I'll also update the commit msg to mention that the problem resides in doing the proper handling in qemuProcessRefreshCPU(), and that your patch in qemuProcessUpdateCPU() I mentioned there simply exposed the problem. Thanks, DHB
...
Jirka
participants (2)
-
Daniel Henrique Barboza
-
Jiri Denemark