[libvirt PATCH 0/4] increase locked memory limit when a vDPA device is present

We recently learned that vDPA devices may/will need to pin/lock all of
guest memory, and so just as we do for VFIO devices, we need to increase
the locked memory limit when a vDPA device is present.

https://bugzilla.redhat.com/1939776

Laine Stump (4):
  conf: new function virDomainDefHasVDPANet()
  qemu: simplify qemuDomainGetMemLockLimitBytes()
  qemu: account for mdev devices in getPPC64MemLockLimitBytes()
  qemu: increase locked memory limit when a vDPA device is present

 src/conf/domain_conf.c   | 14 ++++++++++++++
 src/conf/domain_conf.h   |  3 +++
 src/libvirt_private.syms |  1 +
 src/qemu/qemu_domain.c   | 26 +++----------------------
 4 files changed, 21 insertions(+), 23 deletions(-)

--
2.30.2

[libvirt PATCH 1/4] conf: new function virDomainDefHasVDPANet()

This function returns true if the domain has any interfaces that are
type='vdpa'.

Signed-off-by: Laine Stump <laine@redhat.com>
---
 src/conf/domain_conf.c   | 14 ++++++++++++++
 src/conf/domain_conf.h   |  3 +++
 src/libvirt_private.syms |  1 +
 3 files changed, 18 insertions(+)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index f071bf93d0..736e9de3c8 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -32500,6 +32500,20 @@ virDomainDefHasMdevHostdev(const virDomainDef *def)
 }
 
 
+bool
+virDomainDefHasVDPANet(const virDomainDef *def)
+{
+    size_t i;
+
+    for (i = 0; i < def->nnets; i++) {
+        if (virDomainNetGetActualType(def->nets[i]) == VIR_DOMAIN_NET_TYPE_VDPA)
+            return true;
+    }
+
+    return false;
+}
+
+
 bool
 virDomainDefHasOldStyleUEFI(const virDomainDef *def)
 {
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index da32016b01..aa15184821 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -4061,6 +4061,9 @@ virDomainDefHasVFIOHostdev(const virDomainDef *def);
 bool
 virDomainDefHasMdevHostdev(const virDomainDef *def);
 
+bool
+virDomainDefHasVDPANet(const virDomainDef *def);
+
 bool
 virDomainDefHasOldStyleUEFI(const virDomainDef *def);
 
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 526dcee11a..1bf8165520 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -332,6 +332,7 @@ virDomainDefHasOldStyleROUEFI;
 virDomainDefHasOldStyleUEFI;
 virDomainDefHasUSB;
 virDomainDefHasVcpusOffline;
+virDomainDefHasVDPANet;
 virDomainDefHasVFIOHostdev;
 virDomainDefLifecycleActionAllowed;
 virDomainDefMaybeAddController;
--
2.30.2

On 3/23/21 3:50 PM, Laine Stump wrote:
> This function returns true if the domain has any interfaces that are
> type='vdpa'.
>
> Signed-off-by: Laine Stump <laine@redhat.com>
> ---

Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>

[libvirt PATCH 2/4] qemu: simplify qemuDomainGetMemLockLimitBytes()

This function goes through a loop checking if each hostdev is a VFIO
or mdev device, and then later it calls virDomainDefHasNVMeDisk(). The
function qemuDomainNeedsVFIO() does exactly the same thing, so let's
just call that instead.

Signed-off-by: Laine Stump <laine@redhat.com>
---
 src/qemu/qemu_domain.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 5f0c7f0531..5238a52095 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -9258,8 +9258,6 @@ qemuDomainGetMemLockLimitBytes(virDomainDefPtr def,
                                bool forceVFIO)
 {
     unsigned long long memKB = 0;
-    bool usesVFIO = false;
-    size_t i;
 
     /* prefer the hard limit */
     if (virMemoryLimitIsSet(def->mem.hard_limit)) {
@@ -9296,20 +9294,7 @@ qemuDomainGetMemLockLimitBytes(virDomainDefPtr def,
      *
      * Note that this may not be valid for all platforms.
      */
-    if (!forceVFIO) {
-        for (i = 0; i < def->nhostdevs; i++) {
-            if (virHostdevIsVFIODevice(def->hostdevs[i]) ||
-                virHostdevIsMdevDevice(def->hostdevs[i])) {
-                usesVFIO = true;
-                break;
-            }
-        }
-
-        if (virDomainDefHasNVMeDisk(def))
-            usesVFIO = true;
-    }
-
-    if (usesVFIO || forceVFIO)
+    if (forceVFIO || qemuDomainNeedsVFIO(def))
         memKB = virDomainDefGetMemoryTotal(def) + 1024 * 1024;
 
     return memKB << 10;
--
2.30.2
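
For reference, the existing qemuDomainNeedsVFIO() helper performs the same
three checks as the removed open-coded block; its shape is approximately
the following (shown only as a reminder, the exact body in the tree may
differ slightly):

    bool
    qemuDomainNeedsVFIO(const virDomainDef *def)
    {
        /* VFIO/mdev hostdevs and NVMe disks all require guest RAM to be
         * locked, which is exactly what the removed loop was testing for */
        return virDomainDefHasVFIOHostdev(def) ||
            virDomainDefHasMdevHostdev(def) ||
            virDomainDefHasNVMeDisk(def);
    }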

On 3/23/21 3:50 PM, Laine Stump wrote:
> This function goes through a loop checking if each hostdev is a VFIO
> or mdev device, and then later it calls virDomainDefHasNVMeDisk(). The
> function qemuDomainNeedsVFIO() does exactly the same thing, so let's
> just call that instead.
>
> Signed-off-by: Laine Stump <laine@redhat.com>
> ---

Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>

[libvirt PATCH 3/4] qemu: account for mdev devices in getPPC64MemLockLimitBytes()

This function is a specialized version of qemuDomainGetMemLockLimitBytes()
for PPC64. Simplifying it in the same manner as the previous patch has the
nice side effect of accounting for the possibility of an mdev device in
the config.

(I don't know if mdev devices are supported on PPC, but even if not then
a) the additional check for mdev devices gained by using
qemuDomainNeedsVFIO() in place of open coding will be an effective NOP,
and b) if mdev devices are supported on PPC64 in the future, this function
will be prepared for it).

Signed-off-by: Laine Stump <laine@redhat.com>
---
 src/qemu/qemu_domain.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 5238a52095..e0379e81ba 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -9122,7 +9122,6 @@ getPPC64MemLockLimitBytes(virDomainDefPtr def,
     unsigned long long passthroughLimit = 0;
     size_t i, nPCIHostBridges = 0;
     virPCIDeviceAddressPtr pciAddr;
-    bool usesVFIO = false;
     bool nvlink2Capable = false;
 
     for (i = 0; i < def->ncontrollers; i++) {
@@ -9138,7 +9137,6 @@ getPPC64MemLockLimitBytes(virDomainDefPtr def,
         virDomainHostdevDefPtr dev = def->hostdevs[i];
 
         if (virHostdevIsVFIODevice(dev)) {
-            usesVFIO = true;
 
             pciAddr = &dev->source.subsys.u.pci.addr;
             if (virPCIDeviceAddressIsValid(pciAddr, false)) {
@@ -9153,9 +9151,6 @@ getPPC64MemLockLimitBytes(virDomainDefPtr def,
         }
     }
 
-    if (virDomainDefHasNVMeDisk(def))
-        usesVFIO = true;
-
     memory = virDomainDefGetMemoryTotal(def);
 
     if (def->mem.max_memory)
@@ -9180,7 +9175,7 @@ getPPC64MemLockLimitBytes(virDomainDefPtr def,
                 8192;
 
     /* NVLink2 support in QEMU is a special case of the passthrough
-     * mechanics explained in the usesVFIO case below. The GPU RAM
+     * mechanics explained in the forceVFIO case below. The GPU RAM
      * is placed with a gap after maxMemory. The current QEMU
      * implementation puts the NVIDIA RAM above the PCI MMIO, which
     * starts at 32TiB and is the MMIO reserved for the guest main RAM.
@@ -9204,7 +9199,7 @@ getPPC64MemLockLimitBytes(virDomainDefPtr def,
         passthroughLimit = maxMemory +
                            128 * (1ULL<<30) / 512 * nPCIHostBridges +
                            8192;
-    } else if (usesVFIO || forceVFIO) {
+    } else if (forceVFIO || qemuDomainNeedsVFIO(def)) {
         /* For regular (non-NVLink2 present) VFIO passthrough, the value
          * of passthroughLimit is:
          *
--
2.30.2
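
For reference, the mdev check that this change picks up (qemuDomainNeedsVFIO()
calls virDomainDefHasMdevHostdev(), added alongside the VFIO helper) ultimately
boils down to virHostdevIsMdevDevice(); its approximate shape is shown below
purely as a reminder and may not match the tree word for word:

    bool
    virHostdevIsMdevDevice(const virDomainHostdevDef *hostdev)
    {
        /* a mediated device is a subsystem hostdev of type 'mdev'; on a
         * PPC64 guest with no such devices this simply returns false, so
         * the new code path behaves exactly like the old one */
        return hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
            hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_MDEV;
    }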

On 3/23/21 3:50 PM, Laine Stump wrote:
> This function is a specialized version of qemuDomainGetMemLockLimitBytes()
> for PPC64. Simplifying it in the same manner as the previous patch has
> the nice side effect of accounting for the possibility of an mdev device
> in the config.
>
> (I don't know if mdev devices are supported on PPC, but even if not then
> a) the additional check for mdev devices gained by using
> qemuDomainNeedsVFIO() in place of open coding will be an effective NOP,
> and b) if mdev devices are supported on PPC64 in the future, this
> function will be prepared for it).

PowerPC guests do not support mdev. I agree that there is no harm in
considering that this might change in the future, though. One less piece
of code to change is always welcome.

Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>

> Signed-off-by: Laine Stump <laine@redhat.com>
> ---

[libvirt PATCH 4/4] qemu: increase locked memory limit when a vDPA device is present

Just like VFIO devices, vDPA devices may need to have all guest memory
pages locked/pinned in order to operate properly. In the case of VFIO
devices (including mdev and NVMe, which also use VFIO), libvirt
automatically increases the locked memory limit when one of those devices
is present. This patch modifies that code to also increase the limit if
there are any vDPA devices present.

Resolves: https://bugzilla.redhat.com/1939776
Signed-off-by: Laine Stump <laine@redhat.com>
---
 src/qemu/qemu_domain.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index e0379e81ba..76e8903dbc 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -9199,7 +9199,7 @@ getPPC64MemLockLimitBytes(virDomainDefPtr def,
         passthroughLimit = maxMemory +
                            128 * (1ULL<<30) / 512 * nPCIHostBridges +
                            8192;
-    } else if (forceVFIO || qemuDomainNeedsVFIO(def)) {
+    } else if (forceVFIO || qemuDomainNeedsVFIO(def) || virDomainDefHasVDPANet(def)) {
         /* For regular (non-NVLink2 present) VFIO passthrough, the value
          * of passthroughLimit is:
          *
@@ -9289,7 +9289,7 @@ qemuDomainGetMemLockLimitBytes(virDomainDefPtr def,
      *
      * Note that this may not be valid for all platforms.
      */
-    if (forceVFIO || qemuDomainNeedsVFIO(def))
+    if (forceVFIO || qemuDomainNeedsVFIO(def) || virDomainDefHasVDPANet(def))
         memKB = virDomainDefGetMemoryTotal(def) + 1024 * 1024;
 
     return memKB << 10;
--
2.30.2
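
To make the effect concrete on non-PPC64 hosts: a guest with 4 GiB of RAM
and a vDPA interface (and no hard limit configured) now gets the same
locked-memory limit as a guest with a VFIO hostdev, i.e. all of guest RAM
plus 1 GiB of headroom. A hypothetical illustration of the arithmetic done
by the code above:

    /* hypothetical example only, mirroring qemuDomainGetMemLockLimitBytes()
     * for a 4 GiB guest; virDomainDefGetMemoryTotal() reports KiB */
    unsigned long long memKB = 4ULL * 1024 * 1024 +  /* guest RAM in KiB */
                               1024 * 1024;          /* 1 GiB of headroom */
    unsigned long long limit = memKB << 10;          /* 5368709120 bytes == 5 GiB */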

On 3/23/21 3:50 PM, Laine Stump wrote:
> Just like VFIO devices, vDPA devices may need to have all guest memory
> pages locked/pinned in order to operate properly. In the case of VFIO
> devices (including mdev and NVMe, which also use VFIO), libvirt
> automatically increases the locked memory limit when one of those
> devices is present. This patch modifies that code to also increase the
> limit if there are any vDPA devices present.
>
> Resolves: https://bugzilla.redhat.com/1939776
> Signed-off-by: Laine Stump <laine@redhat.com>
> ---

Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>