[PATCH] bhyve: support NUMA configuration for domains
Bhyve supports NUMA domain configuration using the '-n' command line argument: -n id,size,cpus[,domain_policy] Here, "id" is a numeric NUMA domain id, "size" is the memory size of that domain with units format similar to the "-m" switch, "cpus" is a cpuset, and "domain_policy" is an optional domainset(9) memory allocation policy. The "domain_policy" is currently not used by the libvirt driver. This argument is repeated for every NUMA domain to be configured, e.g.: bhyve \ ... -n id=0,size=107,cpus=0-3 -n id=1,size=107,cpus=4-7 To support that: * Add a corresponding capability; it is considered supported if the bhyve binary has the '-n' command line switch. * Generate command line arguments for NUMA from <cpu><numa>..</numa></cpu> domain configuration. Additionally, validate that: * NUMA domains are only configured with UEFI loaders. * No more than 8 domains are configured per VM, as limited by bhyve. * No NUMA domain has an empty cpuset. Signed-off-by: Roman Bogorodskiy <bogorodskiy@gmail.com> --- src/bhyve/bhyve_capabilities.c | 3 ++ src/bhyve/bhyve_capabilities.h | 1 + src/bhyve/bhyve_command.c | 26 ++++++++++++ src/bhyve/bhyve_domain.c | 19 +++++++++ .../bhyvexml2argv-numa-empty-cpuset.xml | 29 +++++++++++++ .../bhyvexml2argv-numa-too-many-domains.xml | 36 ++++++++++++++++ .../x86_64/bhyvexml2argv-numa.args | 14 +++++++ .../x86_64/bhyvexml2argv-numa.ldargs | 1 + .../x86_64/bhyvexml2argv-numa.xml | 29 +++++++++++++ tests/bhyvexml2argvtest.c | 10 ++++- .../x86_64/bhyvexml2xmlout-numa.xml | 42 +++++++++++++++++++ tests/bhyvexml2xmltest.c | 1 + 12 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml create mode 100644 
tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml diff --git a/src/bhyve/bhyve_capabilities.c b/src/bhyve/bhyve_capabilities.c index d2a48ed30c..c3fb88fe9f 100644 --- a/src/bhyve/bhyve_capabilities.c +++ b/src/bhyve/bhyve_capabilities.c @@ -258,6 +258,9 @@ bhyveProbeCapsFromHelp(unsigned int *caps, char *binary) if (strstr(help, "-A:") != NULL) *caps |= BHYVE_CAP_ACPI; + if (strstr(help, "-n:") != NULL) + *caps |= BHYVE_CAP_NUMA; + return 0; } diff --git a/src/bhyve/bhyve_capabilities.h b/src/bhyve/bhyve_capabilities.h index d5346df7ba..31fd9ab86a 100644 --- a/src/bhyve/bhyve_capabilities.h +++ b/src/bhyve/bhyve_capabilities.h @@ -56,6 +56,7 @@ typedef enum { BHYVE_CAP_VIRTIO_RND = 1 << 10, BHYVE_CAP_NVME = 1 << 11, BHYVE_CAP_ACPI = 1 << 12, + BHYVE_CAP_NUMA = 1 << 13, } virBhyveCapsFlags; int virBhyveProbeGrubCaps(virBhyveGrubCapsFlags *caps); diff --git a/src/bhyve/bhyve_command.c b/src/bhyve/bhyve_command.c index 37618812bc..931d7dd551 100644 --- a/src/bhyve/bhyve_command.c +++ b/src/bhyve/bhyve_command.c @@ -905,6 +905,7 @@ virBhyveProcessBuildBhyveCmd(struct _bhyveConn *driver, virDomainDef *def, unsigned nusbcontrollers = 0; unsigned nisacontrollers = 0; unsigned nvcpus = virDomainDefGetVcpus(def); + size_t ncells = virDomainNumaGetNodeCount(def->numa); /* CPUs */ virCommandAddArg(cmd, "-c"); @@ -955,6 +956,31 @@ virBhyveProcessBuildBhyveCmd(struct _bhyveConn *driver, virDomainDef *def, } } + /* NUMA */ + if (ncells) { + if (!(bhyveDriverGetBhyveCaps(driver) & BHYVE_CAP_NUMA)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Installed bhyve binary does not support NUMA configuration")); + return NULL; + } + + if (def->os.bootloader || !def->os.loader) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("NUMA configuration is only supported when booting using UEFI")); + return NULL; + } + + for (i = 0; i < ncells; i++) { + unsigned long long memSize = virDomainNumaGetNodeMemorySize(def->numa, i); + virBitmap *cpus = 
virDomainNumaGetNodeCpumask(def->numa, i); + g_autofree char *cpumask = virBitmapFormat(cpus); + + virCommandAddArg(cmd, "-n"); + virCommandAddArgFormat(cmd, "id=%zu,size=%llu,cpus=%s", i, VIR_DIV_UP(memSize, 1024), + cpumask); + } + } + /* Memory */ virCommandAddArg(cmd, "-m"); virCommandAddArgFormat(cmd, "%llu", diff --git a/src/bhyve/bhyve_domain.c b/src/bhyve/bhyve_domain.c index 85960c6e12..4594d7673f 100644 --- a/src/bhyve/bhyve_domain.c +++ b/src/bhyve/bhyve_domain.c @@ -411,6 +411,7 @@ bhyveDomainDefValidate(const virDomainDef *def, void *parseOpaque G_GNUC_UNUSED) { size_t i; + size_t ncells; virStorageSource *src = NULL; g_autoptr(GHashTable) nvme_controllers = g_hash_table_new(g_direct_hash, g_direct_equal); @@ -445,6 +446,24 @@ bhyveDomainDefValidate(const virDomainDef *def, return -1; } + ncells = virDomainNumaGetNodeCount(def->numa); + if (ncells) { + if (ncells > 8) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Only up to 8 NUMA domains are supported")); + return -1; + } + + for (i = 0; i < ncells; i++) { + if (!virDomainNumaGetNodeCpumask(def->numa, i)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("NUMA domain id %1$zu: empty cpusets are not allowed"), + i); + return -1; + } + } + } + if (!def->os.loader) return 0; diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml new file mode 100644 index 0000000000..9a5fc282ba --- /dev/null +++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml @@ -0,0 +1,29 @@ +<domain type='bhyve'> + <name>bhyve</name> + <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid> + <memory>219136</memory> + <vcpu>8</vcpu> + <os firmware='efi'> + <type>hvm</type> + </os> + <cpu> + <numa> + <cell id='0' cpus='0-3' memory='109568' unit='KiB'/> + <cell id='1' memory='109568' unit='KiB'/> + </numa> + </cpu> + <devices> + <disk type='file'> + <driver name='file' type='raw'/> + <source file='/tmp/freebsd.img'/> 
+ <target dev='hda' bus='sata'/> + <address type='drive' controller='0' bus='0' target='2' unit='0'/> + </disk> + <interface type='bridge'> + <mac address='52:54:00:b9:94:02'/> + <model type='virtio'/> + <source bridge="virbr0"/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </interface> + </devices> +</domain> diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml new file mode 100644 index 0000000000..bcabe5cd85 --- /dev/null +++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml @@ -0,0 +1,36 @@ +<domain type='bhyve'> + <name>bhyve</name> + <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid> + <memory>876544</memory> + <vcpu>9</vcpu> + <os firmware='efi'> + <type>hvm</type> + </os> + <cpu> + <numa> + <cell id='0' cpus='0' memory='109568' unit='KiB'/> + <cell id='1' cpus='1' memory='109568' unit='KiB'/> + <cell id='2' cpus='2' memory='109568' unit='KiB'/> + <cell id='3' cpus='3' memory='109568' unit='KiB'/> + <cell id='4' cpus='4' memory='109568' unit='KiB'/> + <cell id='5' cpus='5' memory='109568' unit='KiB'/> + <cell id='6' cpus='6' memory='109568' unit='KiB'/> + <cell id='7' cpus='7' memory='109568' unit='KiB'/> + <cell id='8' cpus='8' memory='109568' unit='KiB'/> + </numa> + </cpu> + <devices> + <disk type='file'> + <driver name='file' type='raw'/> + <source file='/tmp/freebsd.img'/> + <target dev='hda' bus='sata'/> + <address type='drive' controller='0' bus='0' target='2' unit='0'/> + </disk> + <interface type='bridge'> + <mac address='52:54:00:b9:94:02'/> + <model type='virtio'/> + <source bridge="virbr0"/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </interface> + </devices> +</domain> diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args new file mode 100644 index 0000000000..15efd1c357 --- 
/dev/null +++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args @@ -0,0 +1,14 @@ +bhyve \ +-c 8 \ +-n id=0,size=107,cpus=0-3 \ +-n id=1,size=107,cpus=4-7 \ +-m 214 \ +-u \ +-H \ +-P \ +-s 0:0,hostbridge \ +-l bootrom,fakefirmwaredir/BHYVE_UEFI.fd,fakenvramdir/bhyve_VARS.fd \ +-s 1:0,lpc \ +-s 2:0,ahci,hd:/tmp/freebsd.img \ +-s 3:0,virtio-net,faketapdev,mac=52:54:00:b9:94:02 \ +bhyve diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs new file mode 100644 index 0000000000..421376db9e --- /dev/null +++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs @@ -0,0 +1 @@ +dummy diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml new file mode 100644 index 0000000000..8a0da7830d --- /dev/null +++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml @@ -0,0 +1,29 @@ +<domain type='bhyve'> + <name>bhyve</name> + <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid> + <memory>219136</memory> + <vcpu>8</vcpu> + <os firmware='efi'> + <type>hvm</type> + </os> + <cpu> + <numa> + <cell id='0' cpus='0-3' memory='109568' unit='KiB'/> + <cell id='1' cpus='4-7' memory='109568' unit='KiB'/> + </numa> + </cpu> + <devices> + <disk type='file'> + <driver name='file' type='raw'/> + <source file='/tmp/freebsd.img'/> + <target dev='hda' bus='sata'/> + <address type='drive' controller='0' bus='0' target='2' unit='0'/> + </disk> + <interface type='bridge'> + <mac address='52:54:00:b9:94:02'/> + <model type='virtio'/> + <source bridge="virbr0"/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </interface> + </devices> +</domain> diff --git a/tests/bhyvexml2argvtest.c b/tests/bhyvexml2argvtest.c index 2330e70bbf..b7749fec6f 100644 --- a/tests/bhyvexml2argvtest.c +++ b/tests/bhyvexml2argvtest.c @@ -202,7 +202,7 @@ mymain(void) BHYVE_CAP_FBUF | BHYVE_CAP_XHCI | \ BHYVE_CAP_CPUTOPOLOGY | BHYVE_CAP_SOUND_HDA 
| \ BHYVE_CAP_VNC_PASSWORD | BHYVE_CAP_VIRTIO_9P | \ - BHYVE_CAP_NVME; + BHYVE_CAP_NVME | BHYVE_CAP_NUMA; DO_TEST("base"); DO_TEST("wired"); @@ -254,6 +254,11 @@ mymain(void) DO_TEST("isa-controller"); DO_TEST_FAILURE("isa-multiple-controllers"); DO_TEST("firmware-efi"); + DO_TEST("numa"); + DO_TEST_FAILURE("numa-empty-cpuset"); + DO_TEST_FAILURE("numa-too-many-domains"); + driver.bhyvecaps &= ~BHYVE_CAP_NUMA; + DO_TEST_FAILURE("numa"); fakefirmwaredir = g_steal_pointer(&driver.config->firmwareDir); driver.config->firmwareDir = g_steal_pointer(&fakefirmwareemptydir); DO_TEST_PREPARE_ERROR("firmware-efi"); @@ -345,10 +350,13 @@ mymain(void) driver.caps = virBhyveCapsBuild(); /* bhyve does not support UTC clock on ARM */ driver.bhyvecaps ^= BHYVE_CAP_RTC_UTC; + /* bhyve does not support NUMA on ARM */ + driver.bhyvecaps &= ~BHYVE_CAP_NUMA; DO_TEST("base"); DO_TEST("console"); DO_TEST("bootloader"); + DO_TEST_FAILURE("numa"); virObjectUnref(driver.caps); virObjectUnref(driver.xmlopt); diff --git a/tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml b/tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml new file mode 100644 index 0000000000..ecc147db78 --- /dev/null +++ b/tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml @@ -0,0 +1,42 @@ +<domain type='bhyve'> + <name>bhyve</name> + <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid> + <memory unit='KiB'>219136</memory> + <currentMemory unit='KiB'>219136</currentMemory> + <vcpu placement='static'>8</vcpu> + <os firmware='efi'> + <type arch='x86_64'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0-3' memory='109568' unit='KiB'/> + <cell id='1' cpus='4-7' memory='109568' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <disk type='file' device='disk'> + <driver name='file' type='raw'/> + <source file='/tmp/freebsd.img'/> + <target dev='hda' bus='sata'/> + 
<address type='drive' controller='0' bus='0' target='2' unit='0'/> + </disk> + <controller type='pci' index='0' model='pci-root'/> + <controller type='isa' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> + </controller> + <controller type='sata' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/> + </controller> + <interface type='bridge'> + <mac address='52:54:00:b9:94:02'/> + <source bridge='virbr0'/> + <model type='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </interface> + </devices> +</domain> diff --git a/tests/bhyvexml2xmltest.c b/tests/bhyvexml2xmltest.c index 7f9de2bc36..950aaea672 100644 --- a/tests/bhyvexml2xmltest.c +++ b/tests/bhyvexml2xmltest.c @@ -132,6 +132,7 @@ mymain(void) DO_TEST_DIFFERENT("passthru-multiple-devs"); DO_TEST_DIFFERENT("slirp"); DO_TEST_DIFFERENT("virtio-scsi"); + DO_TEST_DIFFERENT("numa"); /* Address allocation tests */ DO_TEST_DIFFERENT("addr-single-sata-disk"); -- 2.52.0
participants (1)
-
Roman Bogorodskiy