Bhyve supports NUMA domain configuration using the '-n'
command line argument:
-n id,size,cpus[,domain_policy]
Here, "id" is a numeric NUMA domain id, "size" is the memory
size of that domain, in the same units format as the "-m" switch,
"cpus" is a cpuset, and "domain_policy" is an optional
domainset(9) memory allocation policy. The "domain_policy"
is currently not used by the libvirt driver.
This argument is repeated for every NUMA domain to be configured, e.g.:
bhyve \
...
-n id=0,size=107,cpus=0-3
-n id=1,size=107,cpus=4-7
To support that:
* Add a corresponding capability; it is considered supported
if the bhyve binary has the '-n' command line switch.
* Generate command line arguments for NUMA from
<cpu><numa>..</numa></cpu> domain configuration.
Additionally, validate that:
* NUMA domains can only be configured with UEFI loaders.
* No more than 8 domains are configured per VM, as limited by bhyve.
* Every NUMA domain has a non-empty cpuset.
Signed-off-by: Roman Bogorodskiy <bogorodskiy(a)gmail.com>
---
src/bhyve/bhyve_capabilities.c | 3 ++
src/bhyve/bhyve_capabilities.h | 1 +
src/bhyve/bhyve_command.c | 26 ++++++++++++
src/bhyve/bhyve_domain.c | 19 +++++++++
.../bhyvexml2argv-numa-empty-cpuset.xml | 29 +++++++++++++
.../bhyvexml2argv-numa-too-many-domains.xml | 36 ++++++++++++++++
.../x86_64/bhyvexml2argv-numa.args | 14 +++++++
.../x86_64/bhyvexml2argv-numa.ldargs | 1 +
.../x86_64/bhyvexml2argv-numa.xml | 29 +++++++++++++
tests/bhyvexml2argvtest.c | 10 ++++-
.../x86_64/bhyvexml2xmlout-numa.xml | 42 +++++++++++++++++++
tests/bhyvexml2xmltest.c | 1 +
12 files changed, 210 insertions(+), 1 deletion(-)
create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml
create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml
create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args
create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs
create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml
create mode 100644 tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml
diff --git a/src/bhyve/bhyve_capabilities.c b/src/bhyve/bhyve_capabilities.c
index d2a48ed30c..c3fb88fe9f 100644
--- a/src/bhyve/bhyve_capabilities.c
+++ b/src/bhyve/bhyve_capabilities.c
@@ -258,6 +258,9 @@ bhyveProbeCapsFromHelp(unsigned int *caps, char *binary)
if (strstr(help, "-A:") != NULL)
*caps |= BHYVE_CAP_ACPI;
+ if (strstr(help, "-n:") != NULL)
+ *caps |= BHYVE_CAP_NUMA;
+
return 0;
}
diff --git a/src/bhyve/bhyve_capabilities.h b/src/bhyve/bhyve_capabilities.h
index d5346df7ba..31fd9ab86a 100644
--- a/src/bhyve/bhyve_capabilities.h
+++ b/src/bhyve/bhyve_capabilities.h
@@ -56,6 +56,7 @@ typedef enum {
BHYVE_CAP_VIRTIO_RND = 1 << 10,
BHYVE_CAP_NVME = 1 << 11,
BHYVE_CAP_ACPI = 1 << 12,
+ BHYVE_CAP_NUMA = 1 << 13,
} virBhyveCapsFlags;
int virBhyveProbeGrubCaps(virBhyveGrubCapsFlags *caps);
diff --git a/src/bhyve/bhyve_command.c b/src/bhyve/bhyve_command.c
index 37618812bc..931d7dd551 100644
--- a/src/bhyve/bhyve_command.c
+++ b/src/bhyve/bhyve_command.c
@@ -905,6 +905,7 @@ virBhyveProcessBuildBhyveCmd(struct _bhyveConn *driver, virDomainDef *def,
unsigned nusbcontrollers = 0;
unsigned nisacontrollers = 0;
unsigned nvcpus = virDomainDefGetVcpus(def);
+ size_t ncells = virDomainNumaGetNodeCount(def->numa);
/* CPUs */
virCommandAddArg(cmd, "-c");
@@ -955,6 +956,31 @@ virBhyveProcessBuildBhyveCmd(struct _bhyveConn *driver, virDomainDef *def,
}
}
+ /* NUMA */
+ if (ncells) {
+ if (!(bhyveDriverGetBhyveCaps(driver) & BHYVE_CAP_NUMA)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Installed bhyve binary does not support NUMA configuration"));
+ return NULL;
+ }
+
+ if (def->os.bootloader || !def->os.loader) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("NUMA configuration is only supported when booting using UEFI"));
+ return NULL;
+ }
+
+ for (i = 0; i < ncells; i++) {
+ unsigned long long memSize = virDomainNumaGetNodeMemorySize(def->numa, i);
+ virBitmap *cpus = virDomainNumaGetNodeCpumask(def->numa, i);
+ g_autofree char *cpumask = virBitmapFormat(cpus);
+
+ virCommandAddArg(cmd, "-n");
+ virCommandAddArgFormat(cmd, "id=%zu,size=%llu,cpus=%s", i, VIR_DIV_UP(memSize, 1024),
+ cpumask);
+ }
+ }
+
/* Memory */
virCommandAddArg(cmd, "-m");
virCommandAddArgFormat(cmd, "%llu",
diff --git a/src/bhyve/bhyve_domain.c b/src/bhyve/bhyve_domain.c
index 85960c6e12..4594d7673f 100644
--- a/src/bhyve/bhyve_domain.c
+++ b/src/bhyve/bhyve_domain.c
@@ -411,6 +411,7 @@ bhyveDomainDefValidate(const virDomainDef *def,
void *parseOpaque G_GNUC_UNUSED)
{
size_t i;
+ size_t ncells;
virStorageSource *src = NULL;
g_autoptr(GHashTable) nvme_controllers = g_hash_table_new(g_direct_hash,
g_direct_equal);
@@ -445,6 +446,24 @@ bhyveDomainDefValidate(const virDomainDef *def,
return -1;
}
+ ncells = virDomainNumaGetNodeCount(def->numa);
+ if (ncells) {
+ if (ncells > 8) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Only up to 8 NUMA domains are supported"));
+ return -1;
+ }
+
+ for (i = 0; i < ncells; i++) {
+ if (!virDomainNumaGetNodeCpumask(def->numa, i)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("NUMA domain id %1$zu: empty cpusets are not allowed"),
+ i);
+ return -1;
+ }
+ }
+ }
+
if (!def->os.loader)
return 0;
diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml
new file mode 100644
index 0000000000..9a5fc282ba
--- /dev/null
+++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml
@@ -0,0 +1,29 @@
+<domain type='bhyve'>
+ <name>bhyve</name>
+ <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid>
+ <memory>219136</memory>
+ <vcpu>8</vcpu>
+ <os firmware='efi'>
+ <type>hvm</type>
+ </os>
+ <cpu>
+ <numa>
+ <cell id='0' cpus='0-3' memory='109568' unit='KiB'/>
+ <cell id='1' memory='109568' unit='KiB'/>
+ </numa>
+ </cpu>
+ <devices>
+ <disk type='file'>
+ <driver name='file' type='raw'/>
+ <source file='/tmp/freebsd.img'/>
+ <target dev='hda' bus='sata'/>
+ <address type='drive' controller='0' bus='0' target='2' unit='0'/>
+ </disk>
+ <interface type='bridge'>
+ <mac address='52:54:00:b9:94:02'/>
+ <model type='virtio'/>
+ <source bridge="virbr0"/>
+ <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
+ </interface>
+ </devices>
+</domain>
diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml
new file mode 100644
index 0000000000..bcabe5cd85
--- /dev/null
+++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml
@@ -0,0 +1,36 @@
+<domain type='bhyve'>
+ <name>bhyve</name>
+ <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid>
+ <memory>876544</memory>
+ <vcpu>9</vcpu>
+ <os firmware='efi'>
+ <type>hvm</type>
+ </os>
+ <cpu>
+ <numa>
+ <cell id='0' cpus='0' memory='109568' unit='KiB'/>
+ <cell id='1' cpus='1' memory='109568' unit='KiB'/>
+ <cell id='2' cpus='2' memory='109568' unit='KiB'/>
+ <cell id='3' cpus='3' memory='109568' unit='KiB'/>
+ <cell id='4' cpus='4' memory='109568' unit='KiB'/>
+ <cell id='5' cpus='5' memory='109568' unit='KiB'/>
+ <cell id='6' cpus='6' memory='109568' unit='KiB'/>
+ <cell id='7' cpus='7' memory='109568' unit='KiB'/>
+ <cell id='8' cpus='8' memory='109568' unit='KiB'/>
+ </numa>
+ </cpu>
+ <devices>
+ <disk type='file'>
+ <driver name='file' type='raw'/>
+ <source file='/tmp/freebsd.img'/>
+ <target dev='hda' bus='sata'/>
+ <address type='drive' controller='0' bus='0' target='2' unit='0'/>
+ </disk>
+ <interface type='bridge'>
+ <mac address='52:54:00:b9:94:02'/>
+ <model type='virtio'/>
+ <source bridge="virbr0"/>
+ <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
+ </interface>
+ </devices>
+</domain>
diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args
new file mode 100644
index 0000000000..15efd1c357
--- /dev/null
+++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args
@@ -0,0 +1,14 @@
+bhyve \
+-c 8 \
+-n id=0,size=107,cpus=0-3 \
+-n id=1,size=107,cpus=4-7 \
+-m 214 \
+-u \
+-H \
+-P \
+-s 0:0,hostbridge \
+-l bootrom,fakefirmwaredir/BHYVE_UEFI.fd,fakenvramdir/bhyve_VARS.fd \
+-s 1:0,lpc \
+-s 2:0,ahci,hd:/tmp/freebsd.img \
+-s 3:0,virtio-net,faketapdev,mac=52:54:00:b9:94:02 \
+bhyve
diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs
new file mode 100644
index 0000000000..421376db9e
--- /dev/null
+++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs
@@ -0,0 +1 @@
+dummy
diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml
new file mode 100644
index 0000000000..8a0da7830d
--- /dev/null
+++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml
@@ -0,0 +1,29 @@
+<domain type='bhyve'>
+ <name>bhyve</name>
+ <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid>
+ <memory>219136</memory>
+ <vcpu>8</vcpu>
+ <os firmware='efi'>
+ <type>hvm</type>
+ </os>
+ <cpu>
+ <numa>
+ <cell id='0' cpus='0-3' memory='109568' unit='KiB'/>
+ <cell id='1' cpus='4-7' memory='109568' unit='KiB'/>
+ </numa>
+ </cpu>
+ <devices>
+ <disk type='file'>
+ <driver name='file' type='raw'/>
+ <source file='/tmp/freebsd.img'/>
+ <target dev='hda' bus='sata'/>
+ <address type='drive' controller='0' bus='0' target='2' unit='0'/>
+ </disk>
+ <interface type='bridge'>
+ <mac address='52:54:00:b9:94:02'/>
+ <model type='virtio'/>
+ <source bridge="virbr0"/>
+ <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
+ </interface>
+ </devices>
+</domain>
diff --git a/tests/bhyvexml2argvtest.c b/tests/bhyvexml2argvtest.c
index 2330e70bbf..b7749fec6f 100644
--- a/tests/bhyvexml2argvtest.c
+++ b/tests/bhyvexml2argvtest.c
@@ -202,7 +202,7 @@ mymain(void)
BHYVE_CAP_FBUF | BHYVE_CAP_XHCI | \
BHYVE_CAP_CPUTOPOLOGY | BHYVE_CAP_SOUND_HDA | \
BHYVE_CAP_VNC_PASSWORD | BHYVE_CAP_VIRTIO_9P | \
- BHYVE_CAP_NVME;
+ BHYVE_CAP_NVME | BHYVE_CAP_NUMA;
DO_TEST("base");
DO_TEST("wired");
@@ -254,6 +254,11 @@ mymain(void)
DO_TEST("isa-controller");
DO_TEST_FAILURE("isa-multiple-controllers");
DO_TEST("firmware-efi");
+ DO_TEST("numa");
+ DO_TEST_FAILURE("numa-empty-cpuset");
+ DO_TEST_FAILURE("numa-too-many-domains");
+ driver.bhyvecaps &= ~BHYVE_CAP_NUMA;
+ DO_TEST_FAILURE("numa");
fakefirmwaredir = g_steal_pointer(&driver.config->firmwareDir);
driver.config->firmwareDir = g_steal_pointer(&fakefirmwareemptydir);
DO_TEST_PREPARE_ERROR("firmware-efi");
@@ -345,10 +350,13 @@ mymain(void)
driver.caps = virBhyveCapsBuild();
/* bhyve does not support UTC clock on ARM */
driver.bhyvecaps ^= BHYVE_CAP_RTC_UTC;
+ /* bhyve does not support NUMA on ARM */
+ driver.bhyvecaps &= ~BHYVE_CAP_NUMA;
DO_TEST("base");
DO_TEST("console");
DO_TEST("bootloader");
+ DO_TEST_FAILURE("numa");
virObjectUnref(driver.caps);
virObjectUnref(driver.xmlopt);
diff --git a/tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml b/tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml
new file mode 100644
index 0000000000..ecc147db78
--- /dev/null
+++ b/tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml
@@ -0,0 +1,42 @@
+<domain type='bhyve'>
+ <name>bhyve</name>
+ <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>8</vcpu>
+ <os firmware='efi'>
+ <type arch='x86_64'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <cpu>
+ <numa>
+ <cell id='0' cpus='0-3' memory='109568' unit='KiB'/>
+ <cell id='1' cpus='4-7' memory='109568' unit='KiB'/>
+ </numa>
+ </cpu>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <disk type='file' device='disk'>
+ <driver name='file' type='raw'/>
+ <source file='/tmp/freebsd.img'/>
+ <target dev='hda' bus='sata'/>
+ <address type='drive' controller='0' bus='0' target='2' unit='0'/>
+ </disk>
+ <controller type='pci' index='0' model='pci-root'/>
+ <controller type='isa' index='0'>
+ <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/>
+ </controller>
+ <controller type='sata' index='0'>
+ <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/>
+ </controller>
+ <interface type='bridge'>
+ <mac address='52:54:00:b9:94:02'/>
+ <source bridge='virbr0'/>
+ <model type='virtio'/>
+ <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
+ </interface>
+ </devices>
+</domain>
diff --git a/tests/bhyvexml2xmltest.c b/tests/bhyvexml2xmltest.c
index 7f9de2bc36..950aaea672 100644
--- a/tests/bhyvexml2xmltest.c
+++ b/tests/bhyvexml2xmltest.c
@@ -132,6 +132,7 @@ mymain(void)
DO_TEST_DIFFERENT("passthru-multiple-devs");
DO_TEST_DIFFERENT("slirp");
DO_TEST_DIFFERENT("virtio-scsi");
+ DO_TEST_DIFFERENT("numa");
/* Address allocation tests */
DO_TEST_DIFFERENT("addr-single-sata-disk");
--
2.52.0