[libvirt] [PATCH 0/2] Use huge pages on UMA guests widely

*** BLURB HERE *** Michal Privoznik (2): conf: Disallow nonexistent NUMA nodes for hugepages qemu: Honor hugepages for UMA domains src/conf/domain_conf.c | 36 +++++++++++++++++ src/qemu/qemu_command.c | 25 ++++++++++-- .../qemuxml2argv-hugepages-pages4.xml | 45 ++++++++++++++++++++++ .../qemuxml2argv-hugepages-pages5.args | 7 ++++ .../qemuxml2argv-hugepages-pages5.xml | 32 +++++++++++++++ tests/qemuxml2argvtest.c | 2 + 6 files changed, 143 insertions(+), 4 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages4.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.xml -- 1.8.5.5

As of 136ad4974 it is possible to specify different huge pages per guest NUMA node. However, there's no check if nodeset specified in ./hugepages/page contains only those guest NUMA nodes that exist. In other words with current code it is possible to define meaningless combination: <memoryBacking> <hugepages> <page size='1048576' unit='KiB' nodeset='0,2-3'/> <page size='2048' unit='KiB' nodeset='1,4'/> </hugepages> </memoryBacking> <vcpu placement='static'>4</vcpu> <cpu> <numa> <cell id='0' cpus='0' memory='1048576'/> <cell id='1' cpus='1' memory='1048576'/> <cell id='2' cpus='2' memory='1048576'/> <cell id='3' cpus='3' memory='1048576'/> </numa> </cpu> Notice the node 4 in <hugepages/>? Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/conf/domain_conf.c | 36 +++++++++++++++++ .../qemuxml2argv-hugepages-pages4.xml | 45 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 1 + 3 files changed, 82 insertions(+) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages4.xml diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 53ef694..9fb7dd7 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -2904,6 +2904,39 @@ virDomainDefRejectDuplicateControllers(virDomainDefPtr def) static int +virDomainDefCheckHugepagesNumaNodes(virDomainDefPtr def) +{ + size_t i; + + for (i = 0; i < def->mem.nhugepages; i++) { + virDomainHugePagePtr page = &def->mem.hugepages[i]; + ssize_t next_bit, pos = 0; + + if (!page->nodemask) { + /* This is the master hugepage to use. Skip it as it has no + * nodemask anyway. */ + continue; + } + + if (def->cpu && def->cpu->ncells) { + /* Fortunately, we allow only guest NUMA nodes to be continuous + * starting from zero. */ + pos = def->cpu->ncells - 1; + } + + next_bit = virBitmapNextSetBit(page->nodemask, pos); + if (next_bit >= 0) { + virReportError(VIR_ERR_XML_DETAIL, + _("hugepages: node %zd not found"), + next_bit); + return -1; + } + } + return 0; +} + + +static int virDomainDefPostParseInternal(virDomainDefPtr def, virCapsPtr caps ATTRIBUTE_UNUSED) { @@ -3048,6 +3081,9 @@ virDomainDefPostParseInternal(virDomainDefPtr def, } } + if (virDomainDefCheckHugepagesNumaNodes(def) < 0) + return -1; + return 0; } diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages4.xml b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages4.xml new file mode 100644 index 0000000..a3ed29b --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages4.xml @@ -0,0 +1,45 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>4194304</memory> + <currentMemory unit='KiB'>4194304</currentMemory> + <memoryBacking> + <hugepages> + <page size='2048' unit='KiB' nodeset='1,4'/> + <page size='1048576' unit='KiB' nodeset='0,2-3'/> + </hugepages> + </memoryBacking> + <vcpu placement='static'>4</vcpu> + <numatune> + <memory mode='strict' nodeset='0-3'/> + <memnode cellid='3' mode='strict' nodeset='3'/> + </numatune> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <cpu> + <numa> + <cell id='0' cpus='0' memory='1048576'/> + <cell id='1' cpus='1' memory='1048576'/> + <cell id='2' cpus='2' memory='1048576'/> + <cell id='3' cpus='3' memory='1048576'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'/> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 3feb2fe..37aa571 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -682,6 +682,7 @@ mymain(void) QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("hugepages-pages3", QEMU_CAPS_MEM_PATH, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); + DO_TEST_PARSE_ERROR("hugepages-pages4", NONE); DO_TEST("nosharepages", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_MEM_MERGE); DO_TEST("disk-cdrom", NONE); DO_TEST("disk-cdrom-network-http", QEMU_CAPS_KVM, QEMU_CAPS_DEVICE, -- 1.8.5.5

On Tue, Sep 02, 2014 at 05:12:01PM +0200, Michal Privoznik wrote:
As of 136ad4974 it is possible to specify different huge pages per guest NUMA node. However, there's no check if nodeset specified in ./hugepages/page contains only those guest NUMA nodes that exist. In other words with current code it is possible to define meaningless combination:
<memoryBacking> <hugepages> <page size='1048576' unit='KiB' nodeset='0,2-3'/> <page size='2048' unit='KiB' nodeset='1,4'/> </hugepages> </memoryBacking> <vcpu placement='static'>4</vcpu> <cpu> <numa> <cell id='0' cpus='0' memory='1048576'/> <cell id='1' cpus='1' memory='1048576'/> <cell id='2' cpus='2' memory='1048576'/> <cell id='3' cpus='3' memory='1048576'/> </numa> </cpu>
Notice the node 4 in <hugepages/>?
And with such XML, the domain will disappear after libvirt updates to a version with the patch below. We need to VIR_WARN() about that and then just delete it from the definition. Or something similar. So NACK to this version. Martin

There are two ways how to tell qemu to use huge pages. The first one is suitable for domains with NUMA nodes: the path to hugetlbfs mount is appended to NUMA node definition on the command line. The second one is suitable for UMA domains: here there's this global '-mem-path' argument that accepts path to the hugetlbfs mount point. However, the latter case was not used for all the cases that it should be. For instance: <memoryBacking> <hugepages> <page size='2048' unit='KiB' nodeset='0'/> </hugepages> </memoryBacking> didn't trigger the '-mem-path' so the huge pages - despite being configured - were not used at all. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 25 ++++++++++++++--- .../qemuxml2argv-hugepages-pages5.args | 7 +++++ .../qemuxml2argv-hugepages-pages5.xml | 32 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 1 + 4 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.xml diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index c84c7c3..26e166f 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -7540,7 +7540,7 @@ qemuBuildCommandLine(virConnectPtr conn, virCommandAddArg(cmd, "-m"); def->mem.max_balloon = VIR_DIV_UP(def->mem.max_balloon, 1024) * 1024; virCommandAddArgFormat(cmd, "%llu", def->mem.max_balloon / 1024); - if (def->mem.nhugepages && !def->mem.hugepages[0].size) { + if (def->mem.nhugepages && (!def->cpu || !def->cpu->ncells)) { char *mem_path; if (!cfg->nhugetlbfs) { @@ -7556,9 +7556,26 @@ qemuBuildCommandLine(virConnectPtr conn, goto error; } - if (!(mem_path = qemuGetDefaultHugepath(cfg->hugetlbfs, - cfg->nhugetlbfs))) - goto error; + if (def->mem.hugepages[0].size) { + for (j = 0; j < cfg->nhugetlbfs; j++) { + if (cfg->hugetlbfs[j].size == def->mem.hugepages[0].size) + break; + } + + if (j == cfg->nhugetlbfs) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to find any usable hugetlbfs mount for %llu KiB"), + def->mem.hugepages[0].size); + goto error; + } + + if (!(mem_path = qemuGetHugepagePath(&cfg->hugetlbfs[j]))) + goto error; + } else { + if (!(mem_path = qemuGetDefaultHugepath(cfg->hugetlbfs, + cfg->nhugetlbfs))) + goto error; + } virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path, NULL); diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.args b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.args new file mode 100644 index 0000000..0e0f35a --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.args @@ -0,0 +1,7 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ +/usr/bin/qemu -S -M pc -m 1024 \ +-mem-prealloc \ +-mem-path /dev/hugepages2M/libvirt/qemu \ +-smp 2 -nographic \ +-monitor unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb \ +-hda /dev/HostVG/QEMUGuest1 -net none -serial none -parallel none diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.xml b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.xml new file mode 100644 index 0000000..50a9cd8 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages5.xml @@ -0,0 +1,32 @@ +<domain type='qemu'> + <name>SomeDummyHugepagesGuest</name> + <uuid>ef1bdff4-27f3-4e85-a807-5fb4d58463cc</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <memoryBacking> + <hugepages> + <page size='2048' unit='KiB'/> + </hugepages> + </memoryBacking> + <vcpu placement='static'>2</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'/> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 37aa571..ce109f9 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -683,6 +683,7 @@ mymain(void) DO_TEST("hugepages-pages3", QEMU_CAPS_MEM_PATH, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST_PARSE_ERROR("hugepages-pages4", NONE); + DO_TEST("hugepages-pages5", QEMU_CAPS_MEM_PATH); DO_TEST("nosharepages", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_MEM_MERGE); DO_TEST("disk-cdrom", NONE); DO_TEST("disk-cdrom-network-http", QEMU_CAPS_KVM, QEMU_CAPS_DEVICE, -- 1.8.5.5
participants (2)
-
Martin Kletzander
-
Michal Privoznik