On Tue, Nov 01, 2016 at 12:11:15PM +0000, Jaroslav Safka wrote:
This second change introduces support for preallocated
shared file descriptor based memory backing.
It allows vhost-user to be used without hugepages.
Also, the memAccess token in the NUMA cell is used (if it is not present,
the default value from memoryBacking is used).
Used xml elements:
<memoryBacking>
<source type='file|anonymous'/>
<access mode='shared|private'/>
<allocation mode='immediate|ondemand'/>
</memoryBacking>
---
src/qemu/qemu_command.c | 156 ++++++++++++++++-----
src/qemu/qemu_command.h | 4 +
.../qemuxml2argv-fd-memory-no-numa-topology.args | 33 +++++
.../qemuxml2argv-fd-memory-no-numa-topology.xml | 91 ++++++++++++
.../qemuxml2argv-fd-memory-numa-topology.args | 33 +++++
.../qemuxml2argv-fd-memory-numa-topology.xml | 94 +++++++++++++
.../qemuxml2argv-fd-memory-numa-topology2.args | 36 +++++
.../qemuxml2argv-fd-memory-numa-topology2.xml | 95 +++++++++++++
.../qemuxml2argv-fd-memory-numa-topology3.args | 39 ++++++
.../qemuxml2argv-fd-memory-numa-topology3.xml | 96 +++++++++++++
.../qemuxml2argv-memorybacking-set.xml | 32 +++++
.../qemuxml2argv-memorybacking-unset.xml | 32 +++++
tests/qemuxml2argvtest.c | 42 ++++++
13 files changed, 750 insertions(+), 33 deletions(-)
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-fd-memory-no-numa-topology.args
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-fd-memory-no-numa-topology.xml
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology.args
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology.xml
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology2.args
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology2.xml
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology3.args
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology3.xml
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memorybacking-set.xml
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memorybacking-unset.xml
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index b68da3d..7710864 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3283,15 +3283,11 @@ qemuBuildMemoryBackendStr(unsigned long long size,
if (!(props = virJSONValueNewObject()))
return -1;
- if (pagesize) {
- if (qemuGetHupageMemPath(cfg, pagesize, &mem_path) < 0)
- goto cleanup;
-
+ if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) {
*backendType = "memory-backend-file";
if (virJSONValueObjectAdd(props,
- "b:prealloc", true,
- "s:mem-path", mem_path,
+ "s:mem-path",
VIR_DOMAIN_MEMORY_DEFAULT_PATH,
NULL) < 0)
goto cleanup;
@@ -3307,18 +3303,61 @@ qemuBuildMemoryBackendStr(unsigned long long size,
break;
case VIR_NUMA_MEM_ACCESS_DEFAULT:
+ if (def->mem.access == VIR_DOMAIN_MEMORY_ACCESS_SHARED) {
+ if (virJSONValueObjectAdd(props, "b:share", true, NULL) <
0)
+ goto cleanup;
+ }
+ break;
+
case VIR_NUMA_MEM_ACCESS_LAST:
break;
}
+
+ force = true;
} else {
- if (memAccess) {
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
- _("Shared memory mapping is supported "
- "only with hugepages"));
- goto cleanup;
- }
+ if (pagesize) {
+ if (qemuGetHupageMemPath(cfg, pagesize, &mem_path) < 0)
+ goto cleanup;
- *backendType = "memory-backend-ram";
+ *backendType = "memory-backend-file";
+
+ if (virJSONValueObjectAdd(props,
+ "b:prealloc", true,
+ "s:mem-path", mem_path,
+ NULL) < 0)
+ goto cleanup;
+
+ switch (memAccess) {
+ case VIR_NUMA_MEM_ACCESS_SHARED:
+ if (virJSONValueObjectAdd(props, "b:share", true, NULL) <
0)
+ goto cleanup;
+ break;
+
+ case VIR_NUMA_MEM_ACCESS_PRIVATE:
+ if (virJSONValueObjectAdd(props, "b:share", false, NULL) <
0)
+ goto cleanup;
+ break;
+
+ case VIR_NUMA_MEM_ACCESS_DEFAULT:
+ if (def->mem.access == VIR_DOMAIN_MEMORY_ACCESS_SHARED) {
+ if (virJSONValueObjectAdd(props, "b:share", true, NULL)
< 0)
+ goto cleanup;
+ }
+ break;
+
+ case VIR_NUMA_MEM_ACCESS_LAST:
+ break;
+ }
+ } else {
+ if (memAccess) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Shared memory mapping is supported "
+ "only with hugepages"));
+ goto cleanup;
+ }
+
This message is not true anymore ^^.
+ *backendType = "memory-backend-ram";
+ }
}
if (virJSONValueObjectAdd(props, "U:size", size * 1024, NULL) < 0)
@@ -7138,28 +7177,35 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
const long system_page_size = virGetSystemPageSizeKB();
char *mem_path = NULL;
- /*
- * No-op if hugepages were not requested.
- */
- if (!def->mem.nhugepages)
+ if (def->mem.allocation == VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) {
+ virCommandAddArgList(cmd, "-mem-prealloc", NULL);
return 0;
+ }
- /* There is one special case: if user specified "huge"
- * pages of regular system pages size.
- * And there is nothing to do in this case.
- */
- if (def->mem.hugepages[0].size == system_page_size)
- return 0;
+ if (def->mem.nhugepages) {
- if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_MEM_PATH)) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("hugepage backing not supported by '%s'"),
- def->emulator);
- return -1;
- }
+ /* There is one special case: if user specified "huge"
+ * pages of regular system pages size.
+ * And there is nothing to do in this case.
+ */
+ if (def->mem.hugepages[0].size == system_page_size)
+ return 0;
- if (qemuGetHupageMemPath(cfg, def->mem.hugepages[0].size, &mem_path) < 0)
- return -1;
+ if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_MEM_PATH)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("hugepage backing not supported by
'%s'"),
+ def->emulator);
+ return -1;
+ }
+
+ if (qemuGetHupageMemPath(cfg, def->mem.hugepages[0].size, &mem_path) <
0)
+ return -1;
+ } else {
+ /*
+ * No-op if hugepages or immediate allocation were not requested.
+ */
+ return 0;
+ }
virCommandAddArgList(cmd, "-mem-prealloc", "-mem-path", mem_path,
NULL);
VIR_FREE(mem_path);
@@ -7195,8 +7241,9 @@ qemuBuildMemCommandLine(virCommandPtr cmd,
* Add '-mem-path' (and '-mem-prealloc') parameter here only if
* there is no numa node specified.
*/
- if (!virDomainNumaGetNodeCount(def->numa) &&
- qemuBuildMemPathStr(cfg, def, qemuCaps, cmd) < 0)
+ if ((!virDomainNumaGetNodeCount(def->numa)
+ || def->mem.allocation == VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE)
+ && qemuBuildMemPathStr(cfg, def, qemuCaps, cmd) < 0)
Some calls are guarded by new conditions [1].
return -1;
if (def->mem.locked && !virQEMUCapsGet(qemuCaps,
QEMU_CAPS_REALTIME_MLOCK)) {
@@ -9424,6 +9471,10 @@ qemuBuildCommandLine(virQEMUDriverPtr driver,
qemuBuildNumaArgStr(cfg, def, cmd, qemuCaps, nodeset) < 0)
goto error;
+ if (!virDomainNumaGetNodeCount(def->numa) &&
+ qemuBuildMemoryBackingCommandLine(def, cmd, qemuCaps) < 0)
+ goto error;
+
This call is made if there is no NUMA for the machine [2]
if (qemuBuildMemoryDeviceCommandLine(cmd, cfg, def, qemuCaps,
nodeset) < 0)
goto error;
@@ -9816,6 +9867,45 @@ qemuBuildChrDeviceStr(char **deviceStr,
return ret;
}
+int
+qemuBuildMemoryBackingCommandLine(const virDomainDef *def,
+ virCommandPtr cmd,
+ virQEMUCapsPtr qemuCaps)
Indentation doesn't match here
+{
+ int ret = -1;
+ virBuffer buf = VIR_BUFFER_INITIALIZER;
+ const char *alias = "ram";
+
+ if (def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_FILE) {
+ ret = 0;
+ goto cleanup;
+ }
[1] and this function has another condition inside. It is *really* hard
to read these patches. That is partially because of some whitespace
issues and because all three patches have (almost) the same subject.
Even prefixing them would be enough.
+ /* numa is disabled and mem source is file */
+ if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("File memory mapping is not supported "
+ "with this QEMU"));
+ goto cleanup;
+ }
+
+ virCommandAddArg(cmd, "-object");
+ virBufferAsprintf(&buf, "memory-backend-file,id=%s,mem-path=%s,",
alias, VIR_DOMAIN_MEMORY_DEFAULT_PATH);
+
+ if (def->mem.access == VIR_DOMAIN_MEMORY_ACCESS_SHARED)
+ virBufferAsprintf(&buf, "%s", "share=yes,");
+
+ virBufferAsprintf(&buf, "size=%llu", virDomainDefGetMemoryInitial(def)
/ 1024);
+ virCommandAddArgBuffer(cmd, &buf);
+
+ virCommandAddArg(cmd, "-numa");
+ virCommandAddArgFormat(cmd, "node,memdev=%s", alias);
+
[2] and you just add a NUMA node. I believe this will change the guest
ABI. You can say, though, that some of the options require there to be at
least one NUMA node. Or document that this will automatically convert the
domain to having a NUMA node, but then show that in the domain XML.
+ ret = 0;
+ cleanup:
+
+ virBufferFreeAndReset(&buf);
+ return ret;
+}
virJSONValuePtr
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
index a793fb6..c0ecd62 100644
--- a/src/qemu/qemu_command.h
+++ b/src/qemu/qemu_command.h
@@ -183,6 +183,10 @@ bool qemuCheckCCWS390AddressSupport(const virDomainDef *def,
virDomainDeviceInfo info,
virQEMUCapsPtr qemuCaps,
const char *devicename);
+int
+qemuBuildMemoryBackingCommandLine(const virDomainDef *def,
+ virCommandPtr cmd,
+ virQEMUCapsPtr qemuCaps);
virJSONValuePtr qemuBuildHotpluggableCPUProps(const virDomainVcpuDef *vcpu)
ATTRIBUTE_NONNULL(1);
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-no-numa-topology.args
b/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-no-numa-topology.args
new file mode 100644
index 0000000..1563693
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-no-numa-topology.args
@@ -0,0 +1,33 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-x86_64 \
+-name instance-00000092 \
+-S \
+-M pc-i440fx-wily \
+-cpu host \
+-m 14336 \
+-mem-prealloc \
+-smp 8,sockets=8,cores=1,threads=1 \
+-object memory-backend-file,id=ram,mem-path=/var/lib/libvirt/qemu,share=yes,\
+size=14336 \
+-numa node,memdev=ram \
+-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
+-smbios 'type=1,manufacturer=OpenStack Foundation,product=OpenStack Nova,\
+version=13.0.0,serial=0640ddd5-36c9-e211-a2f6-001e6789a0f1,\
+uuid=126f2720-6f8e-45ab-a886-ec9277079a67,family=Virtual Machine' \
+-nographic \
+-nodefaults \
+-monitor unix:/tmp/lib/domain--1-instance-00000092/monitor.sock,server,nowait \
+-rtc base=utc,driftfix=slew \
+-no-kvm-pit-reinjection \
+-boot c \
+-usb \
+-drive file=/var/lib/nova/instances/126f2720-6f8e-45ab-a886-ec9277079a67/disk,\
+format=qcow2,if=none,id=drive-virtio-disk0,cache=none \
+-device virtio-blk-pci,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,\
+id=virtio-disk0 \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-no-numa-topology.xml
b/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-no-numa-topology.xml
new file mode 100644
index 0000000..dd8b00c
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-no-numa-topology.xml
@@ -0,0 +1,91 @@
+<domain type='kvm' id='56'>
+ <name>instance-00000092</name>
+ <uuid>126f2720-6f8e-45ab-a886-ec9277079a67</uuid>
+ <metadata>
+ <nova:instance
xmlns:nova="http://openstack.org/xmlns/libvirt/nova/1.0">
+ <nova:package version="13.0.0"/>
+ <nova:name>emma</nova:name>
+ <nova:creationTime>2016-09-16 15:51:06</nova:creationTime>
+ <nova:flavor name="devstack">
+ <nova:memory>14336</nova:memory>
+ <nova:disk>30</nova:disk>
+ <nova:swap>0</nova:swap>
+ <nova:ephemeral>0</nova:ephemeral>
+ <nova:vcpus>8</nova:vcpus>
+ </nova:flavor>
+ <nova:owner>
+ <nova:user
uuid="ff95838725294b1896e887d4caee5e1a">tester</nova:user>
+ <nova:project
uuid="f04e5e9722e441ae90bcc7f12722a0ac">test</nova:project>
+ </nova:owner>
+ <nova:root type="image"
uuid="37ec79a5-9eb2-4df5-beee-a6942657d828"/>
+ </nova:instance>
+ </metadata>
+ <memory unit='KiB'>14680064</memory>
+ <currentMemory unit='KiB'>14680064</currentMemory>
+ <memoryBacking>
+ <source type='file'/>
+ <access mode='shared'/>
+ <allocation mode='immediate'/>
+ </memoryBacking>
+ <vcpu placement='static'>8</vcpu>
+ <cputune>
+ <shares>8192</shares>
+ </cputune>
+ <resource>
+ <partition>/machine</partition>
+ </resource>
+ <sysinfo type='smbios'>
+ <system>
+ <entry name='manufacturer'>OpenStack Foundation</entry>
+ <entry name='product'>OpenStack Nova</entry>
+ <entry name='version'>13.0.0</entry>
+ <entry
name='serial'>0640ddd5-36c9-e211-a2f6-001e6789a0f1</entry>
+ <entry
name='uuid'>126f2720-6f8e-45ab-a886-ec9277079a67</entry>
+ <entry name='family'>Virtual Machine</entry>
+ </system>
+ </sysinfo>
+ <os>
+ <type arch='x86_64' machine='pc-i440fx-wily'>hvm</type>
+ <boot dev='hd'/>
+ <smbios mode='sysinfo'/>
+ </os>
+ <features>
+ <acpi/>
+ <apic/>
+ </features>
+ <cpu mode='host-passthrough'>
+ <topology sockets='8' cores='1' threads='1'/>
+ </cpu>
+ <clock offset='utc'>
+ <timer name='pit' tickpolicy='delay'/>
+ <timer name='rtc' tickpolicy='catchup'/>
+ <timer name='hpet' present='no'/>
+ </clock>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-x86_64</emulator>
+ <disk type='file' device='disk'>
+ <driver name='qemu' type='qcow2' cache='none'/>
+ <source
file='/var/lib/nova/instances/126f2720-6f8e-45ab-a886-ec9277079a67/disk'/>
+ <backingStore type='file' index='1'>
+ <format type='raw'/>
+ <source
file='/var/lib/nova/instances/_base/7205b893db62340da7d5246a23b66a52ccef9bd1'/>
+ <backingStore/>
+ </backingStore>
+ <target dev='vda' bus='virtio'/>
+ <alias name='virtio-disk0'/>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x05' function='0x0'/>
+ </disk>
+ <controller type='usb' index='0'>
+ <alias name='usb'/>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x01' function='0x2'/>
+ </controller>
+ <memballoon model='virtio'>
+ <stats period='10'/>
+ <alias name='balloon0'/>
+ <address type='pci' domain='0x0000' bus='0x00'
slot='0x06' function='0x0'/>
+ </memballoon>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology.args
b/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology.args
new file mode 100644
index 0000000..a41b264
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology.args
@@ -0,0 +1,33 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-x86_64 \
+-name instance-00000092 \
+-S \
+-M pc-i440fx-wily \
+-cpu host \
+-m 14336 \
+-mem-prealloc \
+-smp 8,sockets=1,cores=8,threads=1 \
+-object memory-backend-file,id=ram-node0,mem-path=/var/lib/libvirt/qemu,\
+share=yes,size=15032385536 \
+-numa node,nodeid=0,cpus=0-7,memdev=ram-node0 \
+-uuid 126f2720-6f8e-45ab-a886-ec9277079a67 \
+-smbios 'type=1,manufacturer=OpenStack Foundation,product=OpenStack Nova,\
+version=13.0.0,serial=0640ddd5-36c9-e211-a2f6-001e6789a0f1,\
+uuid=126f2720-6f8e-45ab-a886-ec9277079a67,family=Virtual Machine' \
+-nographic \
+-nodefaults \
+-monitor unix:/tmp/lib/domain--1-instance-00000092/monitor.sock,server,nowait \
+-rtc base=utc,driftfix=slew \
+-no-kvm-pit-reinjection \
+-boot c \
+-usb \
+-drive file=/var/lib/nova/instances/126f2720-6f8e-45ab-a886-ec9277079a67/disk,\
+format=qcow2,if=none,id=drive-virtio-disk0,cache=none \
+-device virtio-blk-pci,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,\
+id=virtio-disk0 \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology.xml
b/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology.xml
new file mode 100644
index 0000000..f2b1a2e
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-fd-memory-numa-topology.xml
@@ -0,0 +1,94 @@
+ <domain type='kvm' id='56'>
+ <name>instance-00000092</name>
+ <uuid>126f2720-6f8e-45ab-a886-ec9277079a67</uuid>
+ <metadata>
+ <nova:instance
xmlns:nova="http://openstack.org/xmlns/libvirt/nova/1.0">
+ <nova:package version="13.0.0"/>
+ <nova:name>emma</nova:name>
+ <nova:creationTime>2016-09-16 15:51:06</nova:creationTime>
+ <nova:flavor name="devstack">
+ <nova:memory>14336</nova:memory>
+ <nova:disk>30</nova:disk>
+ <nova:swap>0</nova:swap>
+ <nova:ephemeral>0</nova:ephemeral>
+ <nova:vcpus>8</nova:vcpus>
+ </nova:flavor>
+ <nova:owner>
+ <nova:user
uuid="ff95838725294b1896e887d4caee5e1a">tester</nova:user>
+ <nova:project
uuid="f04e5e9722e441ae90bcc7f12722a0ac">test</nova:project>
+ </nova:owner>
+ <nova:root type="image"
uuid="37ec79a5-9eb2-4df5-beee-a6942657d828"/>
+ </nova:instance>
+ </metadata>
Pointless metadata ^^. And more. If you want to test more things, then
note that in the name, but please make the file test only what matters
for the end goal.