On Thu, Dec 03, 2020 at 13:36:23 +0100, Michal Privoznik wrote:
QEMU gained this new virtio-mem model. It's similar to pc-dimm
in a sense that guest uses it as memory, but in a sense very
different from it as it can dynamically change allocation,
without need for hotplug. More specifically, the device has two
attributes more (it has more of course, but these two are
important here):
1) block-size - the granularity of the device. You can imagine
the device being divided into blocks, each 'block-size' long.
2) requested-size - the portion of the device that is in use by
the guest.
And it all works like this: at guest startup/hotplug both
block-size and requested-size are specified. When sysadmin wants
to give some more memory to the guest, or take some back, they
change the 'requested-size' attribute which is propagated to the
guest where virtio-mem module takes corresponding action.
This means, that 'requested-size' must be a whole number product
of 'block-size' and of course has to be in rage [0, max-size]
(including). The 'max-size' is yet another attribute but if not
set it's "inherited" from corresponding memory-backend-* object.
Therefore, two new elements are introduced under <target/>, to
reflect these attributes:
<memory model='virtio'>
<target>
<size unit='KiB'>4194304</size>
<node>0</node>
<block unit='KiB'>2048</block>
<requested unit='KiB'>524288</requested>
</target>
<address type='pci' domain='0x0000' bus='0x00'
slot='0x05' function='0x0'/>
</memory>
The intent here is that <requested/> will be allowed to change
via virDomainUpdateDeviceFlags() API.
Note, QEMU does inform us about success of allocation via an
event - this is covered in next patches.
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
docs/formatdomain.rst | 22 ++++
docs/schemas/domaincommon.rng | 10 ++
src/conf/domain_conf.c | 103 ++++++++++++++++--
src/conf/domain_conf.h | 5 +
.../memory-hotplug-virtio-mem.xml | 78 +++++++++++++
...emory-hotplug-virtio-mem.x86_64-latest.xml | 1 +
tests/qemuxml2xmltest.c | 1 +
7 files changed, 213 insertions(+), 7 deletions(-)
create mode 100644 tests/qemuxml2argvdata/memory-hotplug-virtio-mem.xml
create mode 120000 tests/qemuxml2xmloutdata/memory-hotplug-virtio-mem.x86_64-latest.xml
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
index ca6bc0432e..3990728939 100644
--- a/docs/formatdomain.rst
+++ b/docs/formatdomain.rst
@@ -7187,6 +7187,17 @@ Example: usage of the memory devices
</label>
</target>
</memory>
+ <memory model='virtio'>
+ <source>
+ <path>/tmp/virtio_mem</path>
+ </source>
+ <target>
+ <size unit='KiB'>1048576</size>
+ <node>0</node>
+ <block unit='KiB'>2048</block>
+ <requested unit='KiB'>524288</requested>
+ </target>
+ </memory>
<memory model='virtio' access='shared'>
<source>
<path>/tmp/virtio_pmem</path>
@@ -7300,6 +7311,17 @@ Example: usage of the memory devices
so other backend types should use the ``readonly`` element. :since:`Since
5.0.0`
+ ``block``
+ The size of an individual block, granularity of division of memory module.
+ Must be power of two and at least equal to size of a transparent hugepage
+ (2MiB on x84_64). The default is hypervisor dependant. This is valid for
+ ``virtio`` model only and mutually exclusive with ``pmem``.
+
+ ``requested``
+ The total size of blocks exposed to the guest. Must respect ``block``
+ granularity. This is valid for ``virtio`` model only and mutually
+ exclusive with ``pmem``.
Docs don't mention interactions of <size> and <requested>. Is size the
actual size? In such case you'll need to clear it on startup and
populate it with the actual size later.
The issue with <pmem> was mentioned earlier.
[...]
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 935bea1804..0551f6f266 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -6764,10 +6764,23 @@ virDomainMemoryDefValidate(const virDomainMemoryDef *mem,
return -1;
}
} else {
- /* TODO: plain virtio-mem behaves differently then virtio-pmem */
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
- _("virtio-mem is not supported yet. <pmem/> is
required"));
- return -1;
+ if (mem->requestedsize > mem->size) {
+ virReportError(VIR_ERR_XML_DETAIL, "%s",
+ _("requested size must be smaller than
@size"));
+ return -1;
+ }
+
+ if (!VIR_IS_POW2(mem->blocksize)) {
+ virReportError(VIR_ERR_XML_DETAIL, "%s",
+ _("block size must be a power of two"));
+ return -1;
+ }
Docs state that blocksize must be also a multiple of page size.
+
+ if (mem->requestedsize % mem->blocksize != 0) {
+ virReportError(VIR_ERR_XML_DETAIL, "%s",
+ _("requested size must be an integer multiple of
block size"));
+ return -1;
+ }
}
break;
@@ -16774,9 +16787,25 @@ virDomainMemorySourceDefParseXML(xmlNodePtr node,
case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
def->s.virtio.path = virXPathString("string(./path)", ctxt);
- if (virXPathBoolean("boolean(./pmem)", ctxt))
+ if (virXPathBoolean("boolean(./pmem)", ctxt)) {
def->s.virtio.pmem = true;
+ } else {
+ if (virDomainParseMemory("./pagesize",
"./pagesize/@unit", ctxt,
+ &def->s.virtio.pagesize, false, false) <
0)
+ return -1;
+ if ((nodemask = virXPathString("string(./nodemask)", ctxt))) {
+ if (virBitmapParse(nodemask, &def->s.virtio.sourceNodes,
+ VIR_DOMAIN_CPUMASK_LEN) < 0)
+ return -1;
+
+ if (virBitmapIsAllClear(def->s.virtio.sourceNodes)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Invalid value of 'nodemask':
%s"), nodemask);
+ return -1;
Move this to virDomainMemoryDefValidate too.
+ }
+ }
+ }
break;
case VIR_DOMAIN_MEMORY_MODEL_NONE:
[...]
@@ -16831,6 +16861,27 @@ virDomainMemoryTargetDefParseXML(xmlNodePtr
node,
if (virXPathBoolean("boolean(./readonly)", ctxt))
def->readonly = true;
+
+ break;
+
+ case VIR_DOMAIN_MEMORY_MODEL_VIRTIO:
+ if (!def->s.virtio.pmem) {
+ if (virDomainParseMemory("./block", "./block/@unit",
ctxt,
+ &def->blocksize, false, false) < 0)
+ return -1;
+
+ if (virDomainParseMemory("./requested",
"./requested/@unit", ctxt,
+ &def->requestedsize, false, false) < 0)
+ return -1;
+ }
If size is current size it should be optional.
+
+ break;
+
+ case VIR_DOMAIN_MEMORY_MODEL_NONE:
+ case VIR_DOMAIN_MEMORY_MODEL_DIMM:
+ case VIR_DOMAIN_MEMORY_MODEL_LAST:
+ /* nada */
+ break;
}
return 0;
@@ -18649,7 +18700,9 @@ virDomainMemoryFindByDefInternal(virDomainDefPtr def,
/* target info -> always present */
if (tmp->model != mem->model ||
tmp->targetNode != mem->targetNode ||
- tmp->size != mem->size)
+ tmp->size != mem->size ||
If size is the current size and being actively updated, this lookup
might not actually work if it's updated between checking and updating.
+ tmp->blocksize != mem->blocksize ||
+ tmp->requestedsize != mem->requestedsize)
continue;
switch (mem->model) {
[...]
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index efaa4c5473..f16dc0a029 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -2327,8 +2327,11 @@ struct _virDomainMemoryDef {
bool pmem;
} nvdimm; /* VIR_DOMAIN_MEMORY_MODEL_NVDIMM */
struct {
+ // nodemask + hugepages + no prealloc
Leftovers?
char *path; /* Required for pmem, otherwise optional */
bool pmem;
+ virBitmapPtr sourceNodes;
+ unsigned long long pagesize; /* kibibytes */
} virtio; /* VIR_DOMAIN_MEMORY_MODEL_VIRTIO */
} s;