<memoryBacking>
<hugepages>
<page size="1" unit="G" nodeset="0-3,5"/>
<page size="2" unit="M" nodeset="4"/>
</hugepages>
</memoryBacking>
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
docs/formatdomain.html.in | 18 +-
docs/schemas/domaincommon.rng | 19 +-
src/conf/domain_conf.c | 197 +++++++++++++++++++--
src/conf/domain_conf.h | 13 +-
src/parallels/parallels_driver.c | 2 +-
src/qemu/qemu_command.c | 2 +-
src/qemu/qemu_conf.c | 20 ++-
src/qemu/qemu_process.c | 2 +-
.../qemuxml2argv-hugepages-pages.xml | 45 +++++
tests/qemuxml2xmltest.c | 1 +
10 files changed, 288 insertions(+), 31 deletions(-)
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 3c85fc5..f4362e6 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -617,7 +617,10 @@
<domain>
...
<memoryBacking>
- <hugepages/>
+ <hugepages>
+ <page size="1" unit="G" nodeset="0-3,5"/>
+ <page size="2" unit="M" nodeset="4"/>
+ </hugepages>
<nosharepages/>
<locked/>
</memoryBacking>
@@ -632,7 +634,19 @@
<dl>
<dt><code>hugepages</code></dt>
<dd>This tells the hypervisor that the guest should have its memory
- allocated using hugepages instead of the normal native page size.</dd>
+ allocated using hugepages instead of the normal native page size.
+ <span class='since'>Since 1.2.5</span> it's possible to set hugepages
+ more specifically per NUMA node. The <code>page</code> element is
+ introduced. It has one compulsory attribute <code>size</code> which
+ specifies which hugepages should be used (especially useful on systems
+ supporting hugepages of different sizes). The default unit for the
+ <code>size</code> attribute is kilobytes (multiplier of 1024). If you
+ want to use a different unit, use the optional <code>unit</code> attribute.
+ For systems with NUMA, the optional <code>nodeset</code> attribute may
+ come in handy as it ties given guest's NUMA nodes to certain hugepage
+ sizes. From the example snippet, one gigabyte hugepages are used for
+ every NUMA node except node number four. For the correct syntax see
+ <a href="#elementsNUMATuning">this</a>.</dd>
<dt><code>nosharepages</code></dt>
<dd>Instructs hypervisor to disable shared pages (memory merge, KSM) for
this domain. <span class="since">Since 1.0.6</span></dd>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 2caeef9..d9da0bc 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -567,7 +567,24 @@
<interleave>
<optional>
<element name="hugepages">
- <empty/>
+ <zeroOrMore>
+ <element name="page">
+ <attribute name="size">
+ <ref name="unsignedLong"/>
+ </attribute>
+ <optional>
+ <attribute name='unit'>
+ <ref name='unit'/>
+ </attribute>
+ </optional>
+ <optional>
+ <attribute name="nodeset">
+ <ref name='cpuset'/>
+ </attribute>
+ </optional>
+ <empty/>
+ </element>
+ </zeroOrMore>
</element>
</optional>
<optional>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index a1ef374..b49bcb0 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -11258,6 +11258,57 @@ virDomainParseMemory(const char *xpath, xmlXPathContextPtr ctxt,
}
+/**
+ * virDomainHugepagesParseXML:
+ * @node: the <page> element to read
+ * @ctxt: XPath context; its current node is pointed at @node for the
+ *        duration of the call and restored before returning
+ * @hugepage: destination for the parsed size and optional node mask
+ *
+ * Reads the 'size' attribute, scales it by the optional 'unit'
+ * attribute and stores the result, rounded up to whole KiB, in
+ * @hugepage->size. A resulting size of zero is rejected. When a
+ * 'nodeset' attribute is present it is parsed into @hugepage->nodemask.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+virDomainHugepagesParseXML(xmlNodePtr node,
+                           xmlXPathContextPtr ctxt,
+                           virDomainHugePagePtr hugepage)
+{
+    xmlNodePtr saved = ctxt->node;
+    char *nodeset = NULL;
+    char *unit = NULL;
+    unsigned long long bytes;
+    unsigned long long limit;
+    int ret = -1;
+
+    ctxt->node = node;
+
+    /* On 32-bit machines, our bound is 0xffffffff * KiB. On 64-bit
+     * machines, our bound is off_t (2^63). */
+    limit = (sizeof(unsigned long) < sizeof(long long)) ?
+            1024ull * ULONG_MAX : LLONG_MAX;
+
+    if (virXPathULongLong("string(./@size)", ctxt, &bytes) < 0) {
+        virReportError(VIR_ERR_XML_DETAIL, "%s",
+                       _("unable to parse size attribute"));
+        goto cleanup;
+    }
+
+    unit = virXPathString("string(./@unit)", ctxt);
+    if (virScaleInteger(&bytes, unit, 1024, limit) < 0)
+        goto cleanup;
+
+    /* The size is kept in KiB, rounded up */
+    hugepage->size = VIR_DIV_UP(bytes, 1024);
+    if (hugepage->size == 0) {
+        virReportError(VIR_ERR_XML_DETAIL, "%s",
+                       _("hugepage size can't be zero"));
+        goto cleanup;
+    }
+
+    /* nodeset is optional; the mask is only set when it is given */
+    nodeset = virXMLPropString(node, "nodeset");
+    if (nodeset &&
+        virBitmapParse(nodeset, 0, &hugepage->nodemask,
+                       VIR_DOMAIN_CPUMASK_LEN) < 0)
+        goto cleanup;
+
+    ret = 0;
+ cleanup:
+    VIR_FREE(unit);
+    VIR_FREE(nodeset);
+    ctxt->node = saved;
+    return ret;
+}
+
+
static virDomainResourceDefPtr
virDomainResourceDefParse(xmlNodePtr node,
xmlXPathContextPtr ctxt)
@@ -11325,7 +11376,7 @@ virDomainDefParseXML(xmlDocPtr xml,
{
xmlNodePtr *nodes = NULL, node = NULL;
char *tmp = NULL;
- size_t i;
+ size_t i, j;
int n;
long id = -1;
virDomainDefPtr def;
@@ -11475,8 +11526,55 @@ virDomainDefParseXML(xmlDocPtr xml,
def->mem.cur_balloon = def->mem.max_balloon;
}
- if ((node = virXPathNode("./memoryBacking/hugepages", ctxt)))
- def->mem.hugepage_backed = true;
+
+ if ((n = virXPathNodeSet("./memoryBacking/hugepages/page", ctxt,
&nodes)) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("cannot extract hugepages nodes"));
+ goto error;
+ }
+
+ if (n) {
+ if (VIR_ALLOC_N(def->mem.hugepages, n) < 0)
+ goto error;
+
+ for (i = 0; i < n; i++) {
+ if (virDomainHugepagesParseXML(nodes[i], ctxt,
+ &def->mem.hugepages[i]) < 0)
+ goto error;
+ def->mem.nhugepages++;
+
+ for (j = 0; j < i; j++) {
+ if (def->mem.hugepages[i].nodemask &&
+ def->mem.hugepages[j].nodemask &&
+ virBitmapDoesIntersect(def->mem.hugepages[i].nodemask,
+ def->mem.hugepages[j].nodemask)) {
+ virReportError(VIR_ERR_XML_DETAIL,
+ _("nodeset attribute of hugepages "
+ "of sizes %llu and %llu intersect"),
+ def->mem.hugepages[i].size,
+ def->mem.hugepages[j].size);
+ goto error;
+ } else if (!def->mem.hugepages[i].nodemask &&
+ !def->mem.hugepages[j].nodemask) {
+ virReportError(VIR_ERR_XML_DETAIL,
+ _("two master hugepages detected: "
+ "%llu and %llu"),
+ def->mem.hugepages[i].size,
+ def->mem.hugepages[j].size);
+ goto error;
+ }
+ }
+ }
+
+ VIR_FREE(nodes);
+ } else {
+ if ((node = virXPathNode("./memoryBacking/hugepages", ctxt))) {
+ if (VIR_ALLOC(def->mem.hugepages) < 0)
+ goto error;
+
+ def->mem.nhugepages = 1;
+ }
+ }
if ((node = virXPathNode("./memoryBacking/nosharepages", ctxt)))
def->mem.nosharepages = true;
@@ -11498,7 +11596,6 @@ virDomainDefParseXML(xmlDocPtr xml,
goto error;
for (i = 0; i < n; i++) {
- size_t j;
if (virDomainBlkioDeviceParseXML(nodes[i],
&def->blkio.devices[i]) < 0)
goto error;
@@ -12383,7 +12480,6 @@ virDomainDefParseXML(xmlDocPtr xml,
if (chr->target.port == -1) {
int maxport = -1;
- size_t j;
for (j = 0; j < i; j++) {
if (def->parallels[j]->target.port > maxport)
maxport = def->parallels[j]->target.port;
@@ -12411,7 +12507,6 @@ virDomainDefParseXML(xmlDocPtr xml,
if (chr->target.port == -1) {
int maxport = -1;
- size_t j;
for (j = 0; j < i; j++) {
if (def->serials[j]->target.port > maxport)
maxport = def->serials[j]->target.port;
@@ -12469,7 +12564,6 @@ virDomainDefParseXML(xmlDocPtr xml,
if (chr->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_VIRTIO_SERIAL &&
chr->info.addr.vioserial.port == 0) {
int maxport = 0;
- size_t j;
for (j = 0; j < i; j++) {
virDomainChrDefPtr thischr = def->channels[j];
if (thischr->info.type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_VIRTIO_SERIAL
&&
@@ -12586,7 +12680,7 @@ virDomainDefParseXML(xmlDocPtr xml,
if (n && VIR_ALLOC_N(def->videos, n) < 0)
goto error;
for (i = 0; i < n; i++) {
- size_t j = def->nvideos;
+ j = def->nvideos;
virDomainVideoDefPtr video = virDomainVideoDefParseXML(nodes[j],
def,
flags);
@@ -14024,13 +14118,38 @@ virDomainDefCheckABIStability(virDomainDefPtr src,
dst->mem.cur_balloon, src->mem.cur_balloon);
goto error;
}
- if (src->mem.hugepage_backed != dst->mem.hugepage_backed) {
+ if (src->mem.nhugepages != dst->mem.nhugepages) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
- _("Target domain huge page backing %d does not match source
%d"),
- dst->mem.hugepage_backed,
- src->mem.hugepage_backed);
+ _("Target domain huge pages count %zu does not match source
%zu"),
+ dst->mem.nhugepages, src->mem.nhugepages);
goto error;
}
+ for (i = 0; i < src->mem.nhugepages; i++) {
+ virDomainHugePagePtr src_huge = &src->mem.hugepages[i];
+ virDomainHugePagePtr dst_huge = &dst->mem.hugepages[i];
+
+ if (src_huge->size != dst_huge->size) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Target domain huge page size %llu "
+ "does not match source %llu"),
+ dst_huge->size, src_huge->size);
+ goto error;
+ }
+
+ if (src_huge->nodemask && dst_huge->nodemask) {
+ if (!virBitmapEqual(src_huge->nodemask, dst_huge->nodemask)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Target huge page nodemask does not match
source"));
+ goto error;
+ }
+ } else {
+ if (src_huge->nodemask || dst_huge->nodemask) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Target huge page nodemask does not match
source"));
+ goto error;
+ }
+ }
+ }
if (src->vcpus != dst->vcpus) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
@@ -17136,6 +17255,54 @@ virDomainResourceDefFormat(virBufferPtr buf,
}
+/**
+ * virDomainHugepagesFormatBuf:
+ * @buf: output buffer
+ * @hugepage: hugepage entry to format
+ *
+ * Appends one <page size='...' unit='KiB' [nodeset='...']/> element
+ * to @buf. The nodeset string is produced before anything is written,
+ * so a formatting failure leaves @buf untouched instead of holding an
+ * unterminated <page element.
+ *
+ * Returns 0 on success, -1 if the node mask could not be formatted.
+ */
+static int
+virDomainHugepagesFormatBuf(virBufferPtr buf,
+                            virDomainHugePagePtr hugepage)
+{
+    char *nodeset = NULL;
+
+    if (hugepage->nodemask &&
+        !(nodeset = virBitmapFormat(hugepage->nodemask)))
+        return -1;
+
+    virBufferAsprintf(buf, "<page size='%llu' unit='KiB'",
+                      hugepage->size);
+
+    if (nodeset)
+        virBufferAsprintf(buf, " nodeset='%s'", nodeset);
+
+    virBufferAddLit(buf, "/>\n");
+
+    VIR_FREE(nodeset);
+    return 0;
+}
+
+/**
+ * virDomainHugepagesFormat:
+ * @buf: output buffer
+ * @hugepages: array of hugepage entries
+ * @nhugepages: number of entries in @hugepages
+ *
+ * Writes the <hugepages> element. A single entry whose size is zero
+ * is emitted in the short form <hugepages/>; otherwise every entry is
+ * written as a <page> child, indented by two.
+ */
+static void
+virDomainHugepagesFormat(virBufferPtr buf,
+                         virDomainHugePagePtr hugepages,
+                         size_t nhugepages)
+{
+    size_t idx = 0;
+
+    /* A lone entry without a size is written in the short form */
+    if (nhugepages == 1 && hugepages[0].size == 0) {
+        virBufferAddLit(buf, "<hugepages/>\n");
+        return;
+    }
+
+    virBufferAddLit(buf, "<hugepages>\n");
+    virBufferAdjustIndent(buf, 2);
+
+    while (idx < nhugepages) {
+        /* NOTE(review): the helper's return value is not checked here */
+        virDomainHugepagesFormatBuf(buf, &hugepages[idx]);
+        idx++;
+    }
+
+    virBufferAdjustIndent(buf, -2);
+    virBufferAddLit(buf, "</hugepages>\n");
+}
+
+
#define DUMPXML_FLAGS \
(VIR_DOMAIN_XML_SECURE | \
VIR_DOMAIN_XML_INACTIVE | \
@@ -17319,11 +17486,11 @@ virDomainDefFormatInternal(virDomainDefPtr def,
virBufferAddLit(buf, "</memtune>\n");
}
- if (def->mem.hugepage_backed || def->mem.nosharepages || def->mem.locked) {
+ if (def->mem.nhugepages || def->mem.nosharepages || def->mem.locked) {
virBufferAddLit(buf, "<memoryBacking>\n");
virBufferAdjustIndent(buf, 2);
- if (def->mem.hugepage_backed)
- virBufferAddLit(buf, "<hugepages/>\n");
+ if (def->mem.nhugepages)
+ virDomainHugepagesFormat(buf, def->mem.hugepages,
def->mem.nhugepages);
if (def->mem.nosharepages)
virBufferAddLit(buf, "<nosharepages/>\n");
if (def->mem.locked)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 4c9b7e8..61f057c 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1848,6 +1848,14 @@ struct _virDomainResourceDef {
char *partition;
};
+typedef struct _virDomainHugePage virDomainHugePage;
+typedef virDomainHugePage *virDomainHugePagePtr;
+
+/* One <page> entry of <memoryBacking>/<hugepages> */
+struct _virDomainHugePage {
+    virBitmapPtr nodemask;      /* guest's NUMA node mask; set only when
+                                 * a nodeset attribute was given */
+    unsigned long long size;    /* hugepage size in KiB */
+};
+
/*
* Guest VM main configuration
*
@@ -1874,7 +1882,10 @@ struct _virDomainDef {
struct {
unsigned long long max_balloon; /* in kibibytes */
unsigned long long cur_balloon; /* in kibibytes */
- bool hugepage_backed;
+
+ virDomainHugePagePtr hugepages;
+ size_t nhugepages;
+
bool nosharepages;
bool locked;
int dump_core; /* enum virDomainMemDump */
diff --git a/src/parallels/parallels_driver.c b/src/parallels/parallels_driver.c
index a503dea..bb9538f 100644
--- a/src/parallels/parallels_driver.c
+++ b/src/parallels/parallels_driver.c
@@ -2023,7 +2023,7 @@ parallelsApplyChanges(virDomainObjPtr dom, virDomainDefPtr new)
return -1;
}
- if (old->mem.hugepage_backed != new->mem.hugepage_backed ||
+ if (old->mem.nhugepages != new->mem.nhugepages ||
old->mem.hard_limit != new->mem.hard_limit ||
old->mem.soft_limit != new->mem.soft_limit ||
old->mem.min_guarantee != new->mem.min_guarantee ||
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index b14ce83..0b8cef5 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -7332,7 +7332,7 @@ qemuBuildCommandLine(virConnectPtr conn,
virCommandAddArg(cmd, "-m");
def->mem.max_balloon = VIR_DIV_UP(def->mem.max_balloon, 1024) * 1024;
virCommandAddArgFormat(cmd, "%llu", def->mem.max_balloon / 1024);
- if (def->mem.hugepage_backed) {
+ if (def->mem.nhugepages) {
char *mem_path;
if (!cfg->nhugetlbfs) {
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index cf5ce97..03593d6 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -611,15 +611,17 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg,
} else {
CHECK_TYPE("hugetlbfs_mount", VIR_CONF_STRING);
if (p && p->str) {
- if (VIR_REALLOC_N(cfg->hugetlbfs, 1) < 0)
- goto cleanup;
- cfg->nhugetlbfs = 1;
- if (virQEMUDriverConfigHugeTLBFSInit(&cfg->hugetlbfs[0],
- p->str, true) < 0)
- goto cleanup;
- } else {
- VIR_FREE(cfg->hugetlbfs);
- cfg->nhugetlbfs = 0;
+ if (STREQ(p->str, "")) {
+ VIR_FREE(cfg->hugetlbfs);
+ cfg->nhugetlbfs = 0;
+ } else {
+ if (VIR_REALLOC_N(cfg->hugetlbfs, 1) < 0)
+ goto cleanup;
+ cfg->nhugetlbfs = 1;
+ if (virQEMUDriverConfigHugeTLBFSInit(&cfg->hugetlbfs[0],
+ p->str, true) < 0)
+ goto cleanup;
+ }
}
}
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 16d03d8..d898aad 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -3791,7 +3791,7 @@ int qemuProcessStart(virConnectPtr conn,
}
virDomainAuditSecurityLabel(vm, true);
- if (vm->def->mem.hugepage_backed) {
+ if (vm->def->mem.nhugepages) {
for (i = 0; i < cfg->nhugetlbfs; i++) {
char *hugepagePath = qemuGetHugepagePath(&cfg->hugetlbfs[i]);
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml
new file mode 100644
index 0000000..5ad0695
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages-pages.xml
@@ -0,0 +1,45 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>4194304</memory>
+ <currentMemory unit='KiB'>4194304</currentMemory>
+ <memoryBacking>
+ <hugepages>
+ <page size='2048' unit='KiB' nodeset='1'/>
+ <page size='1048576' unit='KiB' nodeset='0,2-3'/>
+ </hugepages>
+ </memoryBacking>
+ <vcpu placement='static'>4</vcpu>
+ <numatune>
+ <memory mode='strict' nodeset='0-3'/>
+ <memnode cellid='3' mode='strict' nodeset='3'/>
+ </numatune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <cpu>
+ <numa>
+ <cell id='0' cpus='0' memory='1048576'/>
+ <cell id='1' cpus='1' memory='1048576'/>
+ <cell id='2' cpus='2' memory='1048576'/>
+ <cell id='3' cpus='3' memory='1048576'/>
+ </numa>
+ </cpu>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu</emulator>
+ <disk type='block' device='disk'>
+ <source dev='/dev/HostVG/QEMUGuest1'/>
+ <target dev='hda' bus='ide'/>
+ <address type='drive' controller='0' bus='0' target='0' unit='0'/>
+ </disk>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c
index cefe05b..09cb228 100644
--- a/tests/qemuxml2xmltest.c
+++ b/tests/qemuxml2xmltest.c
@@ -197,6 +197,7 @@ mymain(void)
DO_TEST("hyperv-off");
DO_TEST("hugepages");
+ DO_TEST("hugepages-pages");
DO_TEST("nosharepages");
DO_TEST("disk-aio");
DO_TEST("disk-cdrom");
--
1.8.5.5