Add a memoryBacking element named "strict-hugepages" that controls
whether guest startup is refused when guest memory cannot be
allocated from hugepages.
Signed-off-by: Marcelo Tosatti <mtosatti(a)redhat.com>
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index ff50214..e79f5e6 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -632,6 +632,9 @@
<dt><code>hugepages</code></dt>
<dd>This tells the hypervisor that the guest should have its memory
allocated using hugepages instead of the normal native page size.</dd>
+ <dt><code>strict-hugepages</code></dt>
+ <dd>This makes guest startup fail if the guest memory cannot be
+ allocated using hugepages.</dd>
<dt><code>nosharepages</code></dt>
<dd>Instructs hypervisor to disable shared pages (memory merge, KSM) for
this domain. <span class="since">Since
1.0.6</span></dd>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 28e24f9..f16ef0b 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -11226,6 +11226,9 @@ virDomainDefParseXML(xmlDocPtr xml,
if (virXPathBoolean("boolean(./memoryBacking/locked)", ctxt))
def->mem.locked = true;
+ if ((node = virXPathNode("./memoryBacking/stricthugepages", ctxt)))
+ def->mem.strict_hugepages = true;
+
/* Extract blkio cgroup tunables */
if (virXPathUInt("string(./blkiotune/weight)", ctxt,
&def->blkio.weight) < 0)
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index d8f2e49..8ea5cf0 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1977,6 +1977,7 @@ struct _virDomainDef {
unsigned long long max_balloon; /* in kibibytes */
unsigned long long cur_balloon; /* in kibibytes */
bool hugepage_backed;
+ bool strict_hugepages;
bool nosharepages;
bool locked;
int dump_core; /* enum virDomainMemDump */
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 96b8825..3f8d0a4 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -12133,10 +12133,9 @@ cleanup:
return def;
}
-
-static int qemuParseProcFileStrings(int pid_value,
- const char *name,
- char ***list)
+int qemuParseProcFileStrings(int pid_value,
+ const char *name,
+ char ***list)
{
char *path = NULL;
int ret = -1;
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
index de7683d..bcdfefa 100644
--- a/src/qemu/qemu_command.h
+++ b/src/qemu/qemu_command.h
@@ -226,7 +226,9 @@ virDomainDefPtr qemuParseCommandLinePid(virCapsPtr qemuCaps,
char **pidfile,
virDomainChrSourceDefPtr *monConfig,
bool *monJSON);
-
+int qemuParseProcFileStrings(int pid_value,
+ const char *name,
+ char ***list);
int qemuDomainAssignAddresses(virDomainDefPtr def,
virQEMUCapsPtr qemuCaps,
virDomainObjPtr obj)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 8bcd98e..cb8298e 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -25,6 +25,7 @@
#include <unistd.h>
#include <signal.h>
#include <sys/stat.h>
+#include <stdlib.h>
#if defined(__linux__)
# include <linux/capability.h>
#elif defined(__FreeBSD__)
@@ -3507,6 +3508,95 @@ error:
}
+/*
+ * qemuValidateStrictHugepage:
+ * @vm: domain object; vm->def->mem and vm->pid are read
+ * @cfg: driver configuration; cfg->hugepagePath is read
+ *
+ * When the domain requests strict hugepages, look through the "maps"
+ * proc file entries of the QEMU process for a mapping whose line
+ * mentions a backing file under cfg->hugepagePath and whose address
+ * range is at least as large as the guest's maximum memory.
+ *
+ * Returns bool: whether to fail guest initialization.  false when
+ * strict hugepages is not requested or a sufficiently large
+ * hugepage-backed mapping was found; true otherwise.
+ */
+static bool qemuValidateStrictHugepage(virDomainObjPtr vm, virQEMUDriverConfigPtr cfg)
+{
+    /* Backing file name prefixes searched for, relative to hugepagePath. */
+    static const char qemu_suffix[] = "/qemu_back_mem.pc.ram.";
+    static const char kvm_suffix[] = "/kvm.";
+    bool ret = false;
+    char **maps = NULL;
+    char *qemu_prefix = NULL;
+    char *kvm_prefix = NULL;
+    size_t path_len;
+    size_t i;
+
+    if (!vm->def->mem.strict_hugepages)
+        return ret;
+
+    /* From here on, fail unless a suitable mapping is found. */
+    ret = true;
+
+    if (!vm->def->mem.hugepage_backed || !cfg->hugepagePath) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("strict huge pages depends on huge pages"));
+        return ret;
+    }
+
+    /* Build both search prefixes once; sizeof() of the suffix arrays
+     * already accounts for the terminating NUL. */
+    path_len = strlen(cfg->hugepagePath);
+    qemu_prefix = malloc(path_len + sizeof(qemu_suffix));
+    kvm_prefix = malloc(path_len + sizeof(kvm_suffix));
+    if (!qemu_prefix || !kvm_prefix) {
+        virReportError(VIR_ERR_NO_MEMORY, "%s",
+                       _("cannot allocate hugepage path buffer"));
+        goto cleanup;
+    }
+    snprintf(qemu_prefix, path_len + sizeof(qemu_suffix), "%s%s",
+             cfg->hugepagePath, qemu_suffix);
+    snprintf(kvm_prefix, path_len + sizeof(kvm_suffix), "%s%s",
+             cfg->hugepagePath, kvm_suffix);
+
+    /* The parser requires /proc/pid, which only exists on platforms
+     * like Linux where pid_t fits in int.
+     * NOTE(review): no error is reported here on failure; presumably
+     * qemuParseProcFileStrings reports its own - confirm. */
+    if ((int) vm->pid != vm->pid ||
+        qemuParseProcFileStrings(vm->pid, "maps", &maps) < 0)
+        goto cleanup;
+
+    for (i = 0; maps && maps[i]; i++) {
+        const char *map = maps[i];
+        const char *rest;
+        char *endptr = NULL;
+        unsigned long start, end;
+
+        if (!strstr(map, qemu_prefix) && !strstr(map, kvm_prefix))
+            continue;
+
+        /* Each entry is expected to begin with "<start>-<end>" in hex;
+         * skip anything that does not parse that way. */
+        errno = 0;
+        start = strtoul(map, &endptr, 16);
+        if (errno != 0 || endptr == map || *endptr != '-')
+            continue;
+
+        rest = endptr + 1;
+        errno = 0;
+        end = strtoul(rest, &endptr, 16);
+        if (errno != 0 || endptr == rest || end < start)
+            continue;
+
+        /* max_balloon is in kibibytes; the mapping size is in bytes. */
+        if (end - start >= vm->def->mem.max_balloon * 1024) {
+            ret = false;
+            break;
+        }
+    }
+
+    if (ret) {
+        /* FIXME: is VIR_ERR_NO_MEMORY to be used exclusively
+         * to reference libvirt allocation failures?
+         */
+        virReportError(VIR_ERR_NO_MEMORY, "%s",
+                       _("guest memory not hugetlbfs backed"));
+    }
+
+cleanup:
+    /* Release every string and then the array that held them. */
+    for (i = 0; maps && maps[i]; i++)
+        VIR_FREE(maps[i]);
+    VIR_FREE(maps);
+    free(qemu_prefix);
+    free(kvm_prefix);
+    return ret;
+}
+
+
static bool
qemuValidateCpuMax(virDomainDefPtr def, virQEMUCapsPtr qemuCaps)
{
@@ -4071,6 +4161,13 @@ int qemuProcessStart(virConnectPtr conn,
goto cleanup;
}
+ /* enforce strict hugepage */
+ if (qemuValidateStrictHugepage(vm, cfg)) {
+ VIR_WARN("Failure to allocate hugepage backing for %s, exiting",
+ vm->def->name);
+ goto cleanup;
+ }
+
/* set default link states */
/* qemu doesn't support setting this on the command line, so
* enter the monitor */