* configure.in: Add check for mntent.h
* qemud/libvirtd_qemu.aug, qemud/test_libvirtd_qemu.aug, src/qemu.conf
Add 'hugetlbfs_mount' config parameter
* src/qemu_conf.c, src/qemu_conf.h: Check for -mem-path flag in QEMU,
and pass it when hugepages are requested.
Load hugetlbfs_mount config parameter, search for mount if not given.
* src/qemu_driver.c: Free hugetlbfs_mount/path parameter in driver shutdown.
Create directory for QEMU hugepage usage, chowning if required.
* docs/formatdomain.html.in: Document memoryBacking/hugepages elements
* docs/schemas/domain.rng: Add memoryBacking/hugepages elements to schema
* src/util.c, src/util.h, src/libvirt_private.syms: Add virFileFindMountPoint
helper API
* tests/qemuhelptest.c: Add -mem-path constants
* tests/qemuxml2argvtest.c, tests/qemuxml2xmltest.c: Add tests for hugepage
handling
* tests/qemuxml2argvdata/qemuxml2argv-hugepages.xml,
tests/qemuxml2argvdata/qemuxml2argv-hugepages.args: Data files for
hugepage tests
---
configure.in | 2 +-
docs/formatdomain.html | 8 +++-
docs/formatdomain.html.in | 8 +++
docs/schemas/domain.rng | 9 ++++
qemud/libvirtd_qemu.aug | 1 +
qemud/test_libvirtd_qemu.aug | 4 ++
src/domain_conf.c | 10 ++++-
src/domain_conf.h | 1 +
src/libvirt_private.syms | 1 +
src/qemu.conf | 13 +++++
src/qemu_conf.c | 49 ++++++++++++++++++++
src/qemu_conf.h | 3 +
src/qemu_driver.c | 34 ++++++++++++++
src/util.c | 37 ++++++++++++++-
src/util.h | 4 ++
tests/qemuhelptest.c | 6 ++-
tests/qemuxml2argvdata/qemuxml2argv-hugepages.args | 1 +
tests/qemuxml2argvdata/qemuxml2argv-hugepages.xml | 25 ++++++++++
tests/qemuxml2argvtest.c | 7 ++-
tests/qemuxml2xmltest.c | 1 +
20 files changed, 217 insertions(+), 7 deletions(-)
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages.args
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-hugepages.xml
diff --git a/configure.in b/configure.in
index d28c44a..43b9b46 100644
--- a/configure.in
+++ b/configure.in
@@ -83,7 +83,7 @@ dnl Availability of various not common threadsafe functions
AC_CHECK_FUNCS([strerror_r strtok_r getmntent_r getgrnam_r getpwuid_r])
dnl Availability of various common headers (non-fatal if missing).
-AC_CHECK_HEADERS([pwd.h paths.h regex.h sys/syslimits.h sys/utsname.h sys/wait.h
winsock2.h sched.h termios.h sys/poll.h syslog.h])
+AC_CHECK_HEADERS([pwd.h paths.h regex.h sys/syslimits.h sys/utsname.h sys/wait.h
winsock2.h sched.h termios.h sys/poll.h syslog.h mntent.h])
dnl Where are the XDR functions?
dnl If portablexdr is installed, prefer that.
diff --git a/docs/formatdomain.html b/docs/formatdomain.html
index 5415200..04ef05d 100644
--- a/docs/formatdomain.html
+++ b/docs/formatdomain.html
@@ -330,13 +330,19 @@
...
<memory>524288</memory>
<currentMemory>524288</currentMemory>
+ <memoryBacking>
+ <hugepages/>
+ </memoryBacking>
<vcpu>1</vcpu>
...</pre>
<dl><dt><code>memory</code></dt><dd>The
maximum allocation of memory for the guest at boot time.
The units for this value are kilobytes (i.e. blocks of 1024
bytes)</dd><dt><code>currentMemory</code></dt><dd>The
actual allocation of memory for the guest. This value
be less than the maximum allocation, to allow for ballooning
up the guests memory on the fly. If this is omitted, it defaults
- to the same value as the <code>memory<code>
element</code></code></dd><dt><code>vcpu</code></dt><dd>The
content of this element defines the number of virtual
+ to the same value as the <code>memory<code>
element</code></code></dd><dt><code>memoryBacking</code></dt><dd>The
optional <code>memoryBacking</code> element, may have an
+ <code>hugepages</code> element set within it. This tells the
+ hypervisor that the guest should have its memory allocated using
+ hugepages instead of the normal native page
size.</dd><dt><code>vcpu</code></dt><dd>The content of
this element defines the number of virtual
CPUs allocated for the guest OS.</dd></dl>
<h3>
<a name="elementsLifecycle"
id="elementsLifecycle">Lifecycle control</a>
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index eb12784..143551f 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -176,6 +176,9 @@
...
<memory>524288</memory>
<currentMemory>524288</currentMemory>
+ <memoryBacking>
+ <hugepages/>
+ </memoryBacking>
<vcpu>1</vcpu>
...</pre>
@@ -188,6 +191,11 @@
be less than the maximum allocation, to allow for ballooning
up the guests memory on the fly. If this is omitted, it defaults
to the same value as the <code>memory<code> element</dd>
+ <dt><code>memoryBacking</code></dt>
+ <dd>The optional <code>memoryBacking</code> element, may have an
+ <code>hugepages</code> element set within it. This tells the
+ hypervisor that the guest should have its memory allocated using
+ hugepages instead of the normal native page size.</dd>
<dt><code>vcpu</code></dt>
<dd>The content of this element defines the number of virtual
CPUs allocated for the guest OS.</dd>
diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng
index f857301..d1dab00 100644
--- a/docs/schemas/domain.rng
+++ b/docs/schemas/domain.rng
@@ -244,6 +244,15 @@
</element>
</optional>
<optional>
+ <element name="memoryBacking">
+ <optional>
+ <element name="hugepages">
+ <empty/>
+ </element>
+ </optional>
+ </element>
+ </optional>
+ <optional>
<element name="vcpu">
<optional>
<attribute name="cpuset"/>
diff --git a/qemud/libvirtd_qemu.aug b/qemud/libvirtd_qemu.aug
index 2175e14..f0b2a5e 100644
--- a/qemud/libvirtd_qemu.aug
+++ b/qemud/libvirtd_qemu.aug
@@ -35,6 +35,7 @@ module Libvirtd_qemu =
| str_array_entry "cgroup_controllers"
| str_array_entry "cgroup_device_acl"
| str_entry "save_image_format"
+ | str_entry "hugetlbfs_mount"
(* Each enty in the config is one of the following three ... *)
let entry = vnc_entry
diff --git a/qemud/test_libvirtd_qemu.aug b/qemud/test_libvirtd_qemu.aug
index 72f2227..ac89438 100644
--- a/qemud/test_libvirtd_qemu.aug
+++ b/qemud/test_libvirtd_qemu.aug
@@ -91,6 +91,8 @@ cgroup_controllers = [ \"cpu\", \"devices\" ]
cgroup_device_acl = [ \"/dev/null\", \"/dev/full\",
\"/dev/zero\" ]
save_image_format = \"gzip\"
+
+hugetlbfs_mount = \"/dev/hugepages\"
"
test Libvirtd_qemu.lns get conf =
@@ -192,3 +194,5 @@ save_image_format = \"gzip\"
}
{ "#empty" }
{ "save_image_format" = "gzip" }
+{ "#empty" }
+{ "hugetlbfs_mount" = "/dev/hugepages" }
\ No newline at end of file
diff --git a/src/domain_conf.c b/src/domain_conf.c
index 1d2cc7c..fc97d7f 100644
--- a/src/domain_conf.c
+++ b/src/domain_conf.c
@@ -2442,6 +2442,10 @@ static virDomainDefPtr virDomainDefParseXML(virConnectPtr conn,
if (virXPathULong(conn, "string(./currentMemory[1])", ctxt,
&def->memory) < 0)
def->memory = def->maxmem;
+ node = virXPathNode(conn, "./memoryBacking/hugepages", ctxt);
+ if (node)
+ def->hugepage_backed = 1;
+
if (virXPathULong(conn, "string(./vcpu[1])", ctxt, &def->vcpus) <
0)
def->vcpus = 1;
@@ -4045,7 +4049,11 @@ char *virDomainDefFormat(virConnectPtr conn,
virBufferVSprintf(&buf, " <memory>%lu</memory>\n",
def->maxmem);
virBufferVSprintf(&buf, "
<currentMemory>%lu</currentMemory>\n",
def->memory);
-
+ if (def->hugepage_backed) {
+ virBufferAddLit(&buf, " <memoryBacking>\n");
+ virBufferAddLit(&buf, " <hugepages/>\n");
+ virBufferAddLit(&buf, " </memoryBacking>\n");
+ }
for (n = 0 ; n < def->cpumasklen ; n++)
if (def->cpumask[n] != 1)
allones = 0;
diff --git a/src/domain_conf.h b/src/domain_conf.h
index 44302be..2a6b132 100644
--- a/src/domain_conf.h
+++ b/src/domain_conf.h
@@ -524,6 +524,7 @@ struct _virDomainDef {
unsigned long memory;
unsigned long maxmem;
+ unsigned char hugepage_backed;
unsigned long vcpus;
int cpumasklen;
char *cpumask;
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 2bf4e15..bd4224a 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -400,6 +400,7 @@ virGetUserDirectory;
virGetUserName;
virGetUserID;
virGetGroupID;
+virFileFindMountPoint;
# uuid.h
diff --git a/src/qemu.conf b/src/qemu.conf
index 1f10b43..72e6165 100644
--- a/src/qemu.conf
+++ b/src/qemu.conf
@@ -139,3 +139,16 @@
# process of saving a domain in order to save disk space.
#
# save_image_format = "raw"
+
+# If provided by the host and a hugetlbfs mount point is configured,
+# a guest may request huge page backing. When this mount point is
+# unspecified here, determination of a host mount point in /proc/mounts
+# will be attempted. Specifying an explicit mount overrides detection
+# of the same in /proc/mounts. Setting the mount point to "" will
+# disable guest hugepage backing.
+#
+# NB, within this mount point, guests will create memory backing files
+# in a location of $MOUNTPOINT/libvirt/qemu
+
+# hugetlbfs_mount = "/dev/hugepages"
+
diff --git a/src/qemu_conf.c b/src/qemu_conf.c
index 22f5edd..4935e2f 100644
--- a/src/qemu_conf.c
+++ b/src/qemu_conf.c
@@ -35,6 +35,7 @@
#include <sys/wait.h>
#include <arpa/inet.h>
#include <sys/utsname.h>
+#include <mntent.h>
#include "c-ctype.h"
#include "virterror_internal.h"
@@ -87,6 +88,7 @@ VIR_ENUM_IMPL(qemuVideo, VIR_DOMAIN_VIDEO_TYPE_LAST,
NULL, /* no arg needed for xen */
NULL /* don't support vbox */);
+#define PROC_MOUNT_BUF_LEN 255
int qemudLoadDriverConfig(struct qemud_driver *driver,
const char *filename) {
@@ -106,6 +108,19 @@ int qemudLoadDriverConfig(struct qemud_driver *driver,
return -1;
}
+ /* For privilege driver, try and find hugepage mount automatically.
+ * Non-privileged driver requires admin to create a dir for the
+ * user, chown it, and then let user configure it manually */
+ if (driver->privileged &&
+ !(driver->hugetlbfs_mount = virFileFindMountPoint("hugetlbfs"))) {
+ if (errno != ENOENT) {
+ virReportSystemError(NULL, errno, "%s",
+ _("unable to find hugetlbfs mountpoint"));
+ return -1;
+ }
+ }
+
+
/* Just check the file is readable before opening it, otherwise
* libvirt emits an error.
*/
@@ -290,10 +305,22 @@ int qemudLoadDriverConfig(struct qemud_driver *driver,
}
}
+ p = virConfGetValue (conf, "hugetlbfs_mount");
+ CHECK_TYPE ("hugetlbfs_mount", VIR_CONF_STRING);
+ if (p && p->str) {
+ VIR_FREE(driver->hugetlbfs_mount);
+ if (!(driver->hugetlbfs_mount = strdup(p->str))) {
+ virReportOOMError(NULL);
+ virConfFree(conf);
+ return -1;
+ }
+ }
+
virConfFree (conf);
return 0;
}
+
struct qemu_feature_flags {
const char *name;
const int default_on;
@@ -784,6 +811,8 @@ static unsigned int qemudComputeCmdFlags(const char *help,
flags |= QEMUD_CMD_FLAG_DRIVE_BOOT;
if (strstr(help, "-pcidevice"))
flags |= QEMUD_CMD_FLAG_PCIDEVICE;
+ if (strstr(help, "-mem-path"))
+ flags |= QEMUD_CMD_FLAG_MEM_PATH;
if (version >= 9000)
flags |= QEMUD_CMD_FLAG_VNC_COLON;
@@ -1583,6 +1612,26 @@ int qemudBuildCommandLine(virConnectPtr conn,
ADD_ARG_LIT("-no-kvm");
ADD_ARG_LIT("-m");
ADD_ARG_LIT(memory);
+ if (def->hugepage_backed) {
+ if (!driver->hugetlbfs_mount) {
+ qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ "%s", _("hugetlbfs filesystem is not
mounted"));
+ goto error;
+ }
+ if (!driver->hugepage_path) {
+ qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ "%s", _("hugepages are disabled by
administrator config"));
+ goto error;
+ }
+ if (!(qemuCmdFlags & QEMUD_CMD_FLAG_MEM_PATH)) {
+ qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ _("hugepage backing not supported by
'%s'"),
+ def->emulator);
+ goto error;
+ }
+ ADD_ARG_LIT("-mem-path");
+ ADD_ARG_LIT(driver->hugepage_path);
+ }
ADD_ARG_LIT("-smp");
ADD_ARG_LIT(vcpus);
diff --git a/src/qemu_conf.h b/src/qemu_conf.h
index a126dac..f126940 100644
--- a/src/qemu_conf.h
+++ b/src/qemu_conf.h
@@ -67,6 +67,7 @@ enum qemud_cmd_flags {
QEMUD_CMD_FLAG_HOST_NET_ADD = QEMUD_CMD_FLAG_0_10, /* host_net_add monitor command
*/
QEMUD_CMD_FLAG_PCIDEVICE = (1 << 17), /* PCI device assignment only
supported by qemu-kvm */
+ QEMUD_CMD_FLAG_MEM_PATH = (1 << 18), /* mmap'ped guest backing
supported */
};
/* Main driver state */
@@ -99,6 +100,8 @@ struct qemud_driver {
char *vncListen;
char *vncPassword;
char *vncSASLdir;
+ char *hugetlbfs_mount;
+ char *hugepage_path;
virCapsPtr caps;
diff --git a/src/qemu_driver.c b/src/qemu_driver.c
index eb22940..4140a63 100644
--- a/src/qemu_driver.c
+++ b/src/qemu_driver.c
@@ -542,6 +542,38 @@ qemudStartup(int privileged) {
goto error;
}
+ /* If hugetlbfs is present, then we need to create a sub-directory within
+ * it, since we can't assume the root mount point has permissions that
+ * will let our spawned QEMU instances use it.
+ *
+ * NB the check for '/', since user may config "" to disable
hugepages
+ * even when mounted
+ */
+ if (qemu_driver->hugetlbfs_mount &&
+ qemu_driver->hugetlbfs_mount[0] == '/') {
+ char *mempath = NULL;
+ int ret;
+ if (virAsprintf(&mempath, "%s/libvirt/qemu",
qemu_driver->hugetlbfs_mount) < 0)
+ goto out_of_memory;
+
+ if ((ret = virFileMakePath(mempath)) != 0) {
+ virReportSystemError(NULL, ret,
+ _("unable to create hugepage path %s"),
mempath);
+ VIR_FREE(mempath);
+ goto error;
+ }
+ if (qemu_driver->privileged &&
+ chown(mempath, qemu_driver->user, qemu_driver->group) < 0) {
+ virReportSystemError(NULL, errno,
+ _("unable to set ownership on %s to %d:%d"),
+ mempath, qemu_driver->user, qemu_driver->group);
+ VIR_FREE(mempath);
+ goto error;
+ }
+
+ qemu_driver->hugepage_path = mempath;
+ }
+
/* Get all the running persistent or transient configs first */
if (virDomainLoadAllConfigs(NULL,
qemu_driver->caps,
@@ -673,6 +705,8 @@ qemudShutdown(void) {
VIR_FREE(qemu_driver->vncPassword);
VIR_FREE(qemu_driver->vncSASLdir);
VIR_FREE(qemu_driver->saveImageFormat);
+ VIR_FREE(qemu_driver->hugetlbfs_mount);
+ VIR_FREE(qemu_driver->hugepage_path);
/* Free domain callback list */
virDomainEventCallbackListFree(qemu_driver->domainEventCallbacks);
diff --git a/src/util.c b/src/util.c
index 0d4f3fa..35efee2 100644
--- a/src/util.c
+++ b/src/util.c
@@ -60,7 +60,9 @@
#if HAVE_CAPNG
#include <cap-ng.h>
#endif
-
+#ifdef HAVE_MNTENT_H
+#include <mntent.h>
+#endif
#include "virterror_internal.h"
#include "logging.h"
@@ -1983,3 +1985,36 @@ int virGetGroupID(virConnectPtr conn,
return 0;
}
#endif
+
+
+#ifdef HAVE_MNTENT_H
+/* search /proc/mounts for mount point of *type; return pointer to
+ * malloc'ed string of the path if found, otherwise return NULL
+ */
+char *virFileFindMountPoint(const char *type)
+{
+ FILE *f;
+ struct mntent mb;
+ char mntbuf[1024];
+ char *ret = NULL;
+
+ f = setmntent("/proc/mounts", "r");
+ if (!f)
+ return NULL;
+
+ while (getmntent_r(f, &mb, mntbuf, sizeof(mntbuf))) {
+ if (STREQ(mb.mnt_type, type)) {
+ ret = strdup(mb.mnt_dir);
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ endmntent(f);
+
+ if (!ret)
+ errno = ENOENT;
+
+ return ret;
+}
+#endif
diff --git a/src/util.h b/src/util.h
index b3e628a..896e1b4 100644
--- a/src/util.h
+++ b/src/util.h
@@ -233,4 +233,8 @@ int virGetGroupID(virConnectPtr conn,
int virRandomInitialize(unsigned int seed);
int virRandom(int max);
+#ifdef HAVE_MNTENT_H
+char *virFileFindMountPoint(const char *type);
+#endif
+
#endif /* __VIR_UTIL_H__ */
diff --git a/tests/qemuhelptest.c b/tests/qemuhelptest.c
index ad2045f..a42a1ba 100644
--- a/tests/qemuhelptest.c
+++ b/tests/qemuhelptest.c
@@ -105,7 +105,8 @@ mymain(int argc, char **argv)
QEMUD_CMD_FLAG_VNET_HDR |
QEMUD_CMD_FLAG_MIGRATE_KVM_STDIO |
QEMUD_CMD_FLAG_KVM |
- QEMUD_CMD_FLAG_DRIVE_FORMAT,
+ QEMUD_CMD_FLAG_DRIVE_FORMAT |
+ QEMUD_CMD_FLAG_MEM_PATH,
9001, 1, 74);
DO_TEST("qemu-0.10.5",
QEMUD_CMD_FLAG_KQEMU |
@@ -136,7 +137,8 @@ mymain(int argc, char **argv)
QEMUD_CMD_FLAG_DRIVE_FORMAT |
QEMUD_CMD_FLAG_VGA |
QEMUD_CMD_FLAG_0_10 |
- QEMUD_CMD_FLAG_PCIDEVICE,
+ QEMUD_CMD_FLAG_PCIDEVICE |
+ QEMUD_CMD_FLAG_MEM_PATH,
10005, 1, 0);
DO_TEST("kvm-86",
QEMUD_CMD_FLAG_VNC_COLON |
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args
b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args
new file mode 100644
index 0000000..f10a40e
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.args
@@ -0,0 +1 @@
+LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test /usr/bin/qemu -S -M pc -m 214
-mem-path /dev/hugepages/libvirt/qemu -smp 1 -nographic -monitor
unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -hda /dev/HostVG/QEMUGuest1 -net
none -serial none -parallel none -usb
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hugepages.xml
b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.xml
new file mode 100644
index 0000000..e25286f
--- /dev/null
+++ b/tests/qemuxml2argvdata/qemuxml2argv-hugepages.xml
@@ -0,0 +1,25 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory>219200</memory>
+ <currentMemory>219200</currentMemory>
+ <memoryBacking>
+ <hugepages/>
+ </memoryBacking>
+ <vcpu>1</vcpu>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu</emulator>
+ <disk type='block' device='disk'>
+ <source dev='/dev/HostVG/QEMUGuest1'/>
+ <target dev='hda' bus='ide'/>
+ </disk>
+ </devices>
+</domain>
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index 6f25e7d..ade57b1 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -160,7 +160,11 @@ mymain(int argc, char **argv)
if ((driver.caps = testQemuCapsInit()) == NULL)
return EXIT_FAILURE;
- if((driver.stateDir = strdup("/nowhere")) == NULL)
+ if ((driver.stateDir = strdup("/nowhere")) == NULL)
+ return EXIT_FAILURE;
+ if ((driver.hugetlbfs_mount = strdup("/dev/hugepages")) == NULL)
+ return EXIT_FAILURE;
+ if ((driver.hugepage_path = strdup("/dev/hugepages/libvirt/qemu")) ==
NULL)
return EXIT_FAILURE;
#define DO_TEST_FULL(name, extraFlags, migrateFrom) \
@@ -189,6 +193,7 @@ mymain(int argc, char **argv)
DO_TEST("bootloader", 0);
DO_TEST("clock-utc", 0);
DO_TEST("clock-localtime", 0);
+ DO_TEST("hugepages", QEMUD_CMD_FLAG_MEM_PATH);
DO_TEST("disk-cdrom", 0);
DO_TEST("disk-cdrom-empty", QEMUD_CMD_FLAG_DRIVE);
DO_TEST("disk-floppy", 0);
diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c
index 7db7611..7f19f78 100644
--- a/tests/qemuxml2xmltest.c
+++ b/tests/qemuxml2xmltest.c
@@ -92,6 +92,7 @@ mymain(int argc, char **argv)
DO_TEST("bootloader");
DO_TEST("clock-utc");
DO_TEST("clock-localtime");
+ DO_TEST("hugepages");
DO_TEST("disk-cdrom");
DO_TEST("disk-floppy");
DO_TEST("disk-many");
--
1.6.2.5