[libvirt] [PATCH v2 0/7] Expose host's huge pages capability

diff to v1: - Expose all page sizes, not only huge ones Michal Privoznik (7): virnuma: Introduce virNumaNodeIsAvailable nodeinfo: Rename nodeGetFreeMemory to nodeGetMemory virnuma: Introduce pages helpers virCaps: expose pages info Introduce virNodeGetFreePages virsh: Expose virNodeGetFreePages nodeinfo: Implement nodeGetFreePages daemon/remote.c | 52 +++++++ docs/schemas/capability.rng | 21 +++ include/libvirt/libvirt.h.in | 7 + src/bhyve/bhyve_driver.c | 7 +- src/conf/capabilities.c | 25 ++- src/conf/capabilities.h | 15 +- src/driver.h | 10 ++ src/internal.h | 12 ++ src/libvirt.c | 95 ++++++++++++ src/libvirt_private.syms | 6 +- src/libvirt_public.syms | 4 + src/libxl/libxl_conf.c | 1 + src/lxc/lxc_driver.c | 26 +++- src/nodeinfo.c | 170 ++++++++++++++++---- src/nodeinfo.h | 8 +- src/openvz/openvz_driver.c | 5 +- src/qemu/qemu_capabilities.c | 29 +++- src/qemu/qemu_driver.c | 26 +++- src/remote/remote_driver.c | 50 ++++++ src/remote/remote_protocol.x | 20 ++- src/remote_protocol-structs | 16 ++ src/test/test_driver.c | 2 +- src/uml/uml_driver.c | 26 +++- src/util/virnuma.c | 361 ++++++++++++++++++++++++++++++++++++++++++- src/util/virnuma.h | 11 ++ src/vbox/vbox_tmpl.c | 21 ++- src/xen/xend_internal.c | 1 + tests/vircaps2xmltest.c | 3 +- tests/vircapstest.c | 1 + tools/virsh-host.c | 167 ++++++++++++++++++++ tools/virsh.pod | 8 + 31 files changed, 1161 insertions(+), 45 deletions(-) -- 1.8.5.5

Not on all hosts the set of NUMA nodes IDs is continuous. This is critical, because our code currently assumes the set doesn't contain holes. For instance in nodeGetFreeMemory() we can see the following pattern: if ((max_node = virNumaGetMaxNode()) < 0) return 0; for (n = 0; n <= max_node; n++) { ... } while it should be something like this: if ((max_node = virNumaGetMaxNode()) < 0) return 0; for (n = 0; n <= max_node; n++) { if (!virNumaNodeIsAvailable(n)) continue; ... } Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virnuma.c | 36 ++++++++++++++++++++++++++++++++++-- src/util/virnuma.h | 1 + 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 122c572..946b264 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1663,6 +1663,7 @@ virNumaGetDistances; virNumaGetMaxNode; virNumaGetNodeMemory; virNumaIsAvailable; +virNumaNodeIsAvailable; virNumaSetupMemoryPolicy; virNumaTuneMemPlacementModeTypeFromString; virNumaTuneMemPlacementModeTypeToString; diff --git a/src/util/virnuma.c b/src/util/virnuma.c index 1676208..5814cba 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -407,6 +407,23 @@ virNumaGetMaxCPUs(void) #ifdef HAVE_NUMA_BITMASK_ISBITSET /** + * virNumaNodeIsAvailable: + * @node: node to check + * + * On some hosts the set of NUMA nodes isn't continuous. + * Use this function to test if the @node is available. 
+ * + * Returns: true if @node is available, + * false if @node doesn't exist + */ +bool +virNumaNodeIsAvailable(int node) +{ + return numa_bitmask_isbitset(numa_nodes_ptr, node); +} + + +/** * virNumaGetDistances: * @node: identifier of the requested NUMA node * @distances: array of distances to sibling nodes @@ -434,7 +451,7 @@ virNumaGetDistances(int node, int max_node; size_t i; - if (!numa_bitmask_isbitset(numa_nodes_ptr, node)) { + if (!virNumaNodeIsAvailable(node)) { VIR_DEBUG("Node %d does not exist", node); *distances = NULL; *ndistances = 0; @@ -450,7 +467,7 @@ virNumaGetDistances(int node, *ndistances = max_node + 1; for (i = 0; i<= max_node; i++) { - if (!numa_bitmask_isbitset(numa_nodes_ptr, i)) + if (!virNumaNodeIsAvailable(node)) continue; (*distances)[i] = numa_distance(node, i); @@ -460,7 +477,22 @@ virNumaGetDistances(int node, cleanup: return ret; } + + #else +bool +virNumaNodeIsAvailable(int node) +{ + int max_node = virNumaGetMaxNode(); + + if (max_node < 0) + return false; + + /* Do we have anything better? */ + return (node >= 0) && (node < max_node); +} + + int virNumaGetDistances(int node ATTRIBUTE_UNUSED, int **distances, diff --git a/src/util/virnuma.h b/src/util/virnuma.h index fe1e966..772296d 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -58,6 +58,7 @@ int virNumaSetupMemoryPolicy(virNumaTuneDef numatune, bool virNumaIsAvailable(void); int virNumaGetMaxNode(void); +bool virNumaNodeIsAvailable(int node); int virNumaGetDistances(int node, int **distances, int *ndistances); -- 1.8.5.5

On Mon, Jun 16, 2014 at 05:08:24PM +0200, Michal Privoznik wrote:
The set of NUMA node IDs is not contiguous on every host. This is critical, because our code currently assumes the set doesn't contain holes. For instance, in nodeGetFreeMemory() we can see the following pattern:
if ((max_node = virNumaGetMaxNode()) < 0) return 0;
for (n = 0; n <= max_node; n++) { ... }
while it should be something like this:
if ((max_node = virNumaGetMaxNode()) < 0) return 0;
for (n = 0; n <= max_node; n++) { if (!virNumaNodeIsAvailable(n)) continue; ... }
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virnuma.c | 36 ++++++++++++++++++++++++++++++++++-- src/util/virnuma.h | 1 + 3 files changed, 36 insertions(+), 2 deletions(-)
ACK Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

For future work we want to info for not only the free memory but overall memory size too. That's why the function must have new signature too. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/bhyve/bhyve_driver.c | 7 +++- src/libvirt_private.syms | 2 +- src/lxc/lxc_driver.c | 7 +++- src/nodeinfo.c | 96 +++++++++++++++++++++++++++++++++------------- src/nodeinfo.h | 3 +- src/openvz/openvz_driver.c | 5 ++- src/qemu/qemu_driver.c | 7 +++- src/uml/uml_driver.c | 7 +++- src/vbox/vbox_tmpl.c | 5 ++- 9 files changed, 105 insertions(+), 34 deletions(-) diff --git a/src/bhyve/bhyve_driver.c b/src/bhyve/bhyve_driver.c index 89f73ff..bb9bcb7 100644 --- a/src/bhyve/bhyve_driver.c +++ b/src/bhyve/bhyve_driver.c @@ -1244,10 +1244,15 @@ bhyveConnectGetMaxVcpus(virConnectPtr conn ATTRIBUTE_UNUSED, static unsigned long long bhyveNodeGetFreeMemory(virConnectPtr conn) { + unsigned long long freeMem; + if (virNodeGetFreeMemoryEnsureACL(conn) < 0) return 0; - return nodeGetFreeMemory(); + if (nodeGetMemory(NULL, &freeMem) < 0) + return 0; + + return freeMem; } static int diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 946b264..18fde54 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -875,8 +875,8 @@ nodeGetCPUBitmap; nodeGetCPUCount; nodeGetCPUMap; nodeGetCPUStats; -nodeGetFreeMemory; nodeGetInfo; +nodeGetMemory; nodeGetMemoryParameters; nodeGetMemoryStats; nodeSetMemoryParameters; diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index d2852a7..ab227d0 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -5477,10 +5477,15 @@ lxcNodeGetCellsFreeMemory(virConnectPtr conn, static unsigned long long lxcNodeGetFreeMemory(virConnectPtr conn) { + unsigned long long freeMem; + if (virNodeGetFreeMemoryEnsureACL(conn) < 0) return 0; - return nodeGetFreeMemory(); + if (nodeGetMemory(NULL, &freeMem) < 0) + return 0; + + return freeMem; } diff --git a/src/nodeinfo.c b/src/nodeinfo.c index 5332ede..fd831b4 100644 --- 
a/src/nodeinfo.c +++ b/src/nodeinfo.c @@ -1683,37 +1683,66 @@ nodeGetCellsFreeMemoryFake(unsigned long long *freeMems, return 1; } -static unsigned long long -nodeGetFreeMemoryFake(void) +static int +nodeGetMemoryFake(unsigned long long *mem, + unsigned long long *freeMem) { + int ret = -1; + #if defined(__FreeBSD__) unsigned long pagesize = getpagesize(); u_int value; size_t value_size = sizeof(value); - unsigned long long freemem; - if (sysctlbyname("vm.stats.vm.v_free_count", &value, - &value_size, NULL, 0) < 0) { - virReportSystemError(errno, "%s", - _("sysctl failed for vm.stats.vm.v_free_count")); - return 0; + if (mem) { + if (sysctlbyname("vm.stats.vm.v_page_count", &value, + &value_size, NULL, 0) < 0) { + virReportSystemError(errno, "%s", + _("sysctl failed for vm.stats.vm.v_page_count")); + goto cleanup; + } + *mem = value * (unsigned long long)pagesize; } - freemem = value * (unsigned long long)pagesize; + if (freeMem) { + if (sysctlbyname("vm.stats.vm.v_free_count", &value, + &value_size, NULL, 0) < 0) { + virReportSystemError(errno, "%s", + _("sysctl failed for vm.stats.vm.v_free_count")); + goto cleanup; + } + + *freeMem = value * (unsigned long long)pagesize; + } - return freemem; #else - double avail = physmem_available(); - unsigned long long ret; + if (mem) { + double total = physmem_total(); + if (!total) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot determine free memory")); + goto cleanup; + } - if (!(ret = (unsigned long long)avail)) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Cannot determine free memory")); - return 0; + *mem = (unsigned long long) total; } - return ret; + if (freeMem) { + double avail = physmem_available(); + + if (!avail) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot determine free memory")); + goto cleanup; + } + + *freeMem = (unsigned long long) avail; + } #endif + + ret = 0; + cleanup: + return ret; } /* returns 1 on success, 0 if the detection failed and -1 on hard error */ @@ 
-1914,25 +1943,40 @@ nodeGetCellsFreeMemory(unsigned long long *freeMems, return ret; } -unsigned long long -nodeGetFreeMemory(void) +int +nodeGetMemory(unsigned long long *mem, + unsigned long long *freeMem) { - unsigned long long mem; - unsigned long long freeMem = 0; int max_node; int n; + if (mem) + *mem = 0; + + if (freeMem) + *freeMem = 0; + if (!virNumaIsAvailable()) - return nodeGetFreeMemoryFake(); + return nodeGetMemoryFake(mem, freeMem); if ((max_node = virNumaGetMaxNode()) < 0) - return 0; + return -1; for (n = 0; n <= max_node; n++) { - virNumaGetNodeMemory(n, NULL, &mem); + unsigned long long tmp_mem = 0, tmp_freeMem = 0; - freeMem += mem; + if (!virNumaNodeIsAvailable(n)) + continue; + + if (virNumaGetNodeMemory(n, &tmp_mem, &tmp_freeMem) < 0) + return -1; + + if (mem) + *mem += tmp_mem; + + if (freeMem) + *freeMem += tmp_freeMem; } - return freeMem; + return 0; } diff --git a/src/nodeinfo.h b/src/nodeinfo.h index c81fcbb..e7ec144 100644 --- a/src/nodeinfo.h +++ b/src/nodeinfo.h @@ -40,7 +40,8 @@ int nodeGetMemoryStats(int cellNum, int nodeGetCellsFreeMemory(unsigned long long *freeMems, int startCell, int maxCells); -unsigned long long nodeGetFreeMemory(void); +int nodeGetMemory(unsigned long long *mem, + unsigned long long *freeMem); virBitmapPtr nodeGetCPUBitmap(int *max_id); int nodeGetCPUCount(void); diff --git a/src/openvz/openvz_driver.c b/src/openvz/openvz_driver.c index 87df2a7..4c815ed 100644 --- a/src/openvz/openvz_driver.c +++ b/src/openvz/openvz_driver.c @@ -2180,7 +2180,10 @@ openvzNodeGetCellsFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED, static unsigned long long openvzNodeGetFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED) { - return nodeGetFreeMemory(); + unsigned long long freeMem; + if (nodeGetMemory(NULL, &freeMem) < 0) + return 0; + return freeMem; } diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 7bf2020..88051c9 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -16602,10 +16602,15 @@ 
qemuNodeGetCellsFreeMemory(virConnectPtr conn, static unsigned long long qemuNodeGetFreeMemory(virConnectPtr conn) { + unsigned long long freeMem; + if (virNodeGetFreeMemoryEnsureACL(conn) < 0) return 0; - return nodeGetFreeMemory(); + if (nodeGetMemory(NULL, &freeMem) < 0) + return 0; + + return freeMem; } diff --git a/src/uml/uml_driver.c b/src/uml/uml_driver.c index 1e0ec0e..a5e9ea8 100644 --- a/src/uml/uml_driver.c +++ b/src/uml/uml_driver.c @@ -2806,10 +2806,15 @@ umlNodeGetCellsFreeMemory(virConnectPtr conn, static unsigned long long umlNodeGetFreeMemory(virConnectPtr conn) { + unsigned long long freeMem; + if (virNodeGetFreeMemoryEnsureACL(conn) < 0) return 0; - return nodeGetFreeMemory(); + if (nodeGetMemory(NULL, &freeMem) < 0) + return 0; + + return freeMem; } diff --git a/src/vbox/vbox_tmpl.c b/src/vbox/vbox_tmpl.c index 1ed2729..66e933a 100644 --- a/src/vbox/vbox_tmpl.c +++ b/src/vbox/vbox_tmpl.c @@ -11468,7 +11468,10 @@ vboxNodeGetCellsFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED, static unsigned long long vboxNodeGetFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED) { - return nodeGetFreeMemory(); + unsigned long long freeMem; + if (nodeGetMemory(NULL, &freeMem) < 0) + return 0; + return freeMem; } -- 1.8.5.5

On Mon, Jun 16, 2014 at 05:08:25PM +0200, Michal Privoznik wrote:
For future work we want to info for not only the free memory but
I presume you mean s/to info/to get info/
overall memory size too. That's why the function must have new signature too.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/bhyve/bhyve_driver.c | 7 +++- src/libvirt_private.syms | 2 +- src/lxc/lxc_driver.c | 7 +++- src/nodeinfo.c | 96 +++++++++++++++++++++++++++++++++------------- src/nodeinfo.h | 3 +- src/openvz/openvz_driver.c | 5 ++- src/qemu/qemu_driver.c | 7 +++- src/uml/uml_driver.c | 7 +++- src/vbox/vbox_tmpl.c | 5 ++- 9 files changed, 105 insertions(+), 34 deletions(-)
ACK with the typo in the commit message fixed Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

For future work we need two functions that fetches total number of pages and number of free pages for given NUMA node and page size (virNumaGetPageInfo()). Then we need to learn pages of what sizes are supported on given node (virNumaGetPages()). Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 2 + src/util/virnuma.c | 325 +++++++++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 10 ++ 3 files changed, 337 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 18fde54..a7834ed 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1662,6 +1662,8 @@ virNumaGetAutoPlacementAdvice; virNumaGetDistances; virNumaGetMaxNode; virNumaGetNodeMemory; +virNumaGetPageInfo; +virNumaGetPages; virNumaIsAvailable; virNumaNodeIsAvailable; virNumaSetupMemoryPolicy; diff --git a/src/util/virnuma.c b/src/util/virnuma.c index 5814cba..a59feca 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -34,12 +34,18 @@ #endif /* WITH_NUMACTL */ +#include <sys/types.h> +#include <dirent.h> + #include "virnuma.h" #include "vircommand.h" #include "virerror.h" #include "virlog.h" #include "viralloc.h" #include "virbitmap.h" +#include "virstring.h" +#include "virfile.h" +#include "nodeinfo.h" #define VIR_FROM_THIS VIR_FROM_NONE @@ -504,3 +510,322 @@ virNumaGetDistances(int node ATTRIBUTE_UNUSED, return 0; } #endif + + +#define HUGEPAGES_NUMA_PREFIX "/sys/devices/system/node/" +#define HUGEPAGES_SYSTEM_PREFIX "/sys/kernel/mm/hugepages/" +#define HUGEPAGES_PREFIX "hugepages-" + +static int +virNumaGetHugePageInfoPath(char **path, + int node, + unsigned int page_size, + const char *suffix) +{ + + int ret = -1; + + if (node == -1) { + /* We are aiming at overall system info */ + if (page_size) { + /* And even on specific huge page size */ + if (virAsprintf(path, + HUGEPAGES_SYSTEM_PREFIX HUGEPAGES_PREFIX "%ukB/%s", + page_size, suffix ? 
suffix : "") < 0) + goto cleanup; + } else { + if (VIR_STRDUP(*path, HUGEPAGES_SYSTEM_PREFIX) < 0) + goto cleanup; + } + + } else { + /* We are aiming on specific NUMA node */ + if (page_size) { + /* And even on specific huge page size */ + if (virAsprintf(path, + HUGEPAGES_NUMA_PREFIX "node%d/hugepages/" + HUGEPAGES_PREFIX "%ukB/%s", + node, page_size, suffix ? suffix : "") < 0) + goto cleanup; + } else { + if (virAsprintf(path, + HUGEPAGES_NUMA_PREFIX "node%d/hugepages/", + node) < 0) + goto cleanup; + } + } + + ret = 0; + cleanup: + return ret; +} + + +/** + * virNumaGetHugePageInfo: + * @node: NUMA node id + * @page_size: which huge page are we interested in + * @page_avail: total number of huge pages in the pool + * @page_free: the number of free huge pages in the pool + * + * For given NUMA node and huge page size fetch information on + * total number of huge pages in the pool (both free and taken) + * and count for free huge pages in the pool. + * + * If you're interested in just one bit, pass NULL to the other one. + * + * As a special case, if @node == -1, overall info is fetched + * from the system. + * + * Returns 0 on success, -1 otherwise (with error reported). 
+ */ +static int +virNumaGetHugePageInfo(int node, + unsigned int page_size, + unsigned int *page_avail, + unsigned int *page_free) +{ + int ret = -1; + char *path = NULL; + char *buf = NULL; + char *end; + + if (page_avail) { + if (virNumaGetHugePageInfoPath(&path, node, + page_size, "nr_hugepages") < 0) + goto cleanup; + + if (virFileReadAll(path, 1024, &buf) < 0) + goto cleanup; + + if (virStrToLong_ui(buf, &end, 10, page_avail) < 0 || + *end != '\n') { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unable to parse: %s"), + buf); + goto cleanup; + } + VIR_FREE(buf); + VIR_FREE(path); + } + + if (page_free) { + if (virNumaGetHugePageInfoPath(&path, node, + page_size, "free_hugepages") < 0) + goto cleanup; + + if (virFileReadAll(path, 1024, &buf) < 0) + goto cleanup; + + if (virStrToLong_ui(buf, &end, 10, page_free) < 0 || + *end != '\n') { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unable to parse: %s"), + buf); + goto cleanup; + } + } + + ret = 0; + cleanup: + VIR_FREE(buf); + VIR_FREE(path); + return ret; +} + +/** + * virNumaGetPageInfo: + * @node: NUMA node id + * @page_size: which huge page are we interested in (in KiB) + * @page_avail: total number of huge pages in the pool + * @page_free: the number of free huge pages in the pool + * + * For given NUMA node and page size fetch information on + * total number of pages in the pool (both free and taken) + * and count for free pages in the pool. + * + * If you're interested in just one bit, pass NULL to the other one. + * + * As a special case, if @node == -1, overall info is fetched + * from the system. + * + * Returns 0 on success, -1 otherwise (with error reported). 
+ */ +int +virNumaGetPageInfo(int node, + unsigned int page_size, + unsigned int *page_avail, + unsigned int *page_free) +{ + int ret = -1; + long system_page_size = sysconf(_SC_PAGESIZE); + + /* sysconf() returns page size in bytes, + * the @page_size is however in kibibytes */ + if (page_size == system_page_size / 1024) { + unsigned long long memsize, memfree; + + /* TODO: come up with better algorithm that takes huge pages into + * account. The problem is huge pages cut off regular memory. */ + if (node == -1) { + if (nodeGetMemory(&memsize, &memfree) < 0) + goto cleanup; + } else { + if (virNumaGetNodeMemory(node, &memsize, &memfree) < 0) + goto cleanup; + } + + if (page_avail) + *page_avail = memsize / system_page_size; + + if (page_free) + *page_free = memfree / system_page_size; + } else { + if (virNumaGetHugePageInfo(node, page_size, page_avail, page_free) < 0) + goto cleanup; + } + + ret = 0; + cleanup: + return ret; +} + + +/** + * virNumaGetPages: + * @node: NUMA node id + * @pages_size: list of pages supported on @node + * @pages_avail: list of the pool sizes on @node + * @pages_free: list of free pages on @node + * @npages: the lists size + * + * For given NUMA node fetch info on pages. The size of pages + * (e.g. 4K, 2M, 1G) is stored into @pages_size, the size of the + * pool is then stored into @pages_avail and the number of free + * pages in the pool is stored into @pages_free. + * + * If you're interested only in some lists, pass NULL to the + * other ones. + * + * As a special case, if @node == -1, overall info is fetched + * from the system. + * + * Returns 0 on success, -1 otherwise. 
+ */ +int +virNumaGetPages(int node, + unsigned int **pages_size, + unsigned int **pages_avail, + unsigned int **pages_free, + size_t *npages) +{ + int ret = -1; + char *path = NULL; + DIR *dir = NULL; + struct dirent *entry; + unsigned int *tmp_size = NULL, *tmp_avail = NULL, *tmp_free = NULL; + unsigned int ntmp = 0; + size_t i; + bool exchange; + long system_page_size; + + /* sysconf() returns page size in bytes, + * but we are storing the page size in kibibytes. */ + system_page_size = sysconf(_SC_PAGESIZE) / 1024; + + /* We know that ordinary system pages are supported + * if nothing else is. */ + if (VIR_REALLOC_N(tmp_size, 1) < 0 || + VIR_REALLOC_N(tmp_avail, 1) < 0 || + VIR_REALLOC_N(tmp_free, 1) < 0) + goto cleanup; + + if (virNumaGetPageInfo(node, system_page_size, + &tmp_avail[ntmp], &tmp_free[ntmp]) < 0) + goto cleanup; + tmp_size[ntmp] = system_page_size; + ntmp++; + + /* Now that we got ordinary system pages, lets get info on huge pages */ + if (virNumaGetHugePageInfoPath(&path, node, 0, NULL) < 0) + goto cleanup; + + if (!(dir = opendir(path))) { + virReportSystemError(errno, + _("unable to open path: %s"), + path); + goto cleanup; + } + + while (virDirRead(dir, &entry, path) > 0) { + const char *page_name = entry->d_name; + unsigned int page_size, page_avail = 0, page_free = 0; + char *end; + + /* Just to give you a hint, we're dealing with this: + * hugepages-2048kB/ or hugepages-1048576kB/ */ + if (!STRPREFIX(entry->d_name, HUGEPAGES_PREFIX)) + continue; + + page_name += strlen(HUGEPAGES_PREFIX); + + if (virStrToLong_ui(page_name, &end, 10, &page_size) < 0 || + STRCASENEQ(end, "kB")) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unable to parse %s"), + entry->d_name); + goto cleanup; + } + + /* Querying more detailed info makes sense only sometimes */ + if ((pages_avail || pages_free) && + virNumaGetHugePageInfo(node, page_size, + &page_avail, &page_free) < 0) + goto cleanup; + + if (VIR_REALLOC_N(tmp_size, ntmp + 1) < 0 || + 
VIR_REALLOC_N(tmp_avail, ntmp + 1) < 0 || + VIR_REALLOC_N(tmp_free, ntmp + 1) < 0) + goto cleanup; + + tmp_size[ntmp] = page_size; + tmp_avail[ntmp] = page_avail; + tmp_free[ntmp] = page_free; + ntmp++; + } + + /* Just to produce nice output, sort the arrays by increasing page size */ + do { + exchange = false; + for (i = 0; i < ntmp -1; i++) { + if (tmp_size[i] > tmp_size[i + 1]) { + exchange = true; + SWAP(tmp_size[i], tmp_size[i + 1]); + SWAP(tmp_avail[i], tmp_avail[i + 1]); + SWAP(tmp_free[i], tmp_free[i + 1]); + } + } + } while (exchange); + + if (pages_size) { + *pages_size = tmp_size; + tmp_size = NULL; + } + if (pages_avail) { + *pages_avail = tmp_avail; + tmp_avail = NULL; + } + if (pages_free) { + *pages_free = tmp_free; + tmp_free = NULL; + } + *npages = ntmp; + ret = 0; + cleanup: + VIR_FREE(tmp_free); + VIR_FREE(tmp_avail); + VIR_FREE(tmp_size); + closedir(dir); + VIR_FREE(path); + return ret; +} diff --git a/src/util/virnuma.h b/src/util/virnuma.h index 772296d..a7435dc 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -70,4 +70,14 @@ unsigned int virNumaGetMaxCPUs(void); int virNumaGetNodeCPUs(int node, virBitmapPtr *cpus); +int virNumaGetPageInfo(int node, + unsigned int page_size, + unsigned int *page_avail, + unsigned int *page_free); +int virNumaGetPages(int node, + unsigned int **pages_size, + unsigned int **pages_avail, + unsigned int **pages_free, + size_t *npages) + ATTRIBUTE_NONNULL(5); #endif /* __VIR_NUMA_H__ */ -- 1.8.5.5

On Mon, Jun 16, 2014 at 05:08:26PM +0200, Michal Privoznik wrote:
For future work we need two functions. The first fetches the total number of pages and the number of free pages for a given NUMA node and page size (virNumaGetPageInfo()).
The second reports which page sizes are supported on a given node (virNumaGetPages()).
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 2 + src/util/virnuma.c | 325 +++++++++++++++++++++++++++++++++++++++++++++++ src/util/virnuma.h | 10 ++ 3 files changed, 337 insertions(+)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 18fde54..a7834ed 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1662,6 +1662,8 @@ virNumaGetAutoPlacementAdvice; virNumaGetDistances; virNumaGetMaxNode; virNumaGetNodeMemory; +virNumaGetPageInfo; +virNumaGetPages; virNumaIsAvailable; virNumaNodeIsAvailable; virNumaSetupMemoryPolicy; diff --git a/src/util/virnuma.c b/src/util/virnuma.c index 5814cba..a59feca 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -34,12 +34,18 @@
#endif /* WITH_NUMACTL */
+#include <sys/types.h> +#include <dirent.h> + #include "virnuma.h" #include "vircommand.h" #include "virerror.h" #include "virlog.h" #include "viralloc.h" #include "virbitmap.h" +#include "virstring.h" +#include "virfile.h" +#include "nodeinfo.h"
#define VIR_FROM_THIS VIR_FROM_NONE
@@ -504,3 +510,322 @@ virNumaGetDistances(int node ATTRIBUTE_UNUSED, return 0; } #endif + + +#define HUGEPAGES_NUMA_PREFIX "/sys/devices/system/node/" +#define HUGEPAGES_SYSTEM_PREFIX "/sys/kernel/mm/hugepages/" +#define HUGEPAGES_PREFIX "hugepages-" + +static int +virNumaGetHugePageInfoPath(char **path, + int node, + unsigned int page_size, + const char *suffix) +{ + + int ret = -1; + + if (node == -1) { + /* We are aiming at overall system info */ + if (page_size) { + /* And even on specific huge page size */ + if (virAsprintf(path, + HUGEPAGES_SYSTEM_PREFIX HUGEPAGES_PREFIX "%ukB/%s", + page_size, suffix ? suffix : "") < 0) + goto cleanup; + } else { + if (VIR_STRDUP(*path, HUGEPAGES_SYSTEM_PREFIX) < 0) + goto cleanup; + } + + } else { + /* We are aiming on specific NUMA node */ + if (page_size) { + /* And even on specific huge page size */ + if (virAsprintf(path, + HUGEPAGES_NUMA_PREFIX "node%d/hugepages/" + HUGEPAGES_PREFIX "%ukB/%s", + node, page_size, suffix ? suffix : "") < 0) + goto cleanup; + } else { + if (virAsprintf(path, + HUGEPAGES_NUMA_PREFIX "node%d/hugepages/", + node) < 0) + goto cleanup; + } + } + + ret = 0; + cleanup: + return ret; +} + + +/** + * virNumaGetHugePageInfo: + * @node: NUMA node id + * @page_size: which huge page are we interested in + * @page_avail: total number of huge pages in the pool + * @page_free: the number of free huge pages in the pool + * + * For given NUMA node and huge page size fetch information on + * total number of huge pages in the pool (both free and taken) + * and count for free huge pages in the pool. + * + * If you're interested in just one bit, pass NULL to the other one. + * + * As a special case, if @node == -1, overall info is fetched + * from the system. + * + * Returns 0 on success, -1 otherwise (with error reported). 
+ */ +static int +virNumaGetHugePageInfo(int node, + unsigned int page_size, + unsigned int *page_avail, + unsigned int *page_free) +{ + int ret = -1; + char *path = NULL; + char *buf = NULL; + char *end; + + if (page_avail) { + if (virNumaGetHugePageInfoPath(&path, node, + page_size, "nr_hugepages") < 0) + goto cleanup; + + if (virFileReadAll(path, 1024, &buf) < 0) + goto cleanup; + + if (virStrToLong_ui(buf, &end, 10, page_avail) < 0 || + *end != '\n') { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unable to parse: %s"), + buf); + goto cleanup; + }
It would probably be worth our while to introduce a helper API virFileReadLong_ui() and likewise for the other virStrToLong variants - we have this need in quite a few places that interact with sysfs. No requirement to do it in this patch unless you want to though - it's a long-standing issue we can fix separately.
+/** + * virNumaGetPageInfo: + * @node: NUMA node id + * @page_size: which huge page are we interested in (in KiB) + * @page_avail: total number of huge pages in the pool + * @page_free: the number of free huge pages in the pool + * + * For given NUMA node and page size fetch information on + * total number of pages in the pool (both free and taken) + * and count for free pages in the pool. + * + * If you're interested in just one bit, pass NULL to the other one. + * + * As a special case, if @node == -1, overall info is fetched + * from the system. + * + * Returns 0 on success, -1 otherwise (with error reported). + */ +int +virNumaGetPageInfo(int node, + unsigned int page_size, + unsigned int *page_avail, + unsigned int *page_free) +{ + int ret = -1; + long system_page_size = sysconf(_SC_PAGESIZE); + + /* sysconf() returns page size in bytes, + * the @page_size is however in kibibytes */ + if (page_size == system_page_size / 1024) { + unsigned long long memsize, memfree; + + /* TODO: come up with better algorithm that takes huge pages into + * account. The problem is huge pages cut off regular memory. */
Hmm, so this code is returning normal page count that ignores the fact that some pages are not in fact usable because they've been stolen for huge pages? I was thinking that the total memory reported by the kernel was reduced when you allocated huge pages, but testing now, it seems I was mistaken in that belief. So this is a bit of a nasty gotcha because a user of this API would probably expect that the sum of page size * page count for all page sizes would equal total physical RAM (give or take). I still like the idea of including the default page size in this info, but perhaps we should disable the default system page size for now & revisit later if we can figure out a way to accurately report it, rather than reporting misleading info.
+ if (node == -1) { + if (nodeGetMemory(&memsize, &memfree) < 0) + goto cleanup; + } else { + if (virNumaGetNodeMemory(node, &memsize, &memfree) < 0) + goto cleanup; + } + + if (page_avail) + *page_avail = memsize / system_page_size; + + if (page_free) + *page_free = memfree / system_page_size; + } else { + if (virNumaGetHugePageInfo(node, page_size, page_avail, page_free) < 0) + goto cleanup; + } + + ret = 0; + cleanup: + return ret; +} + + +/** + * virNumaGetPages: + * @node: NUMA node id + * @pages_size: list of pages supported on @node + * @pages_avail: list of the pool sizes on @node + * @pages_free: list of free pages on @node + * @npages: the lists size + * + * For given NUMA node fetch info on pages. The size of pages + * (e.g. 4K, 2M, 1G) is stored into @pages_size, the size of the + * pool is then stored into @pages_avail and the number of free + * pages in the pool is stored into @pages_free. + * + * If you're interested only in some lists, pass NULL to the + * other ones. + * + * As a special case, if @node == -1, overall info is fetched + * from the system. + * + * Returns 0 on success, -1 otherwise. + */ +int +virNumaGetPages(int node, + unsigned int **pages_size, + unsigned int **pages_avail, + unsigned int **pages_free, + size_t *npages) +{ + int ret = -1; + char *path = NULL; + DIR *dir = NULL; + struct dirent *entry; + unsigned int *tmp_size = NULL, *tmp_avail = NULL, *tmp_free = NULL; + unsigned int ntmp = 0; + size_t i; + bool exchange; + long system_page_size; + + /* sysconf() returns page size in bytes, + * but we are storing the page size in kibibytes. */ + system_page_size = sysconf(_SC_PAGESIZE) / 1024; + + /* We know that ordinary system pages are supported + * if nothing else is. 
*/ + if (VIR_REALLOC_N(tmp_size, 1) < 0 || + VIR_REALLOC_N(tmp_avail, 1) < 0 || + VIR_REALLOC_N(tmp_free, 1) < 0) + goto cleanup; + + if (virNumaGetPageInfo(node, system_page_size, + &tmp_avail[ntmp], &tmp_free[ntmp]) < 0) + goto cleanup; + tmp_size[ntmp] = system_page_size; + ntmp++; + + /* Now that we got ordinary system pages, lets get info on huge pages */ + if (virNumaGetHugePageInfoPath(&path, node, 0, NULL) < 0) + goto cleanup; + + if (!(dir = opendir(path))) { + virReportSystemError(errno, + _("unable to open path: %s"), + path); + goto cleanup; + } + + while (virDirRead(dir, &entry, path) > 0) { + const char *page_name = entry->d_name; + unsigned int page_size, page_avail = 0, page_free = 0; + char *end; + + /* Just to give you a hint, we're dealing with this: + * hugepages-2048kB/ or hugepages-1048576kB/ */ + if (!STRPREFIX(entry->d_name, HUGEPAGES_PREFIX)) + continue; + + page_name += strlen(HUGEPAGES_PREFIX); + + if (virStrToLong_ui(page_name, &end, 10, &page_size) < 0 || + STRCASENEQ(end, "kB")) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unable to parse %s"), + entry->d_name); + goto cleanup; + } + + /* Querying more detailed info makes sense only sometimes */ + if ((pages_avail || pages_free) && + virNumaGetHugePageInfo(node, page_size, + &page_avail, &page_free) < 0) + goto cleanup; + + if (VIR_REALLOC_N(tmp_size, ntmp + 1) < 0 || + VIR_REALLOC_N(tmp_avail, ntmp + 1) < 0 || + VIR_REALLOC_N(tmp_free, ntmp + 1) < 0) + goto cleanup; + + tmp_size[ntmp] = page_size; + tmp_avail[ntmp] = page_avail; + tmp_free[ntmp] = page_free; + ntmp++; + } + + /* Just to produce nice output, sort the arrays by increasing page size */ + do { + exchange = false; + for (i = 0; i < ntmp -1; i++) { + if (tmp_size[i] > tmp_size[i + 1]) { + exchange = true; + SWAP(tmp_size[i], tmp_size[i + 1]); + SWAP(tmp_avail[i], tmp_avail[i + 1]); + SWAP(tmp_free[i], tmp_free[i + 1]); + } + } + } while (exchange); + + if (pages_size) { + *pages_size = tmp_size; + tmp_size = NULL; + } + 
if (pages_avail) { + *pages_avail = tmp_avail; + tmp_avail = NULL; + } + if (pages_free) { + *pages_free = tmp_free; + tmp_free = NULL; + } + *npages = ntmp; + ret = 0; + cleanup: + VIR_FREE(tmp_free); + VIR_FREE(tmp_avail); + VIR_FREE(tmp_size); + closedir(dir); + VIR_FREE(path); + return ret; +}
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Thu, Jun 19, 2014 at 12:06:44PM +0100, Daniel P. Berrange wrote:
On Mon, Jun 16, 2014 at 05:08:26PM +0200, Michal Privoznik wrote:
+int +virNumaGetPageInfo(int node, + unsigned int page_size, + unsigned int *page_avail, + unsigned int *page_free) +{ + int ret = -1; + long system_page_size = sysconf(_SC_PAGESIZE); + + /* sysconf() returns page size in bytes, + * the @page_size is however in kibibytes */ + if (page_size == system_page_size / 1024) { + unsigned long long memsize, memfree; + + /* TODO: come up with better algorithm that takes huge pages into + * account. The problem is huge pages cut off regular memory. */
Hmm, so this code is returning normal page count that ignores the fact that some pages are not in fact usable because they've been stolen for huge pages? I was thinking that the total memory reported by the kernel was reduced when you allocated huge pages, but testing now, it seems I was mistaken in that belief. So this is a bit of a nasty gotcha because a user of this API would probably expect that the sum of page size * page count for all page sizes would equal total physical RAM (give or take).
I still like the idea of including the default page size in this info, but perhaps we should disable the default system page size for now & revisit later if we can figure out a way to accurately report it, rather than reporting misleading info.
I should have said, ACK to either #ifdef 0 system page size or ACK to your previous version of this patch, unless someone has better ideas to accurately report total + free info for default page sizes. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 19.06.2014 13:14, Daniel P. Berrange wrote:
On Thu, Jun 19, 2014 at 12:06:44PM +0100, Daniel P. Berrange wrote:
On Mon, Jun 16, 2014 at 05:08:26PM +0200, Michal Privoznik wrote:
+int +virNumaGetPageInfo(int node, + unsigned int page_size, + unsigned int *page_avail, + unsigned int *page_free) +{ + int ret = -1; + long system_page_size = sysconf(_SC_PAGESIZE); + + /* sysconf() returns page size in bytes, + * the @page_size is however in kibibytes */ + if (page_size == system_page_size / 1024) { + unsigned long long memsize, memfree; + + /* TODO: come up with better algorithm that takes huge pages into + * account. The problem is huge pages cut off regular memory. */
Hmm, so this code is returning normal page count that ignores the fact that some pages are not in fact usable because they've been stolen for huge pages? I was thinking that the total memory reported by the kernel was reduced when you allocated huge pages, but testing now, it seems I was mistaken in that belief. So this is a bit of a nasty gotcha because a user of this API would probably expect that the sum of page size * page count for all page sizes would equal total physical RAM (give or take).
I still like the idea of including the default page size in this info, but perhaps we should disable the default system page size for now & revisit later if we can figure out a way to accurately report it, rather than reporting misleading info.
I should have said, ACK to either #ifdef 0 system page size or ACK to your previous version of this patch, unless someone has better ideas to accurately report total + free info for default page sizes.
Regards, Daniel
Okay, I'm squashing this in prior to pushing: diff --git a/src/util/virnuma.c b/src/util/virnuma.c index a59feca..c8e7f40 100644 --- a/src/util/virnuma.c +++ b/src/util/virnuma.c @@ -663,6 +663,7 @@ virNumaGetPageInfo(int node, /* sysconf() returns page size in bytes, * the @page_size is however in kibibytes */ if (page_size == system_page_size / 1024) { +#if 0 unsigned long long memsize, memfree; /* TODO: come up with better algorithm that takes huge pages into @@ -680,6 +681,11 @@ virNumaGetPageInfo(int node, if (page_free) *page_free = memfree / system_page_size; +#else + virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", + _("system page size are not supported yet")); + goto cleanup; +#endif /* 0 */ } else { if (virNumaGetHugePageInfo(node, page_size, page_avail, page_free) < 0) goto cleanup; @@ -727,6 +733,10 @@ virNumaGetPages(int node, unsigned int ntmp = 0; size_t i; bool exchange; + +#if 0 + /* This has to be disabled until the time the issue in + * virNumaGetPageInfo is resolved. Sorry. */ long system_page_size; /* sysconf() returns page size in bytes, @@ -745,6 +755,7 @@ virNumaGetPages(int node, goto cleanup; tmp_size[ntmp] = system_page_size; ntmp++; +#endif /* 0 */ /* Now that we got ordinary system pages, lets get info on huge pages */ if (virNumaGetHugePageInfoPath(&path, node, 0, NULL) < 0) Hopefully I'll come up with resolution soon. Michal

On 06/19/2014 07:14 AM, Michal Privoznik wrote:
+#else + virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", + _("system page size are not supported yet"));
s/are/is/ -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

There are two places where you'll find info on page sizes. The first one is under <cpu/> element, where all supported pages sizes are listed. Then the second one is under each <cell/> element which refers to concrete NUMA node. At this place, the size of page's pool is reported. So the capabilities XML looks something like this: <capabilities> <host> <uuid>01281cda-f352-cb11-a9db-e905fe22010c</uuid> <cpu> <arch>x86_64</arch> <model>Westmere</model> <vendor>Intel</vendor> <topology sockets='1' cores='1' threads='1'/> ... <pages unit='KiB' size='4'/> <pages unit='KiB' size='2048'/> <pages unit='KiB' size='1048576'/> </cpu> ... <topology> <cells num='4'> <cell id='0'> <memory unit='KiB'>4054408</memory> <pages unit='KiB' size='4'>1013602</pages> <pages unit='KiB' size='2048'>3</pages> <pages unit='KiB' size='1048576'>1</pages> <distances/> <cpus num='1'> <cpu id='0' socket_id='0' core_id='0' siblings='0'/> </cpus> </cell> <cell id='1'> <memory unit='KiB'>4071072</memory> <pages unit='KiB' size='4'>1017768</pages> <pages unit='KiB' size='2048'>3</pages> <pages unit='KiB' size='1048576'>1</pages> <distances/> <cpus num='1'> <cpu id='1' socket_id='0' core_id='0' siblings='1'/> </cpus> </cell> ... </cells> </topology> ... 
</host> <guest/> </capabilities> Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/schemas/capability.rng | 21 +++++++++++++++++++++ src/conf/capabilities.c | 25 ++++++++++++++++++++++--- src/conf/capabilities.h | 15 ++++++++++++++- src/internal.h | 12 ++++++++++++ src/libxl/libxl_conf.c | 1 + src/nodeinfo.c | 40 +++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_capabilities.c | 29 ++++++++++++++++++++++++++++- src/test/test_driver.c | 2 +- src/xen/xend_internal.c | 1 + tests/vircaps2xmltest.c | 3 ++- tests/vircapstest.c | 1 + 11 files changed, 142 insertions(+), 8 deletions(-) diff --git a/docs/schemas/capability.rng b/docs/schemas/capability.rng index 0c95c05..f954599 100644 --- a/docs/schemas/capability.rng +++ b/docs/schemas/capability.rng @@ -118,6 +118,9 @@ <empty/> </element> </zeroOrMore> + <zeroOrMore> + <ref name='pagesElem'/> + </zeroOrMore> </define> <define name='power_management'> @@ -188,6 +191,10 @@ <ref name='memory'/> </optional> + <zeroOrMore> + <ref name='pagesElem'/> + </zeroOrMore> + <optional> <element name='distances'> <zeroOrMore> @@ -416,4 +423,18 @@ <param name='pattern'>[a-zA-Z0-9\-_]+</param> </data> </define> + + <define name='pagesElem'> + <element name='pages'> + <optional> + <attribute name='unit'> + <ref name='unit'/> + </attribute> + </optional> + <attribute name='size'> + <ref name='unsignedInt'/> + </attribute> + <ref name='unsignedInt'/> + </element> + </define> </grammar> diff --git a/src/conf/capabilities.c b/src/conf/capabilities.c index 954456b..19359a5 100644 --- a/src/conf/capabilities.c +++ b/src/conf/capabilities.c @@ -108,6 +108,7 @@ virCapabilitiesFreeHostNUMACell(virCapsHostNUMACellPtr cell) VIR_FREE(cell->cpus); VIR_FREE(cell->siblings); + VIR_FREE(cell->pageinfo); VIR_FREE(cell); } @@ -223,6 +224,7 @@ virCapabilitiesDispose(void *object) } VIR_FREE(caps->host.secModels); + VIR_FREE(caps->host.pagesSize); virCPUDefFree(caps->host.cpu); } @@ -281,6 +283,8 @@ 
virCapabilitiesAddHostMigrateTransport(virCapsPtr caps, * @cpus: array of CPU definition structures, the pointer is stolen * @nsiblings: number of sibling NUMA nodes * @siblings: info on sibling NUMA nodes + * @npageinfo: number of pages at node @num + * @pageinfo: info on each single memory page * * Registers a new NUMA cell for a host, passing in a * array of CPU IDs belonging to the cell @@ -292,7 +296,9 @@ virCapabilitiesAddHostNUMACell(virCapsPtr caps, int ncpus, virCapsHostNUMACellCPUPtr cpus, int nsiblings, - virCapsHostNUMACellSiblingInfoPtr siblings) + virCapsHostNUMACellSiblingInfoPtr siblings, + int npageinfo, + virCapsHostNUMACellPageInfoPtr pageinfo) { virCapsHostNUMACellPtr cell; @@ -303,12 +309,14 @@ virCapabilitiesAddHostNUMACell(virCapsPtr caps, if (VIR_ALLOC(cell) < 0) return -1; - cell->ncpus = ncpus; cell->num = num; cell->mem = mem; + cell->ncpus = ncpus; cell->cpus = cpus; - cell->siblings = siblings; cell->nsiblings = nsiblings; + cell->siblings = siblings; + cell->npageinfo = npageinfo; + cell->pageinfo = pageinfo; caps->host.numaCell[caps->host.nnumaCell++] = cell; @@ -773,6 +781,12 @@ virCapabilitiesFormatNUMATopology(virBufferPtr buf, virBufferAsprintf(buf, "<memory unit='KiB'>%llu</memory>\n", cells[i]->mem); + for (j = 0; j < cells[i]->npageinfo; j++) { + virBufferAsprintf(buf, "<pages unit='KiB' size='%u'>%zu</pages>\n", + cells[i]->pageinfo[j].size, + cells[i]->pageinfo[j].avail); + } + if (cells[i]->nsiblings) { virBufferAddLit(buf, "<distances>\n"); virBufferAdjustIndent(buf, 2); @@ -856,6 +870,11 @@ virCapabilitiesFormatXML(virCapsPtr caps) } virCPUDefFormatBuf(&buf, caps->host.cpu, 0); + for (i = 0; i < caps->host.nPagesSize; i++) { + virBufferAsprintf(&buf, "<pages unit='KiB' size='%u'/>\n", + caps->host.pagesSize[i]); + } + virBufferAdjustIndent(&buf, -2); virBufferAddLit(&buf, "</cpu>\n"); diff --git a/src/conf/capabilities.h b/src/conf/capabilities.h index 53a83c9..2f94451 100644 --- a/src/conf/capabilities.h +++ 
b/src/conf/capabilities.h @@ -102,6 +102,13 @@ struct _virCapsHostNUMACellSiblingInfo { unsigned int distance; /* distance to the node */ }; +typedef struct _virCapsHostNUMACellPageInfo virCapsHostNUMACellPageInfo; +typedef virCapsHostNUMACellPageInfo *virCapsHostNUMACellPageInfoPtr; +struct _virCapsHostNUMACellPageInfo { + unsigned int size; /* page size in kibibytes */ + size_t avail; /* the size of pool */ +}; + typedef struct _virCapsHostNUMACell virCapsHostNUMACell; typedef virCapsHostNUMACell *virCapsHostNUMACellPtr; struct _virCapsHostNUMACell { @@ -111,6 +118,8 @@ struct _virCapsHostNUMACell { virCapsHostNUMACellCPUPtr cpus; int nsiblings; virCapsHostNUMACellSiblingInfoPtr siblings; + int npageinfo; + virCapsHostNUMACellPageInfoPtr pageinfo; }; typedef struct _virCapsHostSecModelLabel virCapsHostSecModelLabel; @@ -152,6 +161,8 @@ struct _virCapsHost { virCapsHostSecModelPtr secModels; virCPUDefPtr cpu; + int nPagesSize; /* size of pagesSize array */ + unsigned int *pagesSize; /* page sizes support on the system */ unsigned char host_uuid[VIR_UUID_BUFLEN]; }; @@ -206,7 +217,9 @@ virCapabilitiesAddHostNUMACell(virCapsPtr caps, int ncpus, virCapsHostNUMACellCPUPtr cpus, int nsiblings, - virCapsHostNUMACellSiblingInfoPtr siblings); + virCapsHostNUMACellSiblingInfoPtr siblings, + int npageinfo, + virCapsHostNUMACellPageInfoPtr pageinfo); extern int diff --git a/src/internal.h b/src/internal.h index 0b36de9..a9e2065 100644 --- a/src/internal.h +++ b/src/internal.h @@ -256,6 +256,18 @@ __FILE__, __LINE__); /** + * SWAP: + * + * In place exchange of two values + */ +# define SWAP(a, b) \ + do { \ + (a) = (a) ^ (b); \ + (b) = (a) ^ (b); \ + (a) = (a) ^ (b); \ + } while (0) + +/** * virCheckFlags: * @supported: an OR'ed set of supported flags * @retval: return value in case unsupported flags were passed diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c index cec37d6..eca51a1 100644 --- a/src/libxl/libxl_conf.c +++ b/src/libxl/libxl_conf.c @@ -210,6 +210,7 
@@ libxlCapsInitNuma(libxl_ctx *ctx, virCapsPtr caps) if (virCapabilitiesAddHostNUMACell(caps, i, numa_info[i].size / 1024, nr_cpus_node[i], cpus[i], + 0, NULL, 0, NULL) < 0) { virCapabilitiesClearHostNUMACellCPUTopology(cpus[i], nr_cpus_node[i]); diff --git a/src/nodeinfo.c b/src/nodeinfo.c index fd831b4..b55d77e 100644 --- a/src/nodeinfo.c +++ b/src/nodeinfo.c @@ -1646,6 +1646,7 @@ nodeCapsInitNUMAFake(virCapsPtr caps ATTRIBUTE_UNUSED) if (virCapabilitiesAddHostNUMACell(caps, 0, nodeinfo.memory, ncpus, cpus, + 0, NULL, 0, NULL) < 0) goto error; @@ -1824,6 +1825,35 @@ virNodeCapsGetSiblingInfo(int node, return ret; } +static int +virNodeCapsGetPagesInfo(int node, + virCapsHostNUMACellPageInfoPtr *pageinfo, + int *npageinfo) +{ + int ret = -1; + unsigned int *pages_size = NULL, *pages_avail = NULL; + size_t npages, i; + + if (virNumaGetPages(node, &pages_size, &pages_avail, NULL, &npages) < 0) + goto cleanup; + + if (VIR_ALLOC_N(*pageinfo, npages) < 0) + goto cleanup; + *npageinfo = npages; + + for (i = 0; i < npages; i++) { + (*pageinfo)[i].size = pages_size[i]; + (*pageinfo)[i].avail = pages_avail[i]; + } + + ret = 0; + + cleanup: + VIR_FREE(pages_avail); + VIR_FREE(pages_size); + return ret; +} + int nodeCapsInitNUMA(virCapsPtr caps) { @@ -1833,6 +1863,8 @@ nodeCapsInitNUMA(virCapsPtr caps) virBitmapPtr cpumap = NULL; virCapsHostNUMACellSiblingInfoPtr siblings = NULL; int nsiblings = 0; + virCapsHostNUMACellPageInfoPtr pageinfo = NULL; + int npageinfo; int ret = -1; int ncpus = 0; int cpu; @@ -1875,17 +1907,22 @@ nodeCapsInitNUMA(virCapsPtr caps) if (virNodeCapsGetSiblingInfo(n, &siblings, &nsiblings) < 0) goto cleanup; + if (virNodeCapsGetPagesInfo(n, &pageinfo, &npageinfo) < 0) + goto cleanup; + /* Detect the amount of memory in the numa cell in KiB */ virNumaGetNodeMemory(n, &memory, NULL); memory >>= 10; if (virCapabilitiesAddHostNUMACell(caps, n, memory, ncpus, cpus, - nsiblings, siblings) < 0) + nsiblings, siblings, + npageinfo, pageinfo) < 0) goto 
cleanup; cpus = NULL; siblings = NULL; + pageinfo = NULL; } ret = 0; @@ -1897,6 +1934,7 @@ nodeCapsInitNUMA(virCapsPtr caps) virBitmapFree(cpumap); VIR_FREE(cpus); VIR_FREE(siblings); + VIR_FREE(pageinfo); if (ret < 0) VIR_FREE(cpus); diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 08c3d04..245d6b5 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -920,6 +920,29 @@ virQEMUCapsInitCPU(virCapsPtr caps, } +static int +virQEMUCapsInitPages(virCapsPtr caps) +{ + int ret = -1; + unsigned int *pages_size = NULL; + size_t npages; + + if (virNumaGetPages(-1 /* Magic constant for overall info */, + &pages_size, NULL, NULL, &npages) < 0) + goto cleanup; + + caps->host.pagesSize = pages_size; + pages_size = NULL; + caps->host.nPagesSize = npages; + npages = 0; + + ret = 0; + cleanup: + VIR_FREE(pages_size); + return ret; +} + + virCapsPtr virQEMUCapsInit(virQEMUCapsCachePtr cache) { virCapsPtr caps; @@ -943,10 +966,14 @@ virCapsPtr virQEMUCapsInit(virQEMUCapsCachePtr cache) VIR_WARN("Failed to get host CPU"); /* Add the power management features of the host */ - if (virNodeSuspendGetTargetMask(&caps->host.powerMgmt) < 0) VIR_WARN("Failed to get host power management capabilities"); + /* Add huge pages info */ + if (virQEMUCapsInitPages(caps) < 0) + VIR_WARN("Failed to get pages info"); + + /* Add domain migration transport URI */ virCapabilitiesAddHostMigrateTransport(caps, "tcp"); diff --git a/src/test/test_driver.c b/src/test/test_driver.c index f9e2b3d..0bf710a 100644 --- a/src/test/test_driver.c +++ b/src/test/test_driver.c @@ -338,7 +338,7 @@ testBuildCapabilities(virConnectPtr conn) if (virCapabilitiesAddHostNUMACell(caps, i, 0, privconn->cells[i].numCpus, - cpu_cells, 0, NULL) < 0) + cpu_cells, 0, NULL, 0, NULL) < 0) goto error; } diff --git a/src/xen/xend_internal.c b/src/xen/xend_internal.c index 5ddf71a..03fdde1 100644 --- a/src/xen/xend_internal.c +++ b/src/xen/xend_internal.c @@ -1102,6 +1102,7 @@ 
sexpr_to_xend_topology(const struct sexpr *root, virCapsPtr caps) if (virCapabilitiesAddHostNUMACell(caps, cell, 0, nb_cpus, cpuInfo, + 0, NULL, 0, NULL) < 0) goto error; cpuInfo = NULL; diff --git a/tests/vircaps2xmltest.c b/tests/vircaps2xmltest.c index fa02534..7166c98 100644 --- a/tests/vircaps2xmltest.c +++ b/tests/vircaps2xmltest.c @@ -74,7 +74,8 @@ buildVirCapabilities(int max_cells, if (virCapabilitiesAddHostNUMACell(caps, cell_id, max_mem_in_cell, max_cpus_in_cell, cell_cpus, - nsiblings, siblings) < 0) + nsiblings, siblings, + 0, NULL) < 0) goto error; cell_cpus = NULL; diff --git a/tests/vircapstest.c b/tests/vircapstest.c index 3edebba..59e9c2b 100644 --- a/tests/vircapstest.c +++ b/tests/vircapstest.c @@ -66,6 +66,7 @@ buildNUMATopology(int seq) if (virCapabilitiesAddHostNUMACell(caps, cell_id + seq, MAX_MEM_IN_CELL, MAX_CPUS_IN_CELL, cell_cpus, + 0, NULL, 0, NULL) < 0) goto error; -- 1.8.5.5

On Mon, Jun 16, 2014 at 05:08:27PM +0200, Michal Privoznik wrote:
There are two places where you'll find info on page sizes. The first one is under the <cpu/> element, where all supported page sizes are listed. The second one is under each <cell/> element, which refers to a concrete NUMA node; there, the size of the page pool is reported. So the capabilities XML looks something like this:
<capabilities>
<host> <uuid>01281cda-f352-cb11-a9db-e905fe22010c</uuid> <cpu> <arch>x86_64</arch> <model>Westmere</model> <vendor>Intel</vendor> <topology sockets='1' cores='1' threads='1'/> ... <pages unit='KiB' size='4'/> <pages unit='KiB' size='2048'/> <pages unit='KiB' size='1048576'/> </cpu> ... <topology> <cells num='4'> <cell id='0'> <memory unit='KiB'>4054408</memory> <pages unit='KiB' size='4'>1013602</pages> <pages unit='KiB' size='2048'>3</pages> <pages unit='KiB' size='1048576'>1</pages> <distances/> <cpus num='1'> <cpu id='0' socket_id='0' core_id='0' siblings='0'/> </cpus> </cell> <cell id='1'> <memory unit='KiB'>4071072</memory> <pages unit='KiB' size='4'>1017768</pages> <pages unit='KiB' size='2048'>3</pages> <pages unit='KiB' size='1048576'>1</pages> <distances/> <cpus num='1'> <cpu id='1' socket_id='0' core_id='0' siblings='1'/> </cpus> </cell> ... </cells> </topology> ... </host>
<guest/>
</capabilities>
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/schemas/capability.rng | 21 +++++++++++++++++++++ src/conf/capabilities.c | 25 ++++++++++++++++++++++--- src/conf/capabilities.h | 15 ++++++++++++++- src/internal.h | 12 ++++++++++++ src/libxl/libxl_conf.c | 1 + src/nodeinfo.c | 40 +++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_capabilities.c | 29 ++++++++++++++++++++++++++++- src/test/test_driver.c | 2 +- src/xen/xend_internal.c | 1 + tests/vircaps2xmltest.c | 3 ++- tests/vircapstest.c | 1 + 11 files changed, 142 insertions(+), 8 deletions(-)
ACK except
diff --git a/src/internal.h b/src/internal.h index 0b36de9..a9e2065 100644 --- a/src/internal.h +++ b/src/internal.h @@ -256,6 +256,18 @@ __FILE__, __LINE__);
/** + * SWAP: + * + * In place exchange of two values + */ +# define SWAP(a, b) \ + do { \ + (a) = (a) ^ (b); \ + (b) = (a) ^ (b); \ + (a) = (a) ^ (b); \ + } while (0) + +/**
this doesn't seem to be used anywhere. Either it belongs in a later patch perhaps or can be dropped ? ACK if you resolve that. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 19.06.2014 13:08, Daniel P. Berrange wrote:
On Mon, Jun 16, 2014 at 05:08:27PM +0200, Michal Privoznik wrote:
There are two places where you'll find info on page sizes. The first one is under the <cpu/> element, where all supported page sizes are listed. The second one is under each <cell/> element, which refers to a concrete NUMA node; there, the size of the page pool is reported. So the capabilities XML looks something like this:
<capabilities>
<host> <uuid>01281cda-f352-cb11-a9db-e905fe22010c</uuid> <cpu> <arch>x86_64</arch> <model>Westmere</model> <vendor>Intel</vendor> <topology sockets='1' cores='1' threads='1'/> ... <pages unit='KiB' size='4'/> <pages unit='KiB' size='2048'/> <pages unit='KiB' size='1048576'/> </cpu> ... <topology> <cells num='4'> <cell id='0'> <memory unit='KiB'>4054408</memory> <pages unit='KiB' size='4'>1013602</pages> <pages unit='KiB' size='2048'>3</pages> <pages unit='KiB' size='1048576'>1</pages> <distances/> <cpus num='1'> <cpu id='0' socket_id='0' core_id='0' siblings='0'/> </cpus> </cell> <cell id='1'> <memory unit='KiB'>4071072</memory> <pages unit='KiB' size='4'>1017768</pages> <pages unit='KiB' size='2048'>3</pages> <pages unit='KiB' size='1048576'>1</pages> <distances/> <cpus num='1'> <cpu id='1' socket_id='0' core_id='0' siblings='1'/> </cpus> </cell> ... </cells> </topology> ... </host>
<guest/>
</capabilities>
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/schemas/capability.rng | 21 +++++++++++++++++++++ src/conf/capabilities.c | 25 ++++++++++++++++++++++--- src/conf/capabilities.h | 15 ++++++++++++++- src/internal.h | 12 ++++++++++++ src/libxl/libxl_conf.c | 1 + src/nodeinfo.c | 40 +++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_capabilities.c | 29 ++++++++++++++++++++++++++++- src/test/test_driver.c | 2 +- src/xen/xend_internal.c | 1 + tests/vircaps2xmltest.c | 3 ++- tests/vircapstest.c | 1 + 11 files changed, 142 insertions(+), 8 deletions(-)
ACK except
diff --git a/src/internal.h b/src/internal.h index 0b36de9..a9e2065 100644 --- a/src/internal.h +++ b/src/internal.h @@ -256,6 +256,18 @@ __FILE__, __LINE__);
/** + * SWAP: + * + * In place exchange of two values + */ +# define SWAP(a, b) \ + do { \ + (a) = (a) ^ (b); \ + (b) = (a) ^ (b); \ + (a) = (a) ^ (b); \ + } while (0) + +/**
this doesn't seem to be used anywhere. Either it belongs in a later patch perhaps or can be dropped ?
ACK if you resolve that.
In fact this one is used in previous patch. I've moved it there. Michal

The aim of the API is to get information on number of free pages on the system. The API behaves similar to the virNodeGetCellsFreeMemory(). User passes starting NUMA cell, the count of nodes that he's interested in, pages sizes (yes, multiple sizes can be queried at once) and the counts are returned in an array. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- daemon/remote.c | 52 ++++++++++++++++++++++++ include/libvirt/libvirt.h.in | 7 ++++ src/driver.h | 10 +++++ src/libvirt.c | 95 ++++++++++++++++++++++++++++++++++++++++++++ src/libvirt_public.syms | 4 ++ src/remote/remote_driver.c | 50 +++++++++++++++++++++++ src/remote/remote_protocol.x | 20 +++++++++- src/remote_protocol-structs | 16 ++++++++ 8 files changed, 253 insertions(+), 1 deletion(-) diff --git a/daemon/remote.c b/daemon/remote.c index 34c96c9..11ae758 100644 --- a/daemon/remote.c +++ b/daemon/remote.c @@ -6115,6 +6115,58 @@ remoteDispatchDomainGetTime(virNetServerPtr server ATTRIBUTE_UNUSED, return rv; } + +static int +remoteDispatchNodeGetFreePages(virNetServerPtr server ATTRIBUTE_UNUSED, + virNetServerClientPtr client, + virNetMessagePtr msg ATTRIBUTE_UNUSED, + virNetMessageErrorPtr rerr, + remote_node_get_free_pages_args *args, + remote_node_get_free_pages_ret *ret) +{ + int rv = -1; + int len; + struct daemonClientPrivate *priv = + virNetServerClientGetPrivateData(client); + + if (!priv->conn) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("connection not open")); + goto cleanup; + } + + if (args->pages.pages_len * args->cellCount > REMOTE_NODE_MAX_CELLS) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("the result won't fit into REMOTE_NODE_MAX_CELLS")); + goto cleanup; + } + + /* Allocate return buffer. 
*/ + if (VIR_ALLOC_N(ret->counts.counts_val, + args->pages.pages_len * args->cellCount) < 0) + goto cleanup; + + if ((len = virNodeGetFreePages(priv->conn, + args->pages.pages_len, + args->pages.pages_val, + args->startCell, + args->cellCount, + (unsigned long long *) ret->counts.counts_val, + args->flags)) <= 0) + goto cleanup; + + ret->counts.counts_len = len; + rv = 0; + + cleanup: + if (rv < 0) { + virNetMessageSaveError(rerr); + VIR_FREE(ret->counts.counts_val); + } + return rv; + +} + + /*----- Helpers. -----*/ /* get_nonnull_domain and get_nonnull_network turn an on-wire diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 127de11..b939848 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -5307,6 +5307,13 @@ int virDomainSetTime(virDomainPtr dom, unsigned int nseconds, unsigned int flags); +int virNodeGetFreePages(virConnectPtr conn, + unsigned int npages, + unsigned int *pages, + int startcell, + unsigned int cellcount, + unsigned long long *counts, + unsigned int flags); /** * virSchedParameterType: * diff --git a/src/driver.h b/src/driver.h index 5ac89d6..a0b258a 100644 --- a/src/driver.h +++ b/src/driver.h @@ -1173,6 +1173,15 @@ typedef int unsigned int nmountpoints, unsigned int flags); +typedef int +(*virDrvNodeGetFreePages)(virConnectPtr conn, + unsigned int npages, + unsigned int *pages, + int startCell, + unsigned int cellCount, + unsigned long long *counts, + unsigned int flags); + typedef struct _virDriver virDriver; typedef virDriver *virDriverPtr; @@ -1391,6 +1400,7 @@ struct _virDriver { virDrvDomainFSThaw domainFSThaw; virDrvDomainGetTime domainGetTime; virDrvDomainSetTime domainSetTime; + virDrvNodeGetFreePages nodeGetFreePages; }; diff --git a/src/libvirt.c b/src/libvirt.c index 6c4a124..83b7437 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -20910,3 +20910,98 @@ virDomainSetTime(virDomainPtr dom, virDispatchError(dom->conn); return -1; } + + +/** + * virNodeGetFreePages: + * @conn: 
pointer to the hypervisor connection + * @npages: number of items in the @pages array + * @pages: page sizes to query + * @startCell: index of first cell to return free pages info on. + * @cellCount: maximum number of cells for which free pages + * information can be returned. + * @counts: returned counts of free pages + * @flags: extra flags; not used yet, so callers should always pass 0 + * + * This call queries the host system on free pages of + * specified size. On input, @pages is expected to be + * filled with pages that caller is interested in (the size + * unit is kibibytes, so e.g. pass 2048 for 2MB), then @startCell + * refers to the first NUMA node that info should be collected + * from, and @cellCount tells how many consecutive nodes should + * be queried. On the function output, @counts is filled with + * desired information, where items are grouped by NUMA node. + * So from @counts[0] till @counts[@npages - 1] you'll find count + * for the first node (@startCell), then from @counts[@npages] + * till @counts[2 * @npages - 1] you'll find info for the + * (@startCell + 1) node, and so on. It's the caller's responsibility + * to allocate the @counts array. 
+ * + * Example how to use this API: + * + * unsigned int pages[] = { 4, 2048, 1048576 }; + * unsigned int npages = ARRAY_CARDINALITY(pages); + * int startcell = 0; + * unsigned int cellcount = 2; + * + * unsigned long long *counts = malloc(sizeof(*counts) * npages * cellcount); + * + * virNodeGetFreePages(conn, npages, pages, + * startcell, cellcount, counts, 0); + * + * for (i = 0 ; i < cellcount ; i++) { + * fprintf(stdout, "Cell %d\n", startcell + i); + * for (j = 0 ; j < npages ; j++) { + * fprintf(stdout, " Page size=%llu count=%llu bytes=%llu\n", + * 1024ULL * pages[j], counts[(i * npages) + j], + * 1024ULL * pages[j] * counts[(i * npages) + j]); + * } + * } + * + * This little code snippet will produce something like this: + * Cell 0 + * Page size=4096 count=300 bytes=1228800 + * Page size=2097152 count=0 bytes=0 + * Page size=1073741824 count=1 bytes=1073741824 + * Cell 1 + * Page size=4096 count=0 bytes=0 + * Page size=2097152 count=20 bytes=41943040 + * Page size=1073741824 count=0 bytes=0 + * + * Returns: the number of entries filled in @counts or -1 in case of error. 
+ */ +int +virNodeGetFreePages(virConnectPtr conn, + unsigned int npages, + unsigned int *pages, + int startCell, + unsigned int cellCount, + unsigned long long *counts, + unsigned int flags) +{ + VIR_DEBUG("conn=%p, npages=%u, pages=%p, startCell=%u, " + "cellCount=%u, counts=%p, flags=%x", + conn, npages, pages, startCell, cellCount, counts, flags); + + virResetLastError(); + + virCheckConnectReturn(conn, -1); + virCheckNonZeroArgGoto(npages, error); + virCheckNonNullArgGoto(pages, error); + virCheckNonZeroArgGoto(cellCount, error); + virCheckNonNullArgGoto(counts, error); + + if (conn->driver->nodeGetFreePages) { + int ret; + ret = conn->driver->nodeGetFreePages(conn, npages, pages, startCell, + cellCount, counts, flags); + if (ret < 0) + goto error; + return ret; + } + + virReportUnsupportedError(); + error: + virDispatchError(conn); + return -1; +} diff --git a/src/libvirt_public.syms b/src/libvirt_public.syms index cce6bdf..40d2c1a 100644 --- a/src/libvirt_public.syms +++ b/src/libvirt_public.syms @@ -658,5 +658,9 @@ LIBVIRT_1.2.5 { virDomainSetTime; } LIBVIRT_1.2.3; +LIBVIRT_1.2.6 { + global: + virNodeGetFreePages; +} LIBVIRT_1.2.5; # .... define new API here using predicted next version number .... 
diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index 85fe597..563fac0 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -7469,6 +7469,55 @@ remoteDomainGetTime(virDomainPtr dom, } +static int +remoteNodeGetFreePages(virConnectPtr conn, + unsigned int npages, + unsigned int *pages, + int startCell, + unsigned int cellCount, + unsigned long long *counts, + unsigned int flags) +{ + int rv = -1; + remote_node_get_free_pages_args args; + remote_node_get_free_pages_ret ret; + struct private_data *priv = conn->privateData; + + remoteDriverLock(priv); + + if (npages * cellCount > REMOTE_NODE_MAX_CELLS) { + virReportError(VIR_ERR_RPC, + _("too many NUMA cells: %d > %d"), + npages * cellCount, REMOTE_NODE_MAX_CELLS); + goto done; + } + + if (VIR_ALLOC_N(args.pages.pages_val, npages) < 0) + goto done; + memcpy(args.pages.pages_val, pages, npages * sizeof(*pages)); + args.pages.pages_len = npages; + args.startCell = startCell; + args.cellCount = cellCount; + args.flags = flags; + + memset(&ret, 0, sizeof(ret)); + if (call(conn, priv, 0, REMOTE_PROC_NODE_GET_FREE_PAGES, + (xdrproc_t) xdr_remote_node_get_free_pages_args, (char *)&args, + (xdrproc_t) xdr_remote_node_get_free_pages_ret, (char *)&ret) == -1) + goto done; + + memcpy(counts, ret.counts.counts_val, ret.counts.counts_len * sizeof(*counts)); + + xdr_free((xdrproc_t) xdr_remote_node_get_free_pages_ret, (char *) &ret); + + rv = ret.counts.counts_len; + + done: + remoteDriverUnlock(priv); + return rv; +} + + /* get_nonnull_domain and get_nonnull_network turn an on-wire * (name, uuid) pair into virDomainPtr or virNetworkPtr object. 
* These can return NULL if underlying memory allocations fail, @@ -7805,6 +7854,7 @@ static virDriver remote_driver = { .domainFSThaw = remoteDomainFSThaw, /* 1.2.5 */ .domainGetTime = remoteDomainGetTime, /* 1.2.5 */ .domainSetTime = remoteDomainSetTime, /* 1.2.5 */ + .nodeGetFreePages = remoteNodeGetFreePages, /* 1.2.6 */ }; static virNetworkDriver network_driver = { diff --git a/src/remote/remote_protocol.x b/src/remote/remote_protocol.x index 1f9d583..ec4f3e2 100644 --- a/src/remote/remote_protocol.x +++ b/src/remote/remote_protocol.x @@ -2999,6 +2999,17 @@ struct remote_domain_fsthaw_ret { int filesystems; }; +struct remote_node_get_free_pages_args { + unsigned int pages<REMOTE_NODE_MAX_CELLS>; + int startCell; + unsigned int cellCount; + unsigned int flags; +}; + +struct remote_node_get_free_pages_ret { + unsigned hyper counts<REMOTE_NODE_MAX_CELLS>; +}; + /*----- Protocol. -----*/ @@ -5338,5 +5349,12 @@ enum remote_procedure { * @generate: both * @acl: domain:set_time */ - REMOTE_PROC_DOMAIN_SET_TIME = 338 + REMOTE_PROC_DOMAIN_SET_TIME = 338, + + /** + * @generate: none + * @priority: high + * @acl: connect:read + */ + REMOTE_PROC_NODE_GET_FREE_PAGES = 339 }; diff --git a/src/remote_protocol-structs b/src/remote_protocol-structs index 5b22049..6c51e75 100644 --- a/src/remote_protocol-structs +++ b/src/remote_protocol-structs @@ -2463,6 +2463,21 @@ struct remote_domain_fsthaw_args { struct remote_domain_fsthaw_ret { int filesystems; }; +struct remote_node_get_free_pages_args { + struct { + u_int pages_len; + u_int * pages_val; + } pages; + int startCell; + u_int cellCount; + u_int flags; +}; +struct remote_node_get_free_pages_ret { + struct { + u_int counts_len; + uint64_t * counts_val; + } counts; +}; enum remote_procedure { REMOTE_PROC_CONNECT_OPEN = 1, REMOTE_PROC_CONNECT_CLOSE = 2, @@ -2802,4 +2817,5 @@ enum remote_procedure { REMOTE_PROC_DOMAIN_FSTHAW = 336, REMOTE_PROC_DOMAIN_GET_TIME = 337, REMOTE_PROC_DOMAIN_SET_TIME = 338, + 
REMOTE_PROC_NODE_GET_FREE_PAGES = 339, }; -- 1.8.5.5

On Mon, Jun 16, 2014 at 05:08:28PM +0200, Michal Privoznik wrote:
The aim of the API is to get information on number of free pages on the system. The API behaves similar to the virNodeGetCellsFreeMemory(). User passes starting NUMA cell, the count of nodes that he's interested in, pages sizes (yes, multiple sizes can be queried at once) and the counts are returned in an array.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- daemon/remote.c | 52 ++++++++++++++++++++++++ include/libvirt/libvirt.h.in | 7 ++++ src/driver.h | 10 +++++ src/libvirt.c | 95 ++++++++++++++++++++++++++++++++++++++++++++ src/libvirt_public.syms | 4 ++ src/remote/remote_driver.c | 50 +++++++++++++++++++++++ src/remote/remote_protocol.x | 20 +++++++++- src/remote_protocol-structs | 16 ++++++++ 8 files changed, 253 insertions(+), 1 deletion(-)
ACK Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

The new API is exposed under 'freepages' command. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- tools/virsh-host.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++ tools/virsh.pod | 8 +++ 2 files changed, 175 insertions(+) diff --git a/tools/virsh-host.c b/tools/virsh-host.c index 8091437..2d6cb00 100644 --- a/tools/virsh-host.c +++ b/tools/virsh-host.c @@ -193,6 +193,167 @@ cmdFreecell(vshControl *ctl, const vshCmd *cmd) return ret; } + +/* + * "freepages" command + */ +static const vshCmdInfo info_freepages[] = { + {.name = "help", + .data = N_("NUMA free memory") + }, + {.name = "desc", + .data = N_("display available free memory for the NUMA cell.") + }, + {.name = NULL} +}; + +static const vshCmdOptDef opts_freepages[] = { + {.name = "cellno", + .type = VSH_OT_INT, + .help = N_("NUMA cell number") + }, + {.name = "pagesize", + .type = VSH_OT_INT, + .help = N_("page size (in kibibites)") + }, + {.name = "all", + .type = VSH_OT_BOOL, + .help = N_("show free pages for all NUMA cells") + }, + {.name = NULL} +}; + +static bool +cmdFreepages(vshControl *ctl, const vshCmd *cmd) +{ + bool ret = false; + unsigned int npages; + unsigned int *pagesize = NULL; + int cell; + unsigned long long *counts = NULL; + size_t i, j; + xmlNodePtr *nodes = NULL; + int nodes_cnt; + char *cap_xml = NULL; + xmlDocPtr doc = NULL; + xmlXPathContextPtr ctxt = NULL; + bool all = vshCommandOptBool(cmd, "all"); + bool cellno = vshCommandOptBool(cmd, "cellno"); + + VSH_EXCLUSIVE_OPTIONS_VAR(all, cellno); + + if (all) { + if (!(cap_xml = virConnectGetCapabilities(ctl->conn))) { + vshError(ctl, "%s", _("unable to get node capabilities")); + goto cleanup; + } + + if (!(doc = virXMLParseStringCtxt(cap_xml, _("capabilities"), &ctxt))) { + vshError(ctl, "%s", _("unable to parse node capabilities")); + goto cleanup; + } + + nodes_cnt = virXPathNodeSet("/capabilities/host/cpu/pages", ctxt, &nodes); + + if (nodes_cnt <= 0) { + vshError(ctl, "%s", _("could not get information 
about " + "supported page sizes")); + goto cleanup; + } + + pagesize = vshMalloc(ctl, nodes_cnt * sizeof(*pagesize)); + + for (i = 0; i < nodes_cnt; i++) { + char *val = virXMLPropString(nodes[i], "size"); + + if (virStrToLong_ui(val, NULL, 10, &pagesize[i]) < 0) { + vshError(ctl, _("unable to parse page size: %s"), val); + VIR_FREE(val); + goto cleanup; + } + + VIR_FREE(val); + } + + npages = nodes_cnt; + VIR_FREE(nodes); + + counts = vshMalloc(ctl, npages * sizeof(*counts)); + + nodes_cnt = virXPathNodeSet("/capabilities/host/topology/cells/cell", + ctxt, &nodes); + for (i = 0; i < nodes_cnt; i++) { + char *val = virXMLPropString(nodes[i], "id"); + + if (virStrToLong_i(val, NULL, 10, &cell) < 0) { + vshError(ctl, _("unable to parse numa node id: %s"), val); + VIR_FREE(val); + goto cleanup; + } + VIR_FREE(val); + + if (virNodeGetFreePages(ctl->conn, npages, pagesize, + cell, 1, counts, 0) < 0) + goto cleanup; + + vshPrint(ctl, _("Node %d:\n"), cell); + for (j = 0; j < npages; j++) { + vshPrint(ctl, "%uKiB: %lld\n", pagesize[j], counts[j]); + } + vshPrint(ctl, "%c", '\n'); + } + + } else { + if (!cellno) { + vshError(ctl, "%s", _("missing cellno argument")); + goto cleanup; + } + + if (vshCommandOptInt(cmd, "cellno", &cell) < 0) { + vshError(ctl, "%s", _("Invalid cellno argument")); + goto cleanup; + } + + if (cell < -1) { + vshError(ctl, "%s", _("cell number must be non-negative integer or -1")); + goto cleanup; + } + + pagesize = vshMalloc(ctl, sizeof(*pagesize)); + if (vshCommandOptScaledInt(cmd, "pagesize", (unsigned long long *) pagesize, + 1, UINT_MAX) < 0) { + vshError(ctl, "%s", _("page size has to be a number")); + goto cleanup; + } + + /* page size is expected in kibibytes */ + pagesize[0] /= 1024; + + if (!pagesize[0]) { + vshError(ctl, "%s", _("page size must be at least 1KiB")); + goto cleanup; + } + + counts = vshMalloc(ctl, sizeof(*counts)); + + if (virNodeGetFreePages(ctl->conn, 1, pagesize, cell, 1, counts, 0) < 0) + goto cleanup; + + vshPrint(ctl, 
"%uKiB: %lld\n", *pagesize, counts[0]); + } + + ret = true; + cleanup: + xmlXPathFreeContext(ctxt); + xmlFreeDoc(doc); + VIR_FREE(cap_xml); + VIR_FREE(nodes); + VIR_FREE(counts); + VIR_FREE(pagesize); + return ret; +} + + /* * "maxvcpus" command */ @@ -977,6 +1138,12 @@ const vshCmdDef hostAndHypervisorCmds[] = { .info = info_freecell, .flags = 0 }, + {.name = "freepages", + .handler = cmdFreepages, + .opts = opts_freepages, + .info = info_freepages, + .flags = 0 + }, {.name = "hostname", .handler = cmdHostname, .opts = NULL, diff --git a/tools/virsh.pod b/tools/virsh.pod index 35cf878..0cc89ad 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -511,6 +511,14 @@ cell and the total free memory on the machine. Finally, with a numeric argument or with --cellno plus a cell number it will display the free memory for the specified cell only. +=item B<freepages> [{ [I<--cellno>] I<cellno> [I<--pagesize>] I<pagesize> | + I<--all> }] + +Prints the available amount of pages within a NUMA cell. I<cellno> refers +to the NUMA cell you're interested in. I<pagesize> is a scaled integer (see +B<NOTES> above). Alternatively, if I<--all> is used, info on each possible +combination of NUMA cell and page size is printed out. + =item B<cpu-baseline> I<FILE> [I<--features>] Compute baseline CPU which will be supported by all host CPUs given in <file>. -- 1.8.5.5

On Mon, Jun 16, 2014 at 05:08:29PM +0200, Michal Privoznik wrote:
The new API is exposed under 'freepages' command.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- tools/virsh-host.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++ tools/virsh.pod | 8 +++ 2 files changed, 175 insertions(+)
ACK Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 06/16/2014 09:08 AM, Michal Privoznik wrote:
The new API is exposed under 'freepages' command.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- tools/virsh-host.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++ tools/virsh.pod | 8 +++ 2 files changed, 175 insertions(+)
I'm working on a followup patch to fix several bugs...
+ {.name = "pagesize", + .type = VSH_OT_INT, + .help = N_("page size (in kibibites)")
s/bites/bytes/
+static bool +cmdFreepages(vshControl *ctl, const vshCmd *cmd) +{ + bool ret = false; + unsigned int npages; + unsigned int *pagesize = NULL; + int cell; + unsigned long long *counts = NULL; + size_t i, j; + xmlNodePtr *nodes = NULL; + int nodes_cnt;
pagesize is an int...
+ + nodes_cnt = virXPathNodeSet("/capabilities/host/cpu/pages", ctxt, &nodes); + + if (nodes_cnt <= 0) { + vshError(ctl, "%s", _("could not get information about " + "supported page sizes")); + goto cleanup; + } + + pagesize = vshMalloc(ctl, nodes_cnt * sizeof(*pagesize));
Risks multiplication overflow (probably unlikely in practice, but in theory a super-large number of /capabilities/host/cpu/pages can overflow). You're not the first culprit; we've got lots of abuse of vshMalloc(, a * b) which should instead be using vshCalloc or VIR_ALLOC_N.
+ + pagesize = vshMalloc(ctl, sizeof(*pagesize));
...so this allocates only 4 bytes...
+ if (vshCommandOptScaledInt(cmd, "pagesize", (unsigned long long *) pagesize, + 1, UINT_MAX) < 0) {
...but this pointer cast causes a store through 8 bytes. Absolute no-no. Clang caught it, and so will valgrind. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

And add stubs to other drivers like: lxc, qemu, uml and vbox. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/lxc/lxc_driver.c | 19 +++++++++++++++++++ src/nodeinfo.c | 34 ++++++++++++++++++++++++++++++++++ src/nodeinfo.h | 5 +++++ src/qemu/qemu_driver.c | 19 +++++++++++++++++++ src/uml/uml_driver.c | 19 +++++++++++++++++++ src/vbox/vbox_tmpl.c | 16 ++++++++++++++++ 7 files changed, 113 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a7834ed..865919c 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -875,6 +875,7 @@ nodeGetCPUBitmap; nodeGetCPUCount; nodeGetCPUMap; nodeGetCPUStats; +nodeGetFreePages; nodeGetInfo; nodeGetMemory; nodeGetMemoryParameters; diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index ab227d0..9380e8d 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -5656,6 +5656,24 @@ lxcDomainGetCPUStats(virDomainPtr dom, } +static int +lxcNodeGetFreePages(virConnectPtr conn, + unsigned int npages, + unsigned int *pages, + int startCell, + unsigned int cellCount, + unsigned long long *counts, + unsigned int flags) +{ + virCheckFlags(0, -1); + + if (virNodeGetFreePagesEnsureACL(conn) < 0) + return -1; + + return nodeGetFreePages(npages, pages, startCell, cellCount, counts); +} + + /* Function Tables */ static virDriver lxcDriver = { .no = VIR_DRV_LXC, @@ -5745,6 +5763,7 @@ static virDriver lxcDriver = { .domainShutdownFlags = lxcDomainShutdownFlags, /* 1.0.1 */ .domainReboot = lxcDomainReboot, /* 1.0.1 */ .domainLxcOpenNamespace = lxcDomainLxcOpenNamespace, /* 1.0.2 */ + .nodeGetFreePages = lxcNodeGetFreePages, /* 1.2.6 */ }; static virStateDriver lxcStateDriver = { diff --git a/src/nodeinfo.c b/src/nodeinfo.c index b55d77e..dbfbf54 100644 --- a/src/nodeinfo.c +++ b/src/nodeinfo.c @@ -2018,3 +2018,37 @@ nodeGetMemory(unsigned long long *mem, return 0; } + +int +nodeGetFreePages(unsigned int npages, + unsigned int *pages, + int 
startCell, + unsigned int cellCount, + unsigned long long *counts) +{ + int ret = -1; + int cell; + size_t i, ncounts = 0; + + for (cell = startCell; cell < (int) (startCell + cellCount); cell++) { + for (i = 0; i < npages; i++) { + unsigned int page_size = pages[i]; + unsigned int page_free; + + if (virNumaGetPageInfo(cell, page_size, NULL, &page_free) < 0) + goto cleanup; + + counts[ncounts++] = page_free; + } + } + + if (!ncounts) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("no suitable info found")); + goto cleanup; + } + + ret = ncounts; + cleanup: + return ret; +} diff --git a/src/nodeinfo.h b/src/nodeinfo.h index e7ec144..0896c6c 100644 --- a/src/nodeinfo.h +++ b/src/nodeinfo.h @@ -58,4 +58,9 @@ int nodeGetCPUMap(unsigned char **cpumap, unsigned int *online, unsigned int flags); +int nodeGetFreePages(unsigned int npages, + unsigned int *pages, + int startCell, + unsigned int cellCount, + unsigned long long *counts); #endif /* __VIR_NODEINFO_H__*/ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 88051c9..ce84df4 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -16876,6 +16876,24 @@ qemuDomainFSThaw(virDomainPtr dom, } +static int +qemuNodeGetFreePages(virConnectPtr conn, + unsigned int npages, + unsigned int *pages, + int startCell, + unsigned int cellCount, + unsigned long long *counts, + unsigned int flags) +{ + virCheckFlags(0, -1); + + if (virNodeGetFreePagesEnsureACL(conn) < 0) + return -1; + + return nodeGetFreePages(npages, pages, startCell, cellCount, counts); +} + + static virDriver qemuDriver = { .no = VIR_DRV_QEMU, .name = QEMU_DRIVER_NAME, @@ -17070,6 +17088,7 @@ static virDriver qemuDriver = { .domainFSThaw = qemuDomainFSThaw, /* 1.2.5 */ .domainGetTime = qemuDomainGetTime, /* 1.2.5 */ .domainSetTime = qemuDomainSetTime, /* 1.2.5 */ + .nodeGetFreePages = qemuNodeGetFreePages, /* 1.2.6 */ }; diff --git a/src/uml/uml_driver.c b/src/uml/uml_driver.c index a5e9ea8..5ccd443 100644 --- 
a/src/uml/uml_driver.c +++ b/src/uml/uml_driver.c @@ -2870,6 +2870,24 @@ umlNodeSuspendForDuration(virConnectPtr conn, } +static int +umlNodeGetFreePages(virConnectPtr conn, + unsigned int npages, + unsigned int *pages, + int startCell, + unsigned int cellCount, + unsigned long long *counts, + unsigned int flags) +{ + virCheckFlags(0, -1); + + if (virNodeGetFreePagesEnsureACL(conn) < 0) + return -1; + + return nodeGetFreePages(npages, pages, startCell, cellCount, counts); +} + + static virDriver umlDriver = { .no = VIR_DRV_UML, .name = "UML", @@ -2931,6 +2949,7 @@ static virDriver umlDriver = { .nodeSuspendForDuration = umlNodeSuspendForDuration, /* 0.9.8 */ .nodeGetMemoryParameters = umlNodeGetMemoryParameters, /* 0.10.2 */ .nodeSetMemoryParameters = umlNodeSetMemoryParameters, /* 0.10.2 */ + .nodeGetFreePages = umlNodeGetFreePages, /* 1.2.6 */ }; static virStateDriver umlStateDriver = { diff --git a/src/vbox/vbox_tmpl.c b/src/vbox/vbox_tmpl.c index 66e933a..e4440dd 100644 --- a/src/vbox/vbox_tmpl.c +++ b/src/vbox/vbox_tmpl.c @@ -11475,6 +11475,21 @@ vboxNodeGetFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED) } +static int +vboxNodeGetFreePages(virConnectPtr conn ATTRIBUTE_UNUSED, + unsigned int npages, + unsigned int *pages, + int startCell, + unsigned int cellCount, + unsigned long long *counts, + unsigned int flags) +{ + virCheckFlags(0, -1); + + return nodeGetFreePages(npages, pages, startCell, cellCount, counts); +} + + /** * Function Tables */ @@ -11556,6 +11571,7 @@ virDriver NAME(Driver) = { .domainRevertToSnapshot = vboxDomainRevertToSnapshot, /* 0.8.0 */ .domainSnapshotDelete = vboxDomainSnapshotDelete, /* 0.8.0 */ .connectIsAlive = vboxConnectIsAlive, /* 0.9.8 */ + .nodeGetFreePages = vboxNodeGetFreePages, /* 1.2.6 */ }; virNetworkDriver NAME(NetworkDriver) = { -- 1.8.5.5

On Mon, Jun 16, 2014 at 05:08:30PM +0200, Michal Privoznik wrote:
And add stubs to other drivers like: lxc, qemu, uml and vbox.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/lxc/lxc_driver.c | 19 +++++++++++++++++++ src/nodeinfo.c | 34 ++++++++++++++++++++++++++++++++++ src/nodeinfo.h | 5 +++++ src/qemu/qemu_driver.c | 19 +++++++++++++++++++ src/uml/uml_driver.c | 19 +++++++++++++++++++ src/vbox/vbox_tmpl.c | 16 ++++++++++++++++ 7 files changed, 113 insertions(+)
ACK Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 16.06.2014 17:08, Michal Privoznik wrote:
diff to v1: - Expose all page size not only huge ones
Michal Privoznik (7): virnuma: Introduce virNumaNodeIsAvailable nodeinfo: Rename nodeGetFreeMemory to nodeGetMemory virnuma: Introduce pages helpers virCaps: expose pages info Introduce virNodeGetFreePages virsh: Expose virNodeGetFreePages nodeinfo: Implement nodeGetFreePages
daemon/remote.c | 52 +++++++ docs/schemas/capability.rng | 21 +++ include/libvirt/libvirt.h.in | 7 + src/bhyve/bhyve_driver.c | 7 +- src/conf/capabilities.c | 25 ++- src/conf/capabilities.h | 15 +- src/driver.h | 10 ++ src/internal.h | 12 ++ src/libvirt.c | 95 ++++++++++++ src/libvirt_private.syms | 6 +- src/libvirt_public.syms | 4 + src/libxl/libxl_conf.c | 1 + src/lxc/lxc_driver.c | 26 +++- src/nodeinfo.c | 170 ++++++++++++++++---- src/nodeinfo.h | 8 +- src/openvz/openvz_driver.c | 5 +- src/qemu/qemu_capabilities.c | 29 +++- src/qemu/qemu_driver.c | 26 +++- src/remote/remote_driver.c | 50 ++++++ src/remote/remote_protocol.x | 20 ++- src/remote_protocol-structs | 16 ++ src/test/test_driver.c | 2 +- src/uml/uml_driver.c | 26 +++- src/util/virnuma.c | 361 ++++++++++++++++++++++++++++++++++++++++++- src/util/virnuma.h | 11 ++ src/vbox/vbox_tmpl.c | 21 ++- src/xen/xend_internal.c | 1 + tests/vircaps2xmltest.c | 3 +- tests/vircapstest.c | 1 + tools/virsh-host.c | 167 ++++++++++++++++++++ tools/virsh.pod | 8 + 31 files changed, 1161 insertions(+), 45 deletions(-)
Thanks Daniel for the review! I'm pushing this one. Michal
participants (3)
-
Daniel P. Berrange
-
Eric Blake
-
Michal Privoznik