From: Bing Niu <bing.niu(a)intel.com>
Add memory bandwidth allocation support to the virresctrl class.
Introduce virResctrlAllocMemBW, which is used for allocating memory
bandwidth. Following virResctrlAllocPerType, it also employs a
nested sparse array to indicate whether an allocation is available for
a particular last level cache.
Signed-off-by: Bing Niu <bing.niu(a)intel.com>
---
src/util/virresctrl.c | 346 ++++++++++++++++++++++++++++++++++++++++++++++++--
src/util/virresctrl.h | 13 ++
2 files changed, 346 insertions(+), 13 deletions(-)
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 06e2702..bec2afd 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -36,9 +36,9 @@ VIR_LOG_INIT("util.virresctrl")
/* Resctrl is short for Resource Control. It might be implemented for various
- * resources, but at the time of this writing this is only supported for cache
- * allocation technology (aka CAT). Hence the reson for leaving 'Cache' out of
- * all the structure and function names for now (can be added later if needed.
+ * resources. Currently this supports cache allocation technology (aka CAT) and
+ * memory bandwidth allocation (aka MBA). More resource technologies may be
+ * added in the future.
*/
@@ -89,6 +89,8 @@ typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr;
typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
+typedef struct _virResctrlAllocMemBW virResctrlAllocMemBW;
+typedef virResctrlAllocMemBW *virResctrlAllocMemBWPtr;
/* Class definitions and initializations */
static virClassPtr virResctrlInfoClass;
@@ -181,7 +183,10 @@ virResctrlInfoDispose(void *obj)
* consequently a directory under /sys/fs/resctrl). Since it can have multiple
* parts of multiple caches allocated it is represented as bunch of nested
* sparse arrays (by sparse I mean array of pointers so that each might be NULL
- * in case there is no allocation for that particular one (level, cache, ...)).
+ * in case there is no cache allocation for that particular one (level,
+ * cache, ...) or no memory bandwidth allocation for that particular node).
+ *
+ * =====Cache allocation technology (CAT)=====
*
* Since one allocation can be made for caches on different levels, the first
* nested sparse array is of types virResctrlAllocPerLevel. For example if you
@@ -206,6 +211,16 @@ virResctrlInfoDispose(void *obj)
* all of them. While doing that we store the bitmask in a sparse array of
* virBitmaps named `masks` indexed the same way as `sizes`. The upper bounds
* of the sparse arrays are stored in nmasks or nsizes, respectively.
+ *
+ * =====Memory Bandwidth allocation technology (MBA)=====
+ *
+ * The memory bandwidth allocation support in virResctrlAlloc works in the same
+ * fashion as CAT. However, memory bandwidth controllers are not organized in a
+ * hierarchy the way caches are; each node has one memory bandwidth controller
+ * that distributes its memory bandwidth. The number of memory bandwidth
+ * controllers is identical to the number of last level caches. So MBA also
+ * employs a sparse array to represent whether a memory bandwidth allocation
+ * happens on the corresponding node. The number of available memory
+ * controllers is collected in 'virResctrlInfo'.
*/
struct _virResctrlAllocPerType {
/* There could be bool saying whether this is set or not, but since everything
@@ -226,12 +241,24 @@ struct _virResctrlAllocPerLevel {
* VIR_CACHE_TYPE_LAST number of items */
};
+/*
+ * virResctrlAllocMemBW represents one memory bandwidth allocation. Since a
+ * NUMA system can have several last level caches, it is represented as a
+ * sparse array (an array of pointers, each of which may be NULL), in the same
+ * spirit as virResctrlAllocPerLevel.
+ */
+struct _virResctrlAllocMemBW {
+ /* Sparse array indexed by memory bandwidth controller (node) id; a NULL
+  * entry means no bandwidth is set for that id */
+ unsigned int **bandwidths;
+ /* Number of slots in `bandwidths` (its upper bound) */
+ size_t nbandwidths;
+};
+
struct _virResctrlAlloc {
virObject parent;
virResctrlAllocPerLevelPtr *levels;
size_t nlevels;
+ virResctrlAllocMemBWPtr mem_bw;
+
/* The identifier (any unique string for now) */
char *id;
/* libvirt-generated path in /sys/fs/resctrl for this particular
@@ -275,6 +302,13 @@ virResctrlAllocDispose(void *obj)
VIR_FREE(level);
}
+ if (alloc->mem_bw) {
+ virResctrlAllocMemBWPtr mem_bw = alloc->mem_bw;
+ for (i = 0; i < mem_bw->nbandwidths; i++)
+ VIR_FREE(mem_bw->bandwidths[i]);
+ }
+
+ VIR_FREE(alloc->mem_bw);
VIR_FREE(alloc->id);
VIR_FREE(alloc->path);
VIR_FREE(alloc->levels);
@@ -697,6 +731,9 @@ virResctrlAllocIsEmpty(virResctrlAllocPtr alloc)
if (!alloc)
return true;
+ if (alloc->mem_bw)
+ return false;
+
for (i = 0; i < alloc->nlevels; i++) {
virResctrlAllocPerLevelPtr a_level = alloc->levels[i];
@@ -890,6 +927,27 @@ virResctrlAllocSetCacheSize(virResctrlAllocPtr alloc,
+/*
+ * virResctrlAllocForeachMemory:
+ * @alloc: allocation to walk; may be NULL
+ * @cb: callback invoked for every bandwidth that is set
+ * @opaque: user data handed to @cb untouched
+ *
+ * Calls @cb with the controller id and the bandwidth value for each non-NULL
+ * entry of the allocation's sparse bandwidth array. Iteration stops at the
+ * first callback that reports a failure.
+ *
+ * Returns 0 on success (including when there is nothing to iterate over),
+ * -1 if a callback returned a negative value.
+ */
int
+virResctrlAllocForeachMemory(virResctrlAllocPtr alloc,
+                             virResctrlAllocForeachMemoryCallback cb,
+                             void *opaque)
+{
+    size_t i = 0;
+    virResctrlAllocMemBWPtr mem_bw;
+
+    if (!alloc || !alloc->mem_bw)
+        return 0;
+
+    mem_bw = alloc->mem_bw;
+    for (i = 0; i < mem_bw->nbandwidths; i++) {
+        if (!mem_bw->bandwidths[i])
+            continue;
+
+        /* The callback returns int; do not silently drop its failure */
+        if (cb(i, *mem_bw->bandwidths[i], opaque) < 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+
+int
virResctrlAllocForeachCache(virResctrlAllocPtr alloc,
virResctrlAllocForeachCacheCallback cb,
void *opaque)
@@ -952,6 +1010,240 @@ virResctrlAllocGetID(virResctrlAllocPtr alloc)
}
+/*
+ * virResctrlMemoryBandwidthSubtract:
+ * @free: allocation holding the bandwidth still available; updated in place
+ * @used: allocation whose bandwidth is taken away from @free
+ *
+ * For every controller that has a bandwidth set in both allocations, reduce
+ * the value in @free by the value in @used. Controllers missing from either
+ * side are left untouched.
+ */
+static void
+virResctrlMemoryBandwidthSubtract(virResctrlAllocPtr free,
+                                  virResctrlAllocPtr used)
+{
+    size_t i;
+    size_t n;
+
+    if (!used->mem_bw || !free->mem_bw)
+        return;
+
+    /* Never index past the end of either sparse array */
+    n = used->mem_bw->nbandwidths;
+    if (free->mem_bw->nbandwidths < n)
+        n = free->mem_bw->nbandwidths;
+
+    for (i = 0; i < n; i++) {
+        unsigned int *avail = free->mem_bw->bandwidths[i];
+        unsigned int *taken = used->mem_bw->bandwidths[i];
+
+        if (!avail || !taken)
+            continue;
+
+        /* Clamp at zero: an unsigned wrap-around would otherwise look like
+         * a huge amount of free bandwidth */
+        if (*taken > *avail)
+            *avail = 0;
+        else
+            *avail -= *taken;
+    }
+}
+
+
+/*
+ * virResctrlSetMemoryBandwidth:
+ * @alloc: allocation to modify
+ * @id: index of the memory bandwidth controller (node)
+ * @memory_bandwidth: bandwidth value to record for @id
+ *
+ * Records @memory_bandwidth for controller @id, creating the bandwidth
+ * container and growing its sparse array on demand. A bandwidth that is
+ * already set for @id is reported as an error rather than overwritten.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+virResctrlSetMemoryBandwidth(virResctrlAllocPtr alloc,
+                             unsigned int id,
+                             unsigned int memory_bandwidth)
+{
+    virResctrlAllocMemBWPtr bw;
+
+    /* Create the container lazily on first use */
+    if (!alloc->mem_bw && VIR_ALLOC(alloc->mem_bw) < 0)
+        return -1;
+    bw = alloc->mem_bw;
+
+    /* Grow the sparse array so that slot @id exists */
+    if (id >= bw->nbandwidths) {
+        size_t missing = id - bw->nbandwidths + 1;
+
+        if (VIR_EXPAND_N(bw->bandwidths, bw->nbandwidths, missing) < 0)
+            return -1;
+    }
+
+    if (bw->bandwidths[id] != NULL) {
+        virReportError(VIR_ERR_XML_ERROR,
+                       _("Memory Bandwidth already defined for node %u"),
+                       id);
+        return -1;
+    }
+
+    if (VIR_ALLOC(bw->bandwidths[id]) < 0)
+        return -1;
+
+    bw->bandwidths[id][0] = memory_bandwidth;
+    return 0;
+}
+
+
+/*
+ * virResctrlAllocMemoryBandwidthFormat:
+ * @alloc: allocation whose memory bandwidth is formatted
+ * @buf: buffer receiving the text
+ *
+ * Appends one "MB:<id>=<bandwidth>;..." line to @buf, with one entry for
+ * every bandwidth set in @alloc. Nothing is added when @alloc carries no
+ * memory bandwidth information at all.
+ *
+ * Returns 0 on success, -1 if virBufferCheckError() reports a failure.
+ */
+static int
+virResctrlAllocMemoryBandwidthFormat(virResctrlAllocPtr alloc,
+                                     virBufferPtr buf)
+{
+    size_t i;
+
+    if (!alloc->mem_bw)
+        return 0;
+
+    virBufferAddLit(buf, "MB:");
+
+    for (i = 0; i < alloc->mem_bw->nbandwidths; i++) {
+        if (!alloc->mem_bw->bandwidths[i])
+            continue;
+
+        /* i is a size_t, so it has to be printed with %zu */
+        virBufferAsprintf(buf, "%zu=%u;", i,
+                          *(alloc->mem_bw->bandwidths[i]));
+    }
+
+    /* Drop the trailing ';' separator before terminating the line */
+    virBufferTrim(buf, ";", 1);
+    virBufferAddChar(buf, '\n');
+
+    return virBufferCheckError(buf) < 0 ? -1 : 0;
+}
+
+
+/*
+ * virResctrlAllocMemoryBandwidth:
+ * @resctrl: host resctrl information
+ * @alloc: allocation being requested
+ * @free: allocation describing the bandwidth that is still unused
+ *
+ * Validates every memory bandwidth requested in @alloc: it must be a
+ * multiple of the host granularity, not smaller than the host minimum,
+ * refer to an existing controller and fit into what @free still offers
+ * for that controller.
+ *
+ * Returns 0 if @alloc requests no memory bandwidth or all requests are
+ * valid, -1 (with an error reported) otherwise.
+ */
+static int
+virResctrlAllocMemoryBandwidth(virResctrlInfoPtr resctrl,
+                               virResctrlAllocPtr alloc,
+                               virResctrlAllocPtr free)
+{
+    size_t i;
+    virResctrlAllocMemBWPtr mem_bw_alloc = alloc->mem_bw;
+    virResctrlAllocMemBWPtr mem_bw_free = free->mem_bw;
+    virResctrlInfoMemBWPtr mem_bw_info = resctrl->membw_info;
+
+    if (!mem_bw_alloc)
+        return 0;
+
+    if (!mem_bw_info) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("RDT Memory Bandwidth allocation "
+                         "unsupported"));
+        return -1;
+    }
+
+    /* A zero granularity would make the modulo below undefined */
+    if (!mem_bw_info->bandwidth_granularity) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Missing or inconsistent resctrl info for "
+                         "memory bandwidth allocation"));
+        return -1;
+    }
+
+    for (i = 0; i < mem_bw_alloc->nbandwidths; i++) {
+        unsigned int wanted;
+
+        if (!mem_bw_alloc->bandwidths[i])
+            continue;
+
+        wanted = *(mem_bw_alloc->bandwidths[i]);
+
+        if (wanted % mem_bw_info->bandwidth_granularity) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("Memory Bandwidth allocation of size "
+                             "%u is not divisible by granularity %u"),
+                           wanted,
+                           mem_bw_info->bandwidth_granularity);
+            return -1;
+        }
+        if (wanted < mem_bw_info->min_bandwidth) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("Memory Bandwidth allocation of size "
+                             "%u is smaller than the minimum "
+                             "allowed allocation %u"),
+                           wanted,
+                           mem_bw_info->min_bandwidth);
+            return -1;
+        }
+        if (i > mem_bw_info->max_id) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("bandwidth controller %zu not exist, "
+                             "max controller id %u"),
+                           i, mem_bw_info->max_id);
+            return -1;
+        }
+        /* Do not dereference a slot the free allocation does not have */
+        if (!mem_bw_free ||
+            i >= mem_bw_free->nbandwidths ||
+            !mem_bw_free->bandwidths[i]) {
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           _("Missing or inconsistent resctrl info for "
+                             "memory bandwidth node '%zu'"), i);
+            return -1;
+        }
+        if (wanted > *(mem_bw_free->bandwidths[i])) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("Not enough room for allocation of %u%% "
+                             "bandwidth on node %zu, available bandwidth %u%%"),
+                           wanted, i,
+                           *(mem_bw_free->bandwidths[i]));
+            return -1;
+        }
+    }
+    return 0;
+}
+
+
+/*
+ * virResctrlAllocParseProcessMemoryBandwidth:
+ * @resctrl: host resctrl information; its membw_info is dereferenced
+ * @alloc: allocation being filled; alloc->mem_bw must already be allocated
+ * @mem_bw: one "<id>=<bandwidth>" token; modified in place (the '=' is
+ *          overwritten with a NUL byte)
+ *
+ * Parses a single token of an "MB:" schemata line and stores the bandwidth
+ * in the sparse array of @alloc, growing it as needed. A token without '='
+ * is silently ignored.
+ *
+ * Returns 0 on success or when the token is ignored, -1 on failure.
+ */
+static int
+virResctrlAllocParseProcessMemoryBandwidth(virResctrlInfoPtr resctrl,
+                                           virResctrlAllocPtr alloc,
+                                           char *mem_bw)
+{
+    unsigned int bandwidth = 0;
+    unsigned int id = 0;
+    char *tmp = NULL;
+
+    tmp = strchr(mem_bw, '=');
+    if (!tmp)
+        return 0;
+    *tmp = '\0';
+    tmp++;
+
+    /* On a parse failure the output variable holds nothing meaningful, so
+     * report the offending text instead of the number */
+    if (virStrToLong_uip(mem_bw, NULL, 10, &id) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Invalid node id '%s'"), mem_bw);
+        return -1;
+    }
+    if (virStrToLong_uip(tmp, NULL, 10, &bandwidth) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Invalid bandwidth '%s'"), tmp);
+        return -1;
+    }
+    if (bandwidth < resctrl->membw_info->min_bandwidth ||
+        id > resctrl->membw_info->max_id) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Missing or inconsistent resctrl info for "
+                         "memory bandwidth node '%u'"), id);
+        return -1;
+    }
+    if (alloc->mem_bw->nbandwidths <= id &&
+        VIR_EXPAND_N(alloc->mem_bw->bandwidths, alloc->mem_bw->nbandwidths,
+                     id - alloc->mem_bw->nbandwidths + 1) < 0) {
+        return -1;
+    }
+    if (!alloc->mem_bw->bandwidths[id]) {
+        if (VIR_ALLOC(alloc->mem_bw->bandwidths[id]) < 0)
+            return -1;
+    }
+
+    *(alloc->mem_bw->bandwidths[id]) = bandwidth;
+    return 0;
+}
+
+
+/*
+ * virResctrlAllocParseMemoryBandwidthLine:
+ * @resctrl: host resctrl information
+ * @alloc: allocation being filled
+ * @line: one line of a schemata file
+ *
+ * Handles a line starting (after optional spaces) with "MB": the text after
+ * the ':' is split on ';' and every token is stored into @alloc. Lines that
+ * do not start with "MB" are ignored.
+ *
+ * Returns 0 on success or when the line is ignored, -1 on failure.
+ */
+static int
+virResctrlAllocParseMemoryBandwidthLine(virResctrlInfoPtr resctrl,
+                                        virResctrlAllocPtr alloc,
+                                        char *line)
+{
+    char **mbs = NULL;
+    char *tmp = NULL;
+    size_t nmbs = 0;
+    size_t i;
+    int ret = -1;
+
+    /* For no reason there can be spaces */
+    virSkipSpaces((const char **) &line);
+
+    if (STRNEQLEN(line, "MB", 2))
+        return 0;
+
+    if (!resctrl || !resctrl->membw_info ||
+        !resctrl->membw_info->min_bandwidth ||
+        !resctrl->membw_info->bandwidth_granularity) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Missing or inconsistent resctrl info for "
+                         "memory bandwidth allocation"));
+        /* Must bail out: the per-token parser dereferences membw_info */
+        return -1;
+    }
+
+    if (!alloc->mem_bw && VIR_ALLOC(alloc->mem_bw) < 0)
+        return -1;
+
+    tmp = strchr(line, ':');
+    if (!tmp)
+        return 0;
+    tmp++;
+
+    mbs = virStringSplitCount(tmp, ";", 0, &nmbs);
+
+    /* With nmbs == 0 the loop is skipped and the list is still released */
+    for (i = 0; i < nmbs; i++) {
+        if (virResctrlAllocParseProcessMemoryBandwidth(resctrl, alloc, mbs[i]) < 0)
+            goto cleanup;
+    }
+    ret = 0;
+ cleanup:
+    virStringListFree(mbs);
+    return ret;
+}
+
+
static int
virResctrlAllocFormatCache(virResctrlAllocPtr alloc, virBufferPtr buf)
{
@@ -1013,6 +1305,11 @@ virResctrlAllocFormat(virResctrlAllocPtr alloc)
return NULL;
}
+ if (virResctrlAllocMemoryBandwidthFormat(alloc, &buf) < 0) {
+ virBufferFreeAndReset(&buf);
+ return NULL;
+ }
+
return virBufferContentAndReset(&buf);
}
@@ -1139,6 +1436,8 @@ virResctrlAllocParse(virResctrlInfoPtr resctrl,
lines = virStringSplitCount(schemata, "\n", 0, &nlines);
for (i = 0; i < nlines; i++) {
+ if (virResctrlAllocParseMemoryBandwidthLine(resctrl, alloc, lines[i]) < 0)
+ goto cleanup;
if (virResctrlAllocParseCacheLine(resctrl, alloc, lines[i]) < 0)
goto cleanup;
}
@@ -1273,6 +1572,22 @@ virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
}
}
+ /* set default free memory bandwidth to 100%*/
+ if (info->membw_info) {
+ if (VIR_ALLOC(ret->mem_bw) < 0)
+ goto error;
+
+ if (VIR_EXPAND_N(ret->mem_bw->bandwidths, ret->mem_bw->nbandwidths,
+ info->membw_info->max_id + 1) < 0)
+ goto error;
+
+ for (i = 0; i < ret->mem_bw->nbandwidths; i++) {
+ if (VIR_ALLOC(ret->mem_bw->bandwidths[i]) < 0)
+ goto error;
+ *(ret->mem_bw->bandwidths[i]) = 100;
+ }
+ }
+
cleanup:
virBitmapFree(mask);
return ret;
@@ -1284,13 +1599,14 @@ virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
/*
* This function creates an allocation that represents all unused parts of all
- * caches in the system. It uses virResctrlInfo for creating a new full
- * allocation with all bits set (using virResctrlAllocNewFromInfo()) and then
- * scans for all allocations under /sys/fs/resctrl and subtracts each one of
- * them from it. That way it can then return an allocation with only bit set
- * being those that are not mentioned in any other allocation. It is used for
- * two things, a) calculating the masks when creating allocations and b) from
- * tests.
+ * caches and memory bandwidth in the system. It uses virResctrlInfo for
+ * creating a new full allocation with all bits set (using
+ * virResctrlAllocNewFromInfo()), memory bandwidth 100% and then scans
+ * for all allocations under /sys/fs/resctrl and subtracts each one of them
+ * from it. That way it can then return an allocation with only bit set
+ * being those that are not mentioned in any other allocation for CAT and
+ * available memory bandwidth for MBA. It is used for two things, a) calculating
+ * the masks and bandwidth available when creating allocations and b) from tests.
*/
virResctrlAllocPtr
virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
@@ -1336,6 +1652,7 @@ virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
goto error;
}
+ virResctrlMemoryBandwidthSubtract(ret, alloc);
virResctrlAllocSubtract(ret, alloc);
virObjectUnref(alloc);
alloc = NULL;
@@ -1526,8 +1843,8 @@ virResctrlAllocCopyMasks(virResctrlAllocPtr dst,
/*
* This function is called when creating an allocation in the system. What it
- * does is that it gets all the unused bits using virResctrlAllocGetUnused() and
- * then tries to find a proper space for every requested allocation effectively
+ * does is that it gets all the unused resources using virResctrlAllocGetUnused()
+ * and then tries to find a proper space for every requested allocation effectively
* transforming `sizes` into `masks`.
*/
static int
@@ -1547,6 +1864,9 @@ virResctrlAllocAssign(virResctrlInfoPtr resctrl,
if (!alloc_default)
goto cleanup;
+ if (virResctrlAllocMemoryBandwidth(resctrl, alloc, alloc_free) < 0)
+ goto cleanup;
+
if (virResctrlAllocCopyMasks(alloc, alloc_default) < 0)
goto cleanup;
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index d657c06..d43fd31 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -73,6 +73,10 @@ typedef int virResctrlAllocForeachCacheCallback(unsigned int level,
unsigned long long size,
void *opaque);
+/* Callback type used by virResctrlAllocForeachMemory(): @id is the index of
+ * the entry in the allocation's bandwidth array, @size is the value stored
+ * there and @opaque is the pointer given to the iterator. */
+typedef int virResctrlAllocForeachMemoryCallback(unsigned int id,
+ unsigned int size,
+ void *opaque);
+
virResctrlAllocPtr
virResctrlAllocNew(void);
@@ -85,6 +89,15 @@ virResctrlAllocSetCacheSize(virResctrlAllocPtr alloc,
virCacheType type,
unsigned int cache,
unsigned long long size);
+/* Invoke @cb, passing @opaque along, for every memory bandwidth that is set
+ * in @alloc. The parameter is named @alloc to match the definition. */
+int
+virResctrlAllocForeachMemory(virResctrlAllocPtr alloc,
+                             virResctrlAllocForeachMemoryCallback cb,
+                             void *opaque);
+
+/* Record @memory_bandwidth for memory bandwidth controller @id in @alloc.
+ * Returns 0 on success, -1 on failure (including when a bandwidth is
+ * already defined for @id). */
+int
+virResctrlSetMemoryBandwidth(virResctrlAllocPtr alloc,
+ unsigned int id,
+ unsigned int memory_bandwidth);
int
virResctrlAllocForeachCache(virResctrlAllocPtr alloc,
--
2.7.4