With this commit we finally have a way to read and manipulate basic resctrl
settings. Locking is done only on exposed functions that read/write from/to
resctrlfs. Not in functions that are exposed in virresctrlpriv.h as those are
only supposed to be used from tests.
More information about how resctrl works:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/D...
Signed-off-by: Martin Kletzander <mkletzan(a)redhat.com>
---
src/Makefile.am | 2 +-
src/libvirt_private.syms | 10 +
src/util/virresctrl.c | 1211 ++++++++++++++++++++++++++++++++++++++++++++-
src/util/virresctrl.h | 49 ++
src/util/virresctrlpriv.h | 27 +
5 files changed, 1288 insertions(+), 11 deletions(-)
create mode 100644 src/util/virresctrlpriv.h
diff --git a/src/Makefile.am b/src/Makefile.am
index 166c9a8e9199..289b03747212 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -167,7 +167,7 @@ UTIL_SOURCES = \
util/virprocess.c util/virprocess.h \
util/virqemu.c util/virqemu.h \
util/virrandom.h util/virrandom.c \
- util/virresctrl.h util/virresctrl.c \
+ util/virresctrl.h util/virresctrl.c util/virresctrlpriv.h \
util/virrotatingfile.h util/virrotatingfile.c \
util/virscsi.c util/virscsi.h \
util/virscsihost.c util/virscsihost.h \
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index d63474e0fc20..f2a2c8650d97 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2552,6 +2552,16 @@ virRandomInt;
# util/virresctrl.h
virCacheTypeFromString;
virCacheTypeToString;
+virResctrlAllocAddPID;
+virResctrlAllocCreate;
+virResctrlAllocForeachSize;
+virResctrlAllocFormat;
+virResctrlAllocGetID;
+virResctrlAllocGetUnused;
+virResctrlAllocNew;
+virResctrlAllocRemove;
+virResctrlAllocSetID;
+virResctrlAllocSetSize;
virResctrlGetInfo;
virResctrlInfoGetCache;
virResctrlInfoNew;
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index f32929468c80..b5c0ea338830 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -18,8 +18,12 @@
#include <config.h>
-#include "virresctrl.h"
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "virresctrlpriv.h"
#include "c-ctype.h"
#include "count-one-bits.h"
#include "viralloc.h"
@@ -152,6 +156,195 @@ virResctrlInfoNew(void)
}
+/* Alloc-related definitions and AllocClass-related functions */
+
+/*
+ * virResctrlAlloc represents one allocation (in XML under cputune/cachetune and
+ * consequently a directory under /sys/fs/resctrl). Since it can have multiple
+ * parts of multiple caches allocated it is represented as bunch of nested
+ * sparse arrays (by sparse I mean array of pointers so that each might be NULL
+ * in case there is no allocation for that particular one (level, cache, ...)).
+ *
+ * Since one allocation can be made for caches on different levels, the first
+ * nested sparse array is of types virResctrlAllocPerLevel. For example if you
+ * have allocation for level 3 cache, there will be three NULL pointers and then
+ * allocated pointer to virResctrlAllocPerLevel. That way you can access it by
+ * `alloc[level]` as O(1) is desired instead of crawling through normal arrays
+ * or lists in three nested loops. The code uses a lot of direct accesses.
+ *
+ * Each virResctrlAllocPerLevel can have allocations for different cache
+ * allocation types. You can allocate instruction cache (VIR_CACHE_TYPE_CODE),
+ * data cache (VIR_CACHE_TYPE_DATA) or unified cache (VIR_CACHE_TYPE_BOTH).
+ * Those allocations are kept in sparse array of virResctrlAllocPerType pointers.
+ *
+ * For each virResctrlAllocPerType users can request some size of the cache to
+ * be allocated. That's what the sparse array `sizes` is for. Non-NULL
+ * pointers represent requested size allocations. The array is indexed by host
+ * cache id (gotten from `/sys/devices/system/cpu/cpuX/cache/indexY/id`). Users
+ * can see this information e.g. in the output of `virsh capabilities` (for that
+ * information there's the other struct, namely `virResctrlInfo`).
+ *
+ * When allocation is being created we need to find unused part of the cache for
+ * all of them. While doing that we store the bitmask in a sparse array of
+ * virBitmaps named `masks` indexed the same way as `sizes`. The upper bounds
+ * of the sparse arrays are stored in nmasks or nsizes, respectively.
+ */
+
+/* virResctrlAllocPerType represents */
+typedef struct _virResctrlAllocPerType virResctrlAllocPerType;
+typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr;
+struct _virResctrlAllocPerType {
+ /* There could be bool saying whether this is set or not, but since everything
+ * in virResctrlAlloc (and most of libvirt) goes with pointer arrays we would
+ * have to have one more level of allocation anyway, so this stays faithful to
+ * the concept */
+ unsigned long long **sizes;
+ size_t nsizes;
+
+ /* Mask for each cache */
+ virBitmapPtr *masks;
+ size_t nmasks;
+};
+
+typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
+typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
+struct _virResctrlAllocPerLevel {
+ virResctrlAllocPerTypePtr *types; /* Indexed with enum virCacheType */
+ /* There is no `ntypes` member variable as it is always allocated for
+ * VIR_CACHE_TYPE_LAST number of items */
+};
+
+struct _virResctrlAlloc {
+ virObject parent;
+
+ virResctrlAllocPerLevelPtr *levels;
+ size_t nlevels;
+
+ /* The identifier (any unique string for now) */
+ char *id;
+ /* libvirt-generated path in /sys/fs/resctrl for this particular
+ * allocation */
+ char *path;
+};
+
+static virClassPtr virResctrlAllocClass;
+
+static void
+virResctrlAllocDispose(void *obj)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+
+ virResctrlAllocPtr resctrl = obj;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlAllocPerLevelPtr level = resctrl->levels[i];
+
+ if (!level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocPerTypePtr type = level->types[j];
+
+ if (!type)
+ continue;
+
+ for (k = 0; k < type->nsizes; k++)
+ VIR_FREE(type->sizes[k]);
+
+ for (k = 0; k < type->nmasks; k++)
+ virBitmapFree(type->masks[k]);
+
+ VIR_FREE(type->sizes);
+ VIR_FREE(type->masks);
+ VIR_FREE(type);
+ }
+ VIR_FREE(level->types);
+ VIR_FREE(level);
+ }
+
+ VIR_FREE(resctrl->id);
+ VIR_FREE(resctrl->path);
+ VIR_FREE(resctrl->levels);
+}
+
+
+static int
+virResctrlAllocOnceInit(void)
+{
+ if (!(virResctrlAllocClass = virClassNew(virClassForObject(),
+ "virResctrlAlloc",
+ sizeof(virResctrlAlloc),
+ virResctrlAllocDispose)))
+ return -1;
+
+ return 0;
+}
+
+
+VIR_ONCE_GLOBAL_INIT(virResctrlAlloc)
+
+
+virResctrlAllocPtr
+virResctrlAllocNew(void)
+{
+ if (virResctrlAllocInitialize() < 0)
+ return NULL;
+
+ return virObjectNew(virResctrlAllocClass);
+}
+
+
+/* Common functions */
+static int
+virResctrlLockInternal(int op)
+{
+ int fd = open(SYSFS_RESCTRL_PATH, O_DIRECTORY | O_CLOEXEC);
+
+ if (fd < 0) {
+ virReportSystemError(errno, "%s", _("Cannot open
resctrlfs"));
+ return -1;
+ }
+
+ if (flock(fd, op) < 0) {
+ virReportSystemError(errno, "%s", _("Cannot lock
resctrlfs"));
+ VIR_FORCE_CLOSE(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+
+static inline int
+virResctrlLockWrite(void)
+{
+ return virResctrlLockInternal(LOCK_EX);
+}
+
+
+static int
+virResctrlUnlock(int fd)
+{
+ if (fd == -1)
+ return 0;
+
+ /* The lock gets unlocked by closing the fd, which we need to do anyway in
+ * order to clean up properly */
+ if (VIR_CLOSE(fd) < 0) {
+ virReportSystemError(errno, "%s", _("Cannot close
resctrlfs"));
+
+ /* Trying to save the already broken */
+ if (flock(fd, LOCK_UN) < 0)
+ virReportSystemError(errno, "%s", _("Cannot unlock
resctrlfs"));
+ return -1;
+ }
+
+ return 0;
+}
+
+
/* Info-related functions */
bool
virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
@@ -200,6 +393,8 @@ virResctrlGetInfo(virResctrlInfoPtr resctrl)
}
while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH "/info")) >
0) {
+ VIR_DEBUG("Parsing info type '%s'", ent->d_name);
+
if (ent->d_type != DT_DIR)
continue;
@@ -207,16 +402,14 @@ virResctrlGetInfo(virResctrlInfoPtr resctrl)
continue;
if (virStrToLong_uip(ent->d_name + 1, &endptr, 10, &level) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("Cannot parse resctrl cache info level"));
+ VIR_DEBUG("Cannot parse resctrl cache info level '%s'",
ent->d_name + 1);
goto cleanup;
}
type = virResctrlTypeFromString(endptr);
if (type < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("Cannot parse resctrl cache info type"));
- goto cleanup;
+ VIR_DEBUG("Cannot parse resctrl cache info type '%s'",
endptr);
+ continue;
}
if (VIR_ALLOC(i_type) < 0)
@@ -259,10 +452,19 @@ virResctrlGetInfo(virResctrlInfoPtr resctrl)
level - resctrl->nlevels + 1) < 0)
goto cleanup;
- if (!resctrl->levels[level] &&
- (VIR_ALLOC(resctrl->levels[level]) < 0 ||
- VIR_ALLOC_N(resctrl->levels[level]->types, VIR_CACHE_TYPE_LAST) <
0))
- goto cleanup;
+ if (!resctrl->levels[level]) {
+ virResctrlInfoPerTypePtr *types = NULL;
+
+ if (VIR_ALLOC_N(types, VIR_CACHE_TYPE_LAST) < 0)
+ goto cleanup;
+
+ if (VIR_ALLOC(resctrl->levels[level]) < 0) {
+ VIR_FREE(types);
+ goto cleanup;
+ }
+ resctrl->levels[level]->types = types;
+ }
+
i_level = resctrl->levels[level];
if (i_level->types[type]) {
@@ -358,3 +560,992 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
VIR_FREE(*controls);
goto cleanup;
}
+
+
+/* Alloc-related functions */
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+
+ if (!resctrl)
+ return true;
+
+ for (i = 0; i < resctrl->nlevels; i++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[i];
+
+ if (!a_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[j];
+
+ if (!a_type)
+ continue;
+
+ for (k = 0; k < a_type->nsizes; k++) {
+ if (a_type->sizes[k])
+ return false;
+ }
+
+ for (k = 0; k < a_type->nmasks; k++) {
+ if (a_type->masks[k])
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+static virResctrlAllocPerTypePtr
+virResctrlAllocGetType(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type)
+{
+ virResctrlAllocPerLevelPtr a_level = NULL;
+
+ if (resctrl->nlevels <= level &&
+ VIR_EXPAND_N(resctrl->levels, resctrl->nlevels, level - resctrl->nlevels
+ 1) < 0)
+ return NULL;
+
+ if (!resctrl->levels[level]) {
+ virResctrlAllocPerTypePtr *types = NULL;
+
+ if (VIR_ALLOC_N(types, VIR_CACHE_TYPE_LAST) < 0)
+ return NULL;
+
+ if (VIR_ALLOC(resctrl->levels[level]) < 0) {
+ VIR_FREE(types);
+ return NULL;
+ }
+ resctrl->levels[level]->types = types;
+ }
+
+ a_level = resctrl->levels[level];
+
+ if (!a_level->types[type] && VIR_ALLOC(a_level->types[type]) < 0)
+ return NULL;
+
+ return a_level->types[type];
+}
+
+
+static int
+virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ virBitmapPtr mask)
+{
+ virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+ if (!a_type)
+ return -1;
+
+ if (a_type->nmasks <= cache &&
+ VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+ cache - a_type->nmasks + 1) < 0)
+ return -1;
+
+ if (!a_type->masks[cache]) {
+ a_type->masks[cache] = virBitmapNew(virBitmapSize(mask));
+
+ if (!a_type->masks[cache])
+ return -1;
+ }
+
+ return virBitmapCopy(a_type->masks[cache], mask);
+}
+
+
+static int
+virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size)
+{
+ virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+ if (!a_type)
+ return -1;
+
+ if (a_type->nsizes <= cache &&
+ VIR_EXPAND_N(a_type->sizes, a_type->nsizes,
+ cache - a_type->nsizes + 1) < 0)
+ return -1;
+
+ if (!a_type->sizes[cache] && VIR_ALLOC(a_type->sizes[cache]) < 0)
+ return -1;
+
+ *(a_type->sizes[cache]) = size;
+
+ return 0;
+}
+
+
+/*
+ * Check if there is an allocation for this level/type/cache already. Called
+ * before updating the structure. VIR_CACHE_TYPE_BOTH collides with any type,
+ * the other types collide with itself. This code basically checks if either:
+ * `alloc[level]->types[type]->sizes[cache]`
+ * or
+ * `alloc[level]->types[VIR_CACHE_TYPE_BOTH]->sizes[cache]`
+ * is non-NULL. All the fuzz around it is checking for NULL pointers along
+ * the way.
+ */
+static bool
+virResctrlAllocCheckCollision(virResctrlAllocPtr alloc,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache)
+{
+ virResctrlAllocPerLevelPtr a_level = NULL;
+ virResctrlAllocPerTypePtr a_type = NULL;
+
+ if (!alloc)
+ return false;
+
+ if (alloc->nlevels <= level)
+ return false;
+
+ a_level = alloc->levels[level];
+
+ if (!a_level)
+ return false;
+
+ a_type = a_level->types[VIR_CACHE_TYPE_BOTH];
+
+ /* If there is an allocation for type 'both', there can be no other
+ * allocation for the same cache */
+ if (a_type && a_type->nsizes > cache &&
a_type->sizes[cache])
+ return true;
+
+ if (type == VIR_CACHE_TYPE_BOTH) {
+ a_type = a_level->types[VIR_CACHE_TYPE_CODE];
+
+ if (a_type && a_type->nsizes > cache &&
a_type->sizes[cache])
+ return true;
+
+ a_type = a_level->types[VIR_CACHE_TYPE_DATA];
+
+ if (a_type && a_type->nsizes > cache &&
a_type->sizes[cache])
+ return true;
+ } else {
+ a_type = a_level->types[type];
+
+ if (a_type && a_type->nsizes > cache &&
a_type->sizes[cache])
+ return true;
+ }
+
+ return false;
+}
+
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size)
+{
+ if (virResctrlAllocCheckCollision(resctrl, level, type, cache)) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Colliding cache allocations for cache "
+ "level '%u' id '%u', type
'%s'"),
+ level, cache, virCacheTypeToString(type));
+ return -1;
+ }
+
+ return virResctrlAllocUpdateSize(resctrl, level, type, cache, size);
+}
+
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+ virResctrlAllocForeachSizeCallback cb,
+ void *opaque)
+{
+ int ret = 0;
+ unsigned int level = 0;
+ unsigned int type = 0;
+ unsigned int cache = 0;
+
+ if (!resctrl)
+ return 0;
+
+ for (level = 0; level < resctrl->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+ if (!a_level)
+ continue;
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+ if (!a_type)
+ continue;
+
+ for (cache = 0; cache < a_type->nsizes; cache++) {
+ unsigned long long *size = a_type->sizes[cache];
+
+ if (!size)
+ continue;
+
+ ret = cb(level, type, cache, *size, opaque);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+ const char *id)
+{
+ if (!id) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Resctrl allocation 'id' cannot be NULL"));
+ return -1;
+ }
+
+ return VIR_STRDUP(alloc->id, id);
+}
+
+
+const char *
+virResctrlAllocGetID(virResctrlAllocPtr alloc)
+{
+ return alloc->id;
+}
+
+
+char *
+virResctrlAllocFormat(virResctrlAllocPtr resctrl)
+{
+ virBuffer buf = VIR_BUFFER_INITIALIZER;
+ unsigned int level = 0;
+ unsigned int type = 0;
+ unsigned int cache = 0;
+
+ if (!resctrl)
+ return NULL;
+
+ for (level = 0; level < resctrl->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+ if (!a_level)
+ continue;
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+ if (!a_type)
+ continue;
+
+ virBufferAsprintf(&buf, "L%u%s:", level,
virResctrlTypeToString(type));
+
+ for (cache = 0; cache < a_type->nmasks; cache++) {
+ virBitmapPtr mask = a_type->masks[cache];
+ char *mask_str = NULL;
+
+ if (!mask)
+ continue;
+
+ mask_str = virBitmapToString(mask, false, true);
+ if (!mask_str) {
+ virBufferFreeAndReset(&buf);
+ return NULL;
+ }
+
+ virBufferAsprintf(&buf, "%u=%s;", cache, mask_str);
+ VIR_FREE(mask_str);
+ }
+
+ virBufferTrim(&buf, ";", 1);
+ virBufferAddChar(&buf, '\n');
+ }
+ }
+
+ virBufferCheckError(&buf);
+ return virBufferContentAndReset(&buf);
+}
+
+
+static int
+virResctrlAllocParseProcessCache(virResctrlInfoPtr resctrl,
+ virResctrlAllocPtr alloc,
+ unsigned int level,
+ virCacheType type,
+ char *cache)
+{
+ char *tmp = strchr(cache, '=');
+ unsigned int cache_id = 0;
+ virBitmapPtr mask = NULL;
+ int ret = -1;
+
+ if (!tmp)
+ return 0;
+
+ *tmp = '\0';
+ tmp++;
+
+ if (virStrToLong_uip(cache, NULL, 10, &cache_id) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Invalid cache id '%s'"), cache);
+ return -1;
+ }
+
+ mask = virBitmapNewString(tmp);
+ if (!mask)
+ return -1;
+
+ virBitmapShrink(mask, resctrl->levels[level]->types[type]->bits);
+
+ /* Guess what! All bits set mean there is no allocation and it uses only
+ * unallocated parts of the cache. */
+ if (!virBitmapIsAllSet(mask) &&
+ virResctrlAllocUpdateMask(alloc, level, type, cache_id, mask) < 0)
+ goto cleanup;
+
+ ret = 0;
+ cleanup:
+ virBitmapFree(mask);
+ return ret;
+}
+
+
+static int
+virResctrlAllocParseProcessLine(virResctrlInfoPtr resctrl,
+ virResctrlAllocPtr alloc,
+ char *line)
+{
+ char **caches = NULL;
+ char *tmp = NULL;
+ unsigned int level = 0;
+ int type = -1;
+ size_t ncaches = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ /* For no reason there can be spaces */
+ virSkipSpaces((const char **) &line);
+
+ /* Skip lines that don't concern caches, e.g. MB: etc. */
+ if (line[0] != 'L')
+ return 0;
+
+ /* And lines that we can't parse too */
+ tmp = strchr(line, ':');
+ if (!tmp)
+ return 0;
+
+ *tmp = '\0';
+ tmp++;
+
+ if (virStrToLong_uip(line + 1, &line, 10, &level) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Cannot parse resctrl schema level '%s'"),
+ line + 1);
+ return -1;
+ }
+
+ type = virResctrlTypeFromString(line);
+ if (type < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Cannot parse resctrl schema level '%s'"),
+ line + 1);
+ return -1;
+ }
+
+ caches = virStringSplitCount(tmp, ";", 0, &ncaches);
+ if (!caches)
+ return 0;
+
+ for (i = 0; i < ncaches; i++) {
+ if (virResctrlAllocParseProcessCache(resctrl, alloc, level, type, caches[i]) <
0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ virStringListFree(caches);
+ return ret;
+}
+
+
+static int
+virResctrlAllocParse(virResctrlInfoPtr resctrl,
+ virResctrlAllocPtr alloc,
+ const char *schemata)
+{
+ char **lines = NULL;
+ size_t nlines = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ lines = virStringSplitCount(schemata, "\n", 0, &nlines);
+ for (i = 0; i < nlines; i++) {
+ if (virResctrlAllocParseProcessLine(resctrl, alloc, lines[i]) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ virStringListFree(lines);
+ return ret;
+}
+
+
+static void
+virResctrlAllocSubtractPerType(virResctrlAllocPerTypePtr dst,
+ virResctrlAllocPerTypePtr src)
+{
+ size_t i = 0;
+
+ if (!dst || !src)
+ return;
+
+ for (i = 0; i < dst->nmasks && i < src->nmasks; i++) {
+ if (dst->masks[i] && src->masks[i])
+ virBitmapSubtract(dst->masks[i], src->masks[i]);
+ }
+}
+
+
+static void
+virResctrlAllocSubtract(virResctrlAllocPtr dst,
+ virResctrlAllocPtr src)
+{
+ size_t i = 0;
+ size_t j = 0;
+
+ if (!src)
+ return;
+
+ for (i = 0; i < dst->nlevels && i < src->nlevels; i++) {
+ if (dst->levels[i] && src->levels[i]) {
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlAllocSubtractPerType(dst->levels[i]->types[j],
+ src->levels[i]->types[j]);
+ }
+ }
+ }
+}
+
+
+static virResctrlAllocPtr
+virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
+{
+ size_t i = 0;
+ size_t j = 0;
+ size_t k = 0;
+ virResctrlAllocPtr ret = virResctrlAllocNew();
+ virBitmapPtr mask = NULL;
+
+ if (!ret)
+ return NULL;
+
+ for (i = 0; i < info->nlevels; i++) {
+ virResctrlInfoPerLevelPtr i_level = info->levels[i];
+
+ if (!i_level)
+ continue;
+
+ for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+ virResctrlInfoPerTypePtr i_type = i_level->types[j];
+
+ if (!i_type)
+ continue;
+
+ virBitmapFree(mask);
+ mask = virBitmapNew(i_type->bits);
+ if (!mask)
+ goto error;
+ virBitmapSetAll(mask);
+
+ for (k = 0; k <= i_type->max_cache_id; k++) {
+ if (virResctrlAllocUpdateMask(ret, i, j, k, mask) < 0)
+ goto error;
+ }
+ }
+ }
+
+ cleanup:
+ virBitmapFree(mask);
+ return ret;
+ error:
+ virObjectUnref(ret);
+ ret = NULL;
+ goto cleanup;
+}
+
+/*
+ * This function creates an allocation that represents all unused parts of all
+ * caches in the system. It uses virResctrlInfo for creating a new full
+ * allocation with all bits set (using virResctrlAllocNewFromInfo()) and then
+ * scans for all allocations under /sys/fs/resctrl and subtracts each one of
+ * them from it. That way it can then return an allocation with only bit set
+ * being those that are not mentioned in any other allocation. It is used for
+ * two things, a) calculating the masks when creating allocations and b) from
+ * tests.
+ */
+virResctrlAllocPtr
+virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
+{
+ virResctrlAllocPtr ret = NULL;
+ virResctrlAllocPtr alloc = NULL;
+ struct dirent *ent = NULL;
+ DIR *dirp = NULL;
+ char *schemata = NULL;
+ int rv = -1;
+
+ if (virResctrlInfoIsEmpty(resctrl)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return NULL;
+ }
+
+ ret = virResctrlAllocNewFromInfo(resctrl);
+ if (!ret)
+ return NULL;
+
+ if (virFileReadValueString(&schemata,
+ SYSFS_RESCTRL_PATH
+ "/schemata") < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not read schemata file for the default
group"));
+ goto error;
+ }
+
+ alloc = virResctrlAllocNew();
+ if (!alloc)
+ goto error;
+
+ if (virResctrlAllocParse(resctrl, alloc, schemata) < 0)
+ goto error;
+ if (!virResctrlAllocIsEmpty(alloc))
+ virResctrlAllocSubtract(ret, alloc);
+
+ if (virDirOpen(&dirp, SYSFS_RESCTRL_PATH) < 0)
+ goto error;
+
+ while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH)) > 0) {
+ if (ent->d_type != DT_DIR)
+ continue;
+
+ if (STREQ(ent->d_name, "info"))
+ continue;
+
+ VIR_FREE(schemata);
+ rv = virFileReadValueString(&schemata,
+ SYSFS_RESCTRL_PATH
+ "/%s/schemata",
+ ent->d_name);
+ if (rv == -2)
+ continue;
+
+ if (rv < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Could not read schemata file for group %s"),
+ ent->d_name);
+ goto error;
+ }
+
+ virObjectUnref(alloc);
+ alloc = virResctrlAllocNew();
+ if (!alloc)
+ goto error;
+
+ if (virResctrlAllocParse(resctrl, alloc, schemata) < 0)
+ goto error;
+
+ virResctrlAllocSubtract(ret, alloc);
+ }
+ if (rv < 0)
+ goto error;
+
+ cleanup:
+ virObjectUnref(alloc);
+ VIR_DIR_CLOSE(dirp);
+ VIR_FREE(schemata);
+ return ret;
+
+ error:
+ virObjectUnref(ret);
+ ret = NULL;
+ goto cleanup;
+}
+
+
+
+static int
+virResctrlAllocSetMask(virResctrlAllocPerTypePtr a_type,
+ unsigned int cache,
+ virBitmapPtr mask)
+{
+ if (a_type->nmasks <= cache &&
+ VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+ cache - a_type->nmasks + 1) < 0)
+ return -1;
+
+ a_type->masks[cache] = mask;
+
+ return 0;
+}
+
+
+/*
+ * Given the information about requested allocation type `a_type`, the host
+ * cache for a particular type `i_type` and unused bits in the system `f_type`
+ * this function tries to find the smallest free space in which the allocation
+ * for cache id `cache` would fit. We're looking for the smallest place in
+ * order to minimize fragmentation and maximize the possibility of succeeding.
+ */
+static int
+virResctrlAllocFindUnused(virResctrlAllocPerTypePtr a_type,
+ virResctrlInfoPerTypePtr i_type,
+ virResctrlAllocPerTypePtr f_type,
+ unsigned int level,
+ unsigned int type,
+ unsigned int cache)
+{
+ unsigned long long *size = a_type->sizes[cache];
+ virBitmapPtr a_mask = NULL;
+ virBitmapPtr f_mask = NULL;
+ unsigned long long granularity;
+ unsigned long long need_bits;
+ size_t i = 0;
+ ssize_t pos = -1;
+ ssize_t last_bits = 0;
+ ssize_t last_pos = -1;
+
+ /* If there is no reservation requested we need to set all bits. That's due
+ * to weird interface of the resctrl sysfs. It's also the reason why we
+ * cannot reserve the whole cache in one allocation. */
+ if (!size) {
+ a_mask = virBitmapNew(i_type->bits);
+ if (!a_mask)
+ return -1;
+
+ virBitmapSetAll(a_mask);
+
+ if (virResctrlAllocSetMask(a_type, cache, a_mask) < 0) {
+ virBitmapFree(a_mask);
+ return -1;
+ }
+
+ return 0;
+ }
+
+ if (cache >= f_type->nmasks) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache with id %u does not exists for level %d"),
+ cache, level);
+ return -1;
+ }
+
+ f_mask = f_type->masks[cache];
+ if (!f_mask) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d id %u does not support tuning for "
+ "scope type '%s'"),
+ level, cache, virCacheTypeToString(type));
+ return -1;
+ }
+
+ if (*size == i_type->size) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache allocation for the whole cache is not "
+ "possible, specify size smaller than %llu"),
+ i_type->size);
+ return -1;
+ }
+
+ granularity = i_type->size / i_type->bits;
+ need_bits = *size / granularity;
+
+ if (*size % granularity) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache allocation of size %llu is not "
+ "divisible by granularity %llu"),
+ *size, granularity);
+ return -1;
+ }
+
+ if (need_bits < i_type->min_cbm_bits) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache allocation of size %llu is smaller "
+ "than the minimum allowed allocation %llu"),
+ *size, granularity * i_type->min_cbm_bits);
+ return -1;
+ }
+
+ while ((pos = virBitmapNextSetBit(f_mask, pos)) >= 0) {
+ ssize_t pos_clear = virBitmapNextClearBit(f_mask, pos);
+ ssize_t bits;
+
+ if (pos_clear < 0)
+ pos_clear = virBitmapSize(f_mask);
+
+ bits = pos_clear - pos;
+
+ /* Not enough bits, move on and skip all of them */
+ if (bits < need_bits) {
+ pos = pos_clear;
+ continue;
+ }
+
+ /* This fits perfectly */
+ if (bits == need_bits) {
+ last_pos = pos;
+ break;
+ }
+
+ /* Remember the smaller region if we already found on before */
+ if (last_pos < 0 || (last_bits && bits < last_bits)) {
+ last_bits = bits;
+ last_pos = pos;
+ }
+
+ pos = pos_clear;
+ }
+
+ if (last_pos < 0) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Not enough room for allocation of "
+ "%llu bytes for level %u cache %u "
+ "scope type '%s'"),
+ *size, level, cache,
+ virCacheTypeToString(type));
+ return -1;
+ }
+
+ a_mask = virBitmapNew(i_type->bits);
+ if (!a_mask)
+ return -1;
+
+ for (i = last_pos; i < last_pos + need_bits; i++) {
+ ignore_value(virBitmapSetBit(a_mask, i));
+ ignore_value(virBitmapClearBit(f_mask, i));
+ }
+
+ if (virResctrlAllocSetMask(a_type, cache, a_mask) < 0) {
+ virBitmapFree(a_mask);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+/*
+ * This function is called when creating an allocation in the system. What it
+ * does is that it gets all the unused bits using virResctrlAllocGetUnused() and
+ * then tries to find a proper space for every requested allocation effectively
+ * transforming `sizes` into `masks`.
+ */
+static int
+virResctrlAllocMasksAssign(virResctrlInfoPtr resctrl,
+ virResctrlAllocPtr alloc)
+{
+ int ret = -1;
+ unsigned int level = 0;
+ virResctrlAllocPtr alloc_free = NULL;
+
+ alloc_free = virResctrlAllocGetUnused(resctrl);
+ if (!alloc_free)
+ return -1;
+
+ for (level = 0; level < alloc->nlevels; level++) {
+ virResctrlAllocPerLevelPtr a_level = alloc->levels[level];
+ virResctrlAllocPerLevelPtr f_level = NULL;
+ unsigned int type = 0;
+
+ if (!a_level)
+ continue;
+
+ if (level < alloc_free->nlevels)
+ f_level = alloc_free->levels[level];
+
+ if (!f_level) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d does not support tuning"),
+ level);
+ goto cleanup;
+ }
+
+ for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+ virResctrlAllocPerTypePtr a_type = a_level->types[type];
+ virResctrlAllocPerTypePtr f_type = f_level->types[type];
+ unsigned int cache = 0;
+
+ if (!a_type)
+ continue;
+
+ if (!f_type) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Cache level %d does not support tuning for "
+ "scope type '%s'"),
+ level, virCacheTypeToString(type));
+ goto cleanup;
+ }
+
+ for (cache = 0; cache < a_type->nsizes; cache++) {
+ virResctrlInfoPerLevelPtr i_level = resctrl->levels[level];
+ virResctrlInfoPerTypePtr i_type = i_level->types[type];
+
+ if (virResctrlAllocFindUnused(a_type, i_type, f_type, level, type, cache)
< 0)
+ goto cleanup;
+ }
+ }
+ }
+
+ ret = 0;
+ cleanup:
+ virObjectUnref(alloc_free);
+ return ret;
+}
+
+
+/* This checks if the directory for the alloc exists. If not it tries to create
+ * it and apply appropriate alloc settings. */
+int
+virResctrlAllocCreate(virResctrlInfoPtr resctrl,
+ virResctrlAllocPtr alloc,
+ const char *machinename)
+{
+ char *schemata_path = NULL;
+ char *alloc_str = NULL;
+ int ret = -1;
+ int lockfd = -1;
+
+ if (!alloc)
+ return 0;
+
+ if (!resctrl) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Resource control is not supported on this host"));
+ return -1;
+ }
+
+ if (!alloc->id) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Resctrl Allocation ID must be set before
creation"));
+ return -1;
+ }
+
+ if (!alloc->path &&
+ virAsprintf(&alloc->path, "%s/%s-%s",
+ SYSFS_RESCTRL_PATH, machinename, alloc->id) < 0)
+ return -1;
+
+ /* Check if this allocation was already created */
+ if (virFileIsDir(alloc->path))
+ return 0;
+
+ if (virFileExists(alloc->path)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Path '%s' for resctrl allocation exists and is not
a "
+ "directory"), alloc->path);
+ goto cleanup;
+ }
+
+ lockfd = virResctrlLockWrite();
+ if (lockfd < 0)
+ goto cleanup;
+
+ if (virResctrlAllocMasksAssign(resctrl, alloc) < 0)
+ goto cleanup;
+
+ alloc_str = virResctrlAllocFormat(alloc);
+ if (!alloc_str)
+ goto cleanup;
+
+ if (virAsprintf(&schemata_path, "%s/schemata", alloc->path) < 0)
+ goto cleanup;
+
+ if (virFileMakePath(alloc->path) < 0) {
+ virReportSystemError(errno,
+ _("Cannot create resctrl directory
'%s'"),
+ alloc->path);
+ goto cleanup;
+ }
+
+ VIR_DEBUG("Writing resctrl schemata '%s' into '%s'",
alloc_str, schemata_path);
+ if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) {
+ rmdir(alloc->path);
+ virReportSystemError(errno,
+ _("Cannot write into schemata file
'%s'"),
+ schemata_path);
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ virResctrlUnlock(lockfd);
+ VIR_FREE(alloc_str);
+ VIR_FREE(schemata_path);
+ return ret;
+}
+
+
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+ pid_t pid)
+{
+ char *tasks = NULL;
+ char *pidstr = NULL;
+ int ret = 0;
+
+ if (!alloc->path) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot add pid to non-existing resctrl
allocation"));
+ return -1;
+ }
+
+ if (virAsprintf(&tasks, "%s/tasks", alloc->path) < 0)
+ return -1;
+
+ if (virAsprintf(&pidstr, "%lld", (long long int) pid) < 0)
+ goto cleanup;
+
+ if (virFileWriteStr(tasks, pidstr, 0) < 0) {
+ virReportSystemError(errno,
+ _("Cannot write pid in tasks file '%s'"),
+ tasks);
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ VIR_FREE(tasks);
+ VIR_FREE(pidstr);
+ return ret;
+}
+
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc)
+{
+ int ret = 0;
+
+ if (!alloc->path)
+ return 0;
+
+ VIR_DEBUG("Removing resctrl allocation %s", alloc->path);
+ if (rmdir(alloc->path) != 0 && errno != ENOENT) {
+ ret = -errno;
+ VIR_ERROR(_("Unable to remove %s (%d)"), alloc->path, errno);
+ }
+
+ return ret;
+}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 471c02f28dcd..27793c86a745 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -68,4 +68,53 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
size_t *ncontrols,
virResctrlInfoPerCachePtr **controls);
+/* Alloc-related things */
+typedef struct _virResctrlAlloc virResctrlAlloc;
+typedef virResctrlAlloc *virResctrlAllocPtr;
+
+typedef int virResctrlAllocForeachSizeCallback(unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size,
+ void *opaque);
+
+virResctrlAllocPtr
+virResctrlAllocNew(void);
+
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl);
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+ unsigned int level,
+ virCacheType type,
+ unsigned int cache,
+ unsigned long long size);
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+ virResctrlAllocForeachSizeCallback cb,
+ void *opaque);
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+ const char *id);
+const char *
+virResctrlAllocGetID(virResctrlAllocPtr alloc);
+
+char *
+virResctrlAllocFormat(virResctrlAllocPtr alloc);
+
+int
+virResctrlAllocCreate(virResctrlInfoPtr r_info,
+ virResctrlAllocPtr alloc,
+ const char *machinename);
+
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+ pid_t pid);
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc);
+
#endif /* __VIR_RESCTRL_H__ */
diff --git a/src/util/virresctrlpriv.h b/src/util/virresctrlpriv.h
new file mode 100644
index 000000000000..72b115afd13e
--- /dev/null
+++ b/src/util/virresctrlpriv.h
@@ -0,0 +1,27 @@
+/*
+ * virresctrlpriv.h:
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <
http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VIR_RESCTRL_PRIV_H__
+# define __VIR_RESCTRL_PRIV_H__
+
+# include "virresctrl.h"
+
+virResctrlAllocPtr
+virResctrlAllocGetUnused(virResctrlInfoPtr resctrl);
+
+#endif /* __VIR_RESCTRL_PRIV_H__ */
--
2.16.1