On Wed, Jan 24, 2018 at 02:51:46PM +0100, Pavel Hrdina wrote:
On Tue, Jan 23, 2018 at 07:05:14PM +0100, Martin Kletzander wrote:
> With this commit we finally have a way to read and manipulate basic resctrl
> settings. Locking is done only on exposed functions that read/write from/to
> resctrlfs. Not in functions that are exposed in virresctrlpriv.h as those are
> only supposed to be used from tests.
>
> More information about how resctrl works:
>
>
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/D...
>
> Signed-off-by: Martin Kletzander <mkletzan(a)redhat.com>
> ---
> src/Makefile.am | 2 +-
> src/libvirt_private.syms | 10 +
> src/util/virresctrl.c | 1211 ++++++++++++++++++++++++++++++++++++++++++++-
> src/util/virresctrl.h | 49 ++
> src/util/virresctrlpriv.h | 27 +
> 5 files changed, 1288 insertions(+), 11 deletions(-)
> create mode 100644 src/util/virresctrlpriv.h
>
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 166c9a8e9199..289b03747212 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -167,7 +167,7 @@ UTIL_SOURCES = \
> util/virprocess.c util/virprocess.h \
> util/virqemu.c util/virqemu.h \
> util/virrandom.h util/virrandom.c \
> - util/virresctrl.h util/virresctrl.c \
> + util/virresctrl.h util/virresctrl.c util/virresctrlpriv.h \
> util/virrotatingfile.h util/virrotatingfile.c \
> util/virscsi.c util/virscsi.h \
> util/virscsihost.c util/virscsihost.h \
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index d63474e0fc20..f2a2c8650d97 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -2552,6 +2552,16 @@ virRandomInt;
> # util/virresctrl.h
> virCacheTypeFromString;
> virCacheTypeToString;
> +virResctrlAllocAddPID;
> +virResctrlAllocCreate;
> +virResctrlAllocForeachSize;
> +virResctrlAllocFormat;
> +virResctrlAllocGetID;
> +virResctrlAllocGetUnused;
> +virResctrlAllocNew;
> +virResctrlAllocRemove;
> +virResctrlAllocSetID;
> +virResctrlAllocSetSize;
> virResctrlGetInfo;
> virResctrlInfoGetCache;
> virResctrlInfoNew;
> diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
> index f32929468c80..b5c0ea338830 100644
> --- a/src/util/virresctrl.c
> +++ b/src/util/virresctrl.c
> @@ -18,8 +18,12 @@
>
> #include <config.h>
>
> -#include "virresctrl.h"
> +#include <sys/file.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
>
> +#include "virresctrlpriv.h"
> #include "c-ctype.h"
> #include "count-one-bits.h"
> #include "viralloc.h"
> @@ -152,6 +156,195 @@ virResctrlInfoNew(void)
> }
>
>
> +/* Alloc-related definitions and AllocClass-related functions */
> +
> +/*
> + * virResctrlAlloc represents one allocation (in XML under cputune/cachetune and
> + * consequently a directory under /sys/fs/resctrl). Since it can have multiple
> + * parts of multiple caches allocated it is represented as bunch of nested
> + * sparse arrays (by sparse I mean array of pointers so that each might be NULL
> + * in case there is no allocation for that particular one (level, cache, ...)).
> + *
> + * Since one allocation can be made for caches on different levels, the first
> + * nested sparse array is of types virResctrlAllocPerLevel. For example if you
> + * have allocation for level 3 cache, there will be three NULL pointers and then
> + * allocated pointer to virResctrlAllocPerLevel. That way you can access it by
> + * `alloc[level]` as O(1) is desired instead of crawling through normal arrays
> + * or lists in three nested loops. The code uses a lot of direct accesses.
> + *
> + * Each virResctrlAllocPerLevel can have allocations for different cache
> + * allocation types. You can allocate instruction cache (VIR_CACHE_TYPE_CODE),
> + * data cache (VIR_CACHE_TYPE_DATA) or unified cache (VIR_CACHE_TYPE_BOTH).
> + * Those allocations are kept in sparse array of virResctrlAllocPerType pointers.
> + *
> + * For each virResctrlAllocPerType users can request some size of the cache to
> + * be allocated. That's what the sparse array `sizes` is for. Non-NULL
> + * pointers represent requested size allocations. The array is indexed by host
> + * cache id (gotten from `/sys/devices/system/cpu/cpuX/cache/indexY/id`). Users
> + * can see this information e.g. in the output of `virsh capabilities` (for that
> + * information there's the other struct, namely `virResctrlInfo`).
> + *
> + * When allocation is being created we need to find unused part of the cache for
> + * all of them. While doing that we store the bitmask in a sparse array of
> + * virBitmaps named `masks` indexed the same way as `sizes`. The upper bounds
> + * of the sparse arrays are stored in nmasks or nsizes, respectively.
> + */
> +
> +/* virResctrlAllocPerType represents */
Incomplete comment? :)
Fixed.
> +typedef struct _virResctrlAllocPerType virResctrlAllocPerType;
> +typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr;
> +struct _virResctrlAllocPerType {
> + /* There could be bool saying whether this is set or not, but since everything
> + * in virResctrlAlloc (and most of libvirt) goes with pointer arrays we would
> + * have to have one more level of allocation anyway, so this stays faithful to
> + * the concept */
> + unsigned long long **sizes;
> + size_t nsizes;
> +
> + /* Mask for each cache */
> + virBitmapPtr *masks;
> + size_t nmasks;
> +};
> +
> +typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
> +typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
> +struct _virResctrlAllocPerLevel {
> + virResctrlAllocPerTypePtr *types; /* Indexed with enum virCacheType */
> + /* There is no `ntypes` member variable as it is always allocated for
> + * VIR_CACHE_TYPE_LAST number of items */
> +};
> +
> +struct _virResctrlAlloc {
> + virObject parent;
> +
> + virResctrlAllocPerLevelPtr *levels;
> + size_t nlevels;
> +
> + /* The identifier (any unique string for now) */
> + char *id;
> + /* libvirt-generated path in /sys/fs/resctrl for this particular
> + * allocation */
> + char *path;
> +};
> +
> +static virClassPtr virResctrlAllocClass;
> +
> +static void
> +virResctrlAllocDispose(void *obj)
> +{
> + size_t i = 0;
> + size_t j = 0;
> + size_t k = 0;
> +
> + virResctrlAllocPtr resctrl = obj;
> +
> + for (i = 0; i < resctrl->nlevels; i++) {
> + virResctrlAllocPerLevelPtr level = resctrl->levels[i];
> +
> + if (!level)
> + continue;
> +
> + for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
> + virResctrlAllocPerTypePtr type = level->types[j];
> +
> + if (!type)
> + continue;
> +
> + for (k = 0; k < type->nsizes; k++)
> + VIR_FREE(type->sizes[k]);
> +
> + for (k = 0; k < type->nmasks; k++)
> + virBitmapFree(type->masks[k]);
> +
> + VIR_FREE(type->sizes);
> + VIR_FREE(type->masks);
> + VIR_FREE(type);
> + }
> + VIR_FREE(level->types);
> + VIR_FREE(level);
> + }
> +
> + VIR_FREE(resctrl->id);
> + VIR_FREE(resctrl->path);
> + VIR_FREE(resctrl->levels);
> +}
> +
> +
> +static int
> +virResctrlAllocOnceInit(void)
> +{
> + if (!(virResctrlAllocClass = virClassNew(virClassForObject(),
> + "virResctrlAlloc",
> + sizeof(virResctrlAlloc),
> + virResctrlAllocDispose)))
> + return -1;
> +
> + return 0;
> +}
> +
> +
> +VIR_ONCE_GLOBAL_INIT(virResctrlAlloc)
> +
> +
> +virResctrlAllocPtr
> +virResctrlAllocNew(void)
> +{
> + if (virResctrlAllocInitialize() < 0)
> + return NULL;
> +
> + return virObjectNew(virResctrlAllocClass);
> +}
> +
> +
> +/* Common functions */
> +static int
> +virResctrlLockInternal(int op)
> +{
> + int fd = open(SYSFS_RESCTRL_PATH, O_DIRECTORY | O_CLOEXEC);
> +
> + if (fd < 0) {
> + virReportSystemError(errno, "%s", _("Cannot open
resctrlfs"));
> + return -1;
> + }
> +
> + if (flock(fd, op) < 0) {
> + virReportSystemError(errno, "%s", _("Cannot lock
resctrlfs"));
> + VIR_FORCE_CLOSE(fd);
> + return -1;
> + }
> +
> + return fd;
> +}
> +
> +
> +static inline int
> +virResctrlLockWrite(void)
> +{
> + return virResctrlLockInternal(LOCK_EX);
> +}
> +
> +
> +static int
> +virResctrlUnlock(int fd)
> +{
> + if (fd == -1)
> + return 0;
> +
> + /* The lock gets unlocked by closing the fd, which we need to do anyway in
> + * order to clean up properly */
> + if (VIR_CLOSE(fd) < 0) {
> + virReportSystemError(errno, "%s", _("Cannot close
resctrlfs"));
> +
> + /* Trying to save the already broken */
> + if (flock(fd, LOCK_UN) < 0)
> + virReportSystemError(errno, "%s", _("Cannot unlock
resctrlfs"));
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +
> /* Info-related functions */
> bool
> virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
> @@ -200,6 +393,8 @@ virResctrlGetInfo(virResctrlInfoPtr resctrl)
> }
>
> while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH "/info"))
> 0) {
> + VIR_DEBUG("Parsing info type '%s'", ent->d_name);
> +
> if (ent->d_type != DT_DIR)
> continue;
>
> @@ -207,16 +402,14 @@ virResctrlGetInfo(virResctrlInfoPtr resctrl)
> continue;
>
> if (virStrToLong_uip(ent->d_name + 1, &endptr, 10, &level) <
0) {
> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("Cannot parse resctrl cache info level"));
> + VIR_DEBUG("Cannot parse resctrl cache info level
'%s'", ent->d_name + 1);
> goto cleanup;
I guess that there should be "continue" instead of "goto cleanup".
Good point, fixed.
> }
>
> type = virResctrlTypeFromString(endptr);
> if (type < 0) {
> - virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> - _("Cannot parse resctrl cache info type"));
> - goto cleanup;
> + VIR_DEBUG("Cannot parse resctrl cache info type '%s'",
endptr);
> + continue;
> }
>
> if (VIR_ALLOC(i_type) < 0)
> @@ -259,10 +452,19 @@ virResctrlGetInfo(virResctrlInfoPtr resctrl)
> level - resctrl->nlevels + 1) < 0)
> goto cleanup;
>
> - if (!resctrl->levels[level] &&
> - (VIR_ALLOC(resctrl->levels[level]) < 0 ||
> - VIR_ALLOC_N(resctrl->levels[level]->types, VIR_CACHE_TYPE_LAST)
< 0))
> - goto cleanup;
> + if (!resctrl->levels[level]) {
> + virResctrlInfoPerTypePtr *types = NULL;
> +
> + if (VIR_ALLOC_N(types, VIR_CACHE_TYPE_LAST) < 0)
> + goto cleanup;
> +
> + if (VIR_ALLOC(resctrl->levels[level]) < 0) {
> + VIR_FREE(types);
> + goto cleanup;
> + }
> + resctrl->levels[level]->types = types;
> + }
> +
> i_level = resctrl->levels[level];
>
> if (i_level->types[type]) {
> @@ -358,3 +560,992 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
> VIR_FREE(*controls);
> goto cleanup;
> }
This whole info related part should be probably squashed into patch 3.
I've done that now. Probably just a silly mistake from addition of lot
of fixups.
> +
> +
> +/* Alloc-related functions */
> +bool
> +virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl)
> +{
> + size_t i = 0;
> + size_t j = 0;
> + size_t k = 0;
> +
> + if (!resctrl)
> + return true;
> +
> + for (i = 0; i < resctrl->nlevels; i++) {
> + virResctrlAllocPerLevelPtr a_level = resctrl->levels[i];
> +
> + if (!a_level)
> + continue;
> +
> + for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
> + virResctrlAllocPerTypePtr a_type = a_level->types[j];
> +
> + if (!a_type)
> + continue;
> +
> + for (k = 0; k < a_type->nsizes; k++) {
> + if (a_type->sizes[k])
> + return false;
> + }
> +
> + for (k = 0; k < a_type->nmasks; k++) {
> + if (a_type->masks[k])
> + return false;
> + }
> + }
> + }
> +
> + return true;
> +}
> +
> +
> +static virResctrlAllocPerTypePtr
> +virResctrlAllocGetType(virResctrlAllocPtr resctrl,
> + unsigned int level,
> + virCacheType type)
> +{
> + virResctrlAllocPerLevelPtr a_level = NULL;
> +
> + if (resctrl->nlevels <= level &&
> + VIR_EXPAND_N(resctrl->levels, resctrl->nlevels, level -
resctrl->nlevels + 1) < 0)
> + return NULL;
> +
> + if (!resctrl->levels[level]) {
> + virResctrlAllocPerTypePtr *types = NULL;
> +
> + if (VIR_ALLOC_N(types, VIR_CACHE_TYPE_LAST) < 0)
> + return NULL;
> +
> + if (VIR_ALLOC(resctrl->levels[level]) < 0) {
> + VIR_FREE(types);
> + return NULL;
> + }
> + resctrl->levels[level]->types = types;
> + }
> +
> + a_level = resctrl->levels[level];
> +
> + if (!a_level->types[type] && VIR_ALLOC(a_level->types[type]) <
0)
> + return NULL;
> +
> + return a_level->types[type];
> +}
> +
> +
> +static int
> +virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
> + unsigned int level,
> + virCacheType type,
> + unsigned int cache,
> + virBitmapPtr mask)
> +{
> + virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level,
type);
> +
> + if (!a_type)
> + return -1;
> +
> + if (a_type->nmasks <= cache &&
> + VIR_EXPAND_N(a_type->masks, a_type->nmasks,
> + cache - a_type->nmasks + 1) < 0)
> + return -1;
> +
> + if (!a_type->masks[cache]) {
> + a_type->masks[cache] = virBitmapNew(virBitmapSize(mask));
> +
> + if (!a_type->masks[cache])
> + return -1;
> + }
> +
> + return virBitmapCopy(a_type->masks[cache], mask);
> +}
> +
> +
> +static int
> +virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
> + unsigned int level,
> + virCacheType type,
> + unsigned int cache,
> + unsigned long long size)
> +{
> + virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level,
type);
> +
> + if (!a_type)
> + return -1;
> +
> + if (a_type->nsizes <= cache &&
> + VIR_EXPAND_N(a_type->sizes, a_type->nsizes,
> + cache - a_type->nsizes + 1) < 0)
> + return -1;
> +
> + if (!a_type->sizes[cache] && VIR_ALLOC(a_type->sizes[cache]) <
0)
> + return -1;
> +
> + *(a_type->sizes[cache]) = size;
> +
> + return 0;
> +}
> +
> +
> +/*
> + * Check if there is an allocation for this level/type/cache already. Called
> + * before updating the structure. VIR_CACHE_TYPE_BOTH collides with any type,
> + * the other types collide with itself. This code basically checks if either:
> + * `alloc[level]->types[type]->sizes[cache]`
> + * or
> + * `alloc[level]->types[VIR_CACHE_TYPE_BOTH]->sizes[cache]`
> + * is non-NULL. All the fuzz around it is checking for NULL pointers along
> + * the way.
> + */
> +static bool
> +virResctrlAllocCheckCollision(virResctrlAllocPtr alloc,
> + unsigned int level,
> + virCacheType type,
> + unsigned int cache)
> +{
> + virResctrlAllocPerLevelPtr a_level = NULL;
> + virResctrlAllocPerTypePtr a_type = NULL;
> +
> + if (!alloc)
> + return false;
> +
> + if (alloc->nlevels <= level)
> + return false;
> +
> + a_level = alloc->levels[level];
> +
> + if (!a_level)
> + return false;
> +
> + a_type = a_level->types[VIR_CACHE_TYPE_BOTH];
> +
> + /* If there is an allocation for type 'both', there can be no other
> + * allocation for the same cache */
> + if (a_type && a_type->nsizes > cache &&
a_type->sizes[cache])
> + return true;
> +
> + if (type == VIR_CACHE_TYPE_BOTH) {
> + a_type = a_level->types[VIR_CACHE_TYPE_CODE];
> +
> + if (a_type && a_type->nsizes > cache &&
a_type->sizes[cache])
> + return true;
> +
> + a_type = a_level->types[VIR_CACHE_TYPE_DATA];
> +
> + if (a_type && a_type->nsizes > cache &&
a_type->sizes[cache])
> + return true;
> + } else {
> + a_type = a_level->types[type];
> +
> + if (a_type && a_type->nsizes > cache &&
a_type->sizes[cache])
> + return true;
> + }
> +
> + return false;
> +}
> +
> +
> +int
> +virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
> + unsigned int level,
> + virCacheType type,
> + unsigned int cache,
> + unsigned long long size)
> +{
> + if (virResctrlAllocCheckCollision(resctrl, level, type, cache)) {
> + virReportError(VIR_ERR_XML_ERROR,
> + _("Colliding cache allocations for cache "
> + "level '%u' id '%u', type
'%s'"),
> + level, cache, virCacheTypeToString(type));
> + return -1;
> + }
> +
> + return virResctrlAllocUpdateSize(resctrl, level, type, cache, size);
> +}
> +
> +
> +int
> +virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
> + virResctrlAllocForeachSizeCallback cb,
> + void *opaque)
> +{
> + int ret = 0;
> + unsigned int level = 0;
> + unsigned int type = 0;
> + unsigned int cache = 0;
> +
> + if (!resctrl)
> + return 0;
> +
> + for (level = 0; level < resctrl->nlevels; level++) {
> + virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
> +
> + if (!a_level)
> + continue;
> +
> + for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
> + virResctrlAllocPerTypePtr a_type = a_level->types[type];
> +
> + if (!a_type)
> + continue;
> +
> + for (cache = 0; cache < a_type->nsizes; cache++) {
> + unsigned long long *size = a_type->sizes[cache];
> +
> + if (!size)
> + continue;
> +
> + ret = cb(level, type, cache, *size, opaque);
> + if (ret < 0)
> + return ret;
> + }
> + }
> + }
> +
> + return 0;
> +}
> +
> +
> +int
> +virResctrlAllocSetID(virResctrlAllocPtr alloc,
> + const char *id)
> +{
> + if (!id) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Resctrl allocation 'id' cannot be
NULL"));
> + return -1;
> + }
> +
> + return VIR_STRDUP(alloc->id, id);
> +}
> +
> +
> +const char *
> +virResctrlAllocGetID(virResctrlAllocPtr alloc)
> +{
> + return alloc->id;
> +}
> +
> +
> +char *
> +virResctrlAllocFormat(virResctrlAllocPtr resctrl)
> +{
> + virBuffer buf = VIR_BUFFER_INITIALIZER;
> + unsigned int level = 0;
> + unsigned int type = 0;
> + unsigned int cache = 0;
> +
> + if (!resctrl)
> + return NULL;
> +
> + for (level = 0; level < resctrl->nlevels; level++) {
> + virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
> +
> + if (!a_level)
> + continue;
> +
> + for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
> + virResctrlAllocPerTypePtr a_type = a_level->types[type];
> +
> + if (!a_type)
> + continue;
> +
> + virBufferAsprintf(&buf, "L%u%s:", level,
virResctrlTypeToString(type));
> +
> + for (cache = 0; cache < a_type->nmasks; cache++) {
> + virBitmapPtr mask = a_type->masks[cache];
> + char *mask_str = NULL;
> +
> + if (!mask)
> + continue;
> +
> + mask_str = virBitmapToString(mask, false, true);
> + if (!mask_str) {
> + virBufferFreeAndReset(&buf);
> + return NULL;
> + }
> +
> + virBufferAsprintf(&buf, "%u=%s;", cache, mask_str);
> + VIR_FREE(mask_str);
> + }
> +
> + virBufferTrim(&buf, ";", 1);
> + virBufferAddChar(&buf, '\n');
> + }
> + }
> +
> + virBufferCheckError(&buf);
> + return virBufferContentAndReset(&buf);
> +}
> +
> +
> +static int
> +virResctrlAllocParseProcessCache(virResctrlInfoPtr resctrl,
> + virResctrlAllocPtr alloc,
> + unsigned int level,
> + virCacheType type,
> + char *cache)
> +{
> + char *tmp = strchr(cache, '=');
> + unsigned int cache_id = 0;
> + virBitmapPtr mask = NULL;
> + int ret = -1;
> +
> + if (!tmp)
> + return 0;
> +
> + *tmp = '\0';
> + tmp++;
> +
> + if (virStrToLong_uip(cache, NULL, 10, &cache_id) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Invalid cache id '%s'"), cache);
> + return -1;
> + }
> +
> + mask = virBitmapNewString(tmp);
> + if (!mask)
> + return -1;
> +
> + virBitmapShrink(mask, resctrl->levels[level]->types[type]->bits);
> +
> + /* Guess what! All bits set mean there is no allocation and it uses only
> + * unallocated parts of the cache. */
Actually this doesn't seem to be correct from the documentation. It
looks like that in order to make some part of the case dedicated to a
group of processes you need to create that group, set which part of
the cache should be used and make sure that no other group including
the default one doesn't use that part of the cache.
In other words, all bits set means that the processes can use any
part of the cache.
Unfortunately, that's right. I went through the documentation many
times and missed that. Also I got confused by other misinformation
that's circulating in the air =) I fixed it back to the previous
version (it was added in this iteration).
> + if (!virBitmapIsAllSet(mask) &&
> + virResctrlAllocUpdateMask(alloc, level, type, cache_id, mask) < 0)
> + goto cleanup;
> +
> + ret = 0;
> + cleanup:
> + virBitmapFree(mask);
> + return ret;
> +}
> +
> +
> +static int
> +virResctrlAllocParseProcessLine(virResctrlInfoPtr resctrl,
> + virResctrlAllocPtr alloc,
> + char *line)
> +{
> + char **caches = NULL;
> + char *tmp = NULL;
> + unsigned int level = 0;
> + int type = -1;
> + size_t ncaches = 0;
> + size_t i = 0;
> + int ret = -1;
> +
> + /* For no reason there can be spaces */
> + virSkipSpaces((const char **) &line);
> +
> + /* Skip lines that don't concern caches, e.g. MB: etc. */
> + if (line[0] != 'L')
> + return 0;
> +
> + /* And lines that we can't parse too */
> + tmp = strchr(line, ':');
> + if (!tmp)
> + return 0;
> +
> + *tmp = '\0';
> + tmp++;
> +
> + if (virStrToLong_uip(line + 1, &line, 10, &level) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Cannot parse resctrl schema level
'%s'"),
> + line + 1);
> + return -1;
> + }
> +
> + type = virResctrlTypeFromString(line);
> + if (type < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Cannot parse resctrl schema level
'%s'"),
> + line + 1);
> + return -1;
> + }
> +
> + caches = virStringSplitCount(tmp, ";", 0, &ncaches);
> + if (!caches)
> + return 0;
> +
> + for (i = 0; i < ncaches; i++) {
> + if (virResctrlAllocParseProcessCache(resctrl, alloc, level, type, caches[i])
< 0)
> + goto cleanup;
> + }
> +
> + ret = 0;
> + cleanup:
> + virStringListFree(caches);
> + return ret;
> +}
> +
> +
> +static int
> +virResctrlAllocParse(virResctrlInfoPtr resctrl,
> + virResctrlAllocPtr alloc,
> + const char *schemata)
> +{
> + char **lines = NULL;
> + size_t nlines = 0;
> + size_t i = 0;
> + int ret = -1;
> +
> + lines = virStringSplitCount(schemata, "\n", 0, &nlines);
> + for (i = 0; i < nlines; i++) {
> + if (virResctrlAllocParseProcessLine(resctrl, alloc, lines[i]) < 0)
> + goto cleanup;
> + }
> +
> + ret = 0;
> + cleanup:
> + virStringListFree(lines);
> + return ret;
> +}
> +
> +
> +static void
> +virResctrlAllocSubtractPerType(virResctrlAllocPerTypePtr dst,
> + virResctrlAllocPerTypePtr src)
> +{
> + size_t i = 0;
> +
> + if (!dst || !src)
> + return;
> +
> + for (i = 0; i < dst->nmasks && i < src->nmasks; i++) {
> + if (dst->masks[i] && src->masks[i])
> + virBitmapSubtract(dst->masks[i], src->masks[i]);
> + }
> +}
> +
> +
> +static void
> +virResctrlAllocSubtract(virResctrlAllocPtr dst,
> + virResctrlAllocPtr src)
> +{
> + size_t i = 0;
> + size_t j = 0;
> +
> + if (!src)
> + return;
> +
> + for (i = 0; i < dst->nlevels && i < src->nlevels; i++) {
> + if (dst->levels[i] && src->levels[i]) {
> + for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
> + virResctrlAllocSubtractPerType(dst->levels[i]->types[j],
> + src->levels[i]->types[j]);
> + }
> + }
> + }
> +}
> +
> +
> +static virResctrlAllocPtr
> +virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
> +{
> + size_t i = 0;
> + size_t j = 0;
> + size_t k = 0;
> + virResctrlAllocPtr ret = virResctrlAllocNew();
> + virBitmapPtr mask = NULL;
> +
> + if (!ret)
> + return NULL;
> +
> + for (i = 0; i < info->nlevels; i++) {
> + virResctrlInfoPerLevelPtr i_level = info->levels[i];
> +
> + if (!i_level)
> + continue;
> +
> + for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
> + virResctrlInfoPerTypePtr i_type = i_level->types[j];
> +
> + if (!i_type)
> + continue;
> +
> + virBitmapFree(mask);
> + mask = virBitmapNew(i_type->bits);
> + if (!mask)
> + goto error;
> + virBitmapSetAll(mask);
> +
> + for (k = 0; k <= i_type->max_cache_id; k++) {
> + if (virResctrlAllocUpdateMask(ret, i, j, k, mask) < 0)
> + goto error;
> + }
> + }
> + }
> +
> + cleanup:
> + virBitmapFree(mask);
> + return ret;
> + error:
> + virObjectUnref(ret);
> + ret = NULL;
> + goto cleanup;
> +}
> +
> +/*
> + * This function creates an allocation that represents all unused parts of all
> + * caches in the system. It uses virResctrlInfo for creating a new full
> + * allocation with all bits set (using virResctrlAllocNewFromInfo()) and then
> + * scans for all allocations under /sys/fs/resctrl and subtracts each one of
> + * them from it. That way it can then return an allocation with only bit set
> + * being those that are not mentioned in any other allocation. It is used for
> + * two things, a) calculating the masks when creating allocations and b) from
> + * tests.
> + */
> +virResctrlAllocPtr
> +virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
> +{
> + virResctrlAllocPtr ret = NULL;
> + virResctrlAllocPtr alloc = NULL;
> + struct dirent *ent = NULL;
> + DIR *dirp = NULL;
> + char *schemata = NULL;
> + int rv = -1;
> +
> + if (virResctrlInfoIsEmpty(resctrl)) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("Resource control is not supported on this
host"));
> + return NULL;
> + }
> +
> + ret = virResctrlAllocNewFromInfo(resctrl);
> + if (!ret)
> + return NULL;
> +
> + if (virFileReadValueString(&schemata,
> + SYSFS_RESCTRL_PATH
> + "/schemata") < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Could not read schemata file for the default
group"));
> + goto error;
> + }
> +
> + alloc = virResctrlAllocNew();
> + if (!alloc)
> + goto error;
> +
> + if (virResctrlAllocParse(resctrl, alloc, schemata) < 0)
> + goto error;
> + if (!virResctrlAllocIsEmpty(alloc))
This check is not required if we change the logic in
virResctrlAllocParseProcessCache(). There cannot be any cache group
that has empty allocation.
Yeah, fixed.
> + virResctrlAllocSubtract(ret, alloc);
> +
> + if (virDirOpen(&dirp, SYSFS_RESCTRL_PATH) < 0)
> + goto error;
> +
> + while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH)) > 0) {
> + if (ent->d_type != DT_DIR)
> + continue;
> +
> + if (STREQ(ent->d_name, "info"))
> + continue;
> +
> + VIR_FREE(schemata);
> + rv = virFileReadValueString(&schemata,
> + SYSFS_RESCTRL_PATH
> + "/%s/schemata",
> + ent->d_name);
> + if (rv == -2)
> + continue;
> +
> + if (rv < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Could not read schemata file for group
%s"),
> + ent->d_name);
> + goto error;
> + }
> +
> + virObjectUnref(alloc);
> + alloc = virResctrlAllocNew();
> + if (!alloc)
> + goto error;
> +
> + if (virResctrlAllocParse(resctrl, alloc, schemata) < 0)
> + goto error;
> +
> + virResctrlAllocSubtract(ret, alloc);
> + }
> + if (rv < 0)
> + goto error;
> +
> + cleanup:
> + virObjectUnref(alloc);
> + VIR_DIR_CLOSE(dirp);
> + VIR_FREE(schemata);
> + return ret;
> +
> + error:
> + virObjectUnref(ret);
> + ret = NULL;
> + goto cleanup;
> +}
> +
> +
> +
> +static int
> +virResctrlAllocSetMask(virResctrlAllocPerTypePtr a_type,
> + unsigned int cache,
> + virBitmapPtr mask)
> +{
> + if (a_type->nmasks <= cache &&
> + VIR_EXPAND_N(a_type->masks, a_type->nmasks,
> + cache - a_type->nmasks + 1) < 0)
> + return -1;
> +
> + a_type->masks[cache] = mask;
> +
> + return 0;
> +}
> +
> +
> +/*
> + * Given the information about requested allocation type `a_type`, the host
> + * cache for a particular type `i_type` and unused bits in the system `f_type`
> + * this function tries to find the smallest free space in which the allocation
> + * for cache id `cache` would fit. We're looking for the smallest place in
> + * order to minimize fragmentation and maximize the possibility of succeeding.
> + */
> +static int
> +virResctrlAllocFindUnused(virResctrlAllocPerTypePtr a_type,
> + virResctrlInfoPerTypePtr i_type,
> + virResctrlAllocPerTypePtr f_type,
> + unsigned int level,
> + unsigned int type,
> + unsigned int cache)
> +{
> + unsigned long long *size = a_type->sizes[cache];
> + virBitmapPtr a_mask = NULL;
> + virBitmapPtr f_mask = NULL;
> + unsigned long long granularity;
> + unsigned long long need_bits;
> + size_t i = 0;
> + ssize_t pos = -1;
> + ssize_t last_bits = 0;
> + ssize_t last_pos = -1;
> +
> + /* If there is no reservation requested we need to set all bits. That's
due
> + * to weird interface of the resctrl sysfs. It's also the reason why we
> + * cannot reserve the whole cache in one allocation. */
> + if (!size) {
> + a_mask = virBitmapNew(i_type->bits);
> + if (!a_mask)
> + return -1;
> +
> + virBitmapSetAll(a_mask);
> +
> + if (virResctrlAllocSetMask(a_type, cache, a_mask) < 0) {
> + virBitmapFree(a_mask);
> + return -1;
> + }
> +
> + return 0;
> + }
> +
> + if (cache >= f_type->nmasks) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Cache with id %u does not exists for level
%d"),
> + cache, level);
> + return -1;
> + }
> +
> + f_mask = f_type->masks[cache];
> + if (!f_mask) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Cache level %d id %u does not support tuning for
"
> + "scope type '%s'"),
> + level, cache, virCacheTypeToString(type));
> + return -1;
> + }
> +
> + if (*size == i_type->size) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Cache allocation for the whole cache is not "
> + "possible, specify size smaller than %llu"),
> + i_type->size);
> + return -1;
> + }
> +
> + granularity = i_type->size / i_type->bits;
Can we use the already calculated granularity?
i_type->control.granularity
Good point, modified appropriately now.
> + need_bits = *size / granularity;
> +
> + if (*size % granularity) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Cache allocation of size %llu is not "
> + "divisible by granularity %llu"),
> + *size, granularity);
> + return -1;
> + }
> +
> + if (need_bits < i_type->min_cbm_bits) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Cache allocation of size %llu is smaller "
> + "than the minimum allowed allocation %llu"),
> + *size, granularity * i_type->min_cbm_bits);
Here we could use the already calculated minimum allowed allocation,
but we set that value only if it's different from granularity.
This makes me think whether we could modify the code to always set
the minimum allocation and format it only if it's different from
granularity.
Yeah, we could do that. Since it requires patch in yet another part of
the code and the functionality would be unchanged, I'll leave this for a
follow-up.
> + return -1;
> + }
> +
> + while ((pos = virBitmapNextSetBit(f_mask, pos)) >= 0) {
> + ssize_t pos_clear = virBitmapNextClearBit(f_mask, pos);
> + ssize_t bits;
> +
> + if (pos_clear < 0)
> + pos_clear = virBitmapSize(f_mask);
> +
> + bits = pos_clear - pos;
> +
> + /* Not enough bits, move on and skip all of them */
> + if (bits < need_bits) {
> + pos = pos_clear;
> + continue;
> + }
> +
> + /* This fits perfectly */
> + if (bits == need_bits) {
> + last_pos = pos;
> + break;
> + }
> +
> + /* Remember the smaller region if we already found on before */
> + if (last_pos < 0 || (last_bits && bits < last_bits)) {
> + last_bits = bits;
> + last_pos = pos;
> + }
> +
> + pos = pos_clear;
> + }
> +
> + if (last_pos < 0) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Not enough room for allocation of "
> + "%llu bytes for level %u cache %u "
> + "scope type '%s'"),
> + *size, level, cache,
> + virCacheTypeToString(type));
> + return -1;
> + }
> +
> + a_mask = virBitmapNew(i_type->bits);
> + if (!a_mask)
> + return -1;
> +
> + for (i = last_pos; i < last_pos + need_bits; i++) {
> + ignore_value(virBitmapSetBit(a_mask, i));
> + ignore_value(virBitmapClearBit(f_mask, i));
> + }
> +
> + if (virResctrlAllocSetMask(a_type, cache, a_mask) < 0) {
> + virBitmapFree(a_mask);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +
> +/*
> + * This function is called when creating an allocation in the system. What it
> + * does is that it gets all the unused bits using virResctrlAllocGetUnused() and
> + * then tries to find a proper space for every requested allocation effectively
> + * transforming `sizes` into `masks`.
> + */
> +static int
> +virResctrlAllocMasksAssign(virResctrlInfoPtr resctrl,
> + virResctrlAllocPtr alloc)
> +{
> + int ret = -1;
> + unsigned int level = 0;
> + virResctrlAllocPtr alloc_free = NULL;
> +
> + alloc_free = virResctrlAllocGetUnused(resctrl);
> + if (!alloc_free)
> + return -1;
> +
> + for (level = 0; level < alloc->nlevels; level++) {
> + virResctrlAllocPerLevelPtr a_level = alloc->levels[level];
> + virResctrlAllocPerLevelPtr f_level = NULL;
> + unsigned int type = 0;
> +
> + if (!a_level)
> + continue;
> +
> + if (level < alloc_free->nlevels)
> + f_level = alloc_free->levels[level];
> +
> + if (!f_level) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Cache level %d does not support tuning"),
> + level);
> + goto cleanup;
> + }
> +
> + for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
> + virResctrlAllocPerTypePtr a_type = a_level->types[type];
> + virResctrlAllocPerTypePtr f_type = f_level->types[type];
> + unsigned int cache = 0;
> +
> + if (!a_type)
> + continue;
> +
> + if (!f_type) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Cache level %d does not support tuning for
"
> + "scope type '%s'"),
> + level, virCacheTypeToString(type));
> + goto cleanup;
> + }
> +
> + for (cache = 0; cache < a_type->nsizes; cache++) {
> + virResctrlInfoPerLevelPtr i_level = resctrl->levels[level];
> + virResctrlInfoPerTypePtr i_type = i_level->types[type];
> +
> + if (virResctrlAllocFindUnused(a_type, i_type, f_type, level, type,
cache) < 0)
> + goto cleanup;
> + }
> + }
> + }
> +
> + ret = 0;
> + cleanup:
> + virObjectUnref(alloc_free);
> + return ret;
> +}
> +
> +
> +/* This checks if the directory for the alloc exists. If not it tries to create
> + * it and apply appropriate alloc settings. */
> +int
> +virResctrlAllocCreate(virResctrlInfoPtr resctrl,
> + virResctrlAllocPtr alloc,
> + const char *machinename)
> +{
> + char *schemata_path = NULL;
> + char *alloc_str = NULL;
> + int ret = -1;
> + int lockfd = -1;
> +
> + if (!alloc)
> + return 0;
> +
> + if (!resctrl) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("Resource control is not supported on this
host"));
> + return -1;
> + }
> +
> + if (!alloc->id) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Resctrl Allocation ID must be set before
creation"));
> + return -1;
> + }
> +
> + if (!alloc->path &&
> + virAsprintf(&alloc->path, "%s/%s-%s",
> + SYSFS_RESCTRL_PATH, machinename, alloc->id) < 0)
> + return -1;
> +
> + /* Check if this allocation was already created */
> + if (virFileIsDir(alloc->path))
> + return 0;
Shouldn't we fail if the allocation already exists? This function
is called when the guest is started therefore the allocation should
not exist and if it exists there is something wrong and the allocation
will not be applied or wrong allocation may be applied.
Yeah, I thought this will get called for each vCPU (that was one of the earlier
not-sent-upstream implemetations), but it is not now, so I removed this part and
adjusted the next error message that will catch it.
> +
> + if (virFileExists(alloc->path)) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Path '%s' for resctrl allocation exists and
is not a "
> + "directory"), alloc->path);
> + goto cleanup;
> + }
> +
> + lockfd = virResctrlLockWrite();
> + if (lockfd < 0)
> + goto cleanup;
> +
> + if (virResctrlAllocMasksAssign(resctrl, alloc) < 0)
> + goto cleanup;
> +
> + alloc_str = virResctrlAllocFormat(alloc);
> + if (!alloc_str)
> + goto cleanup;
> +
> + if (virAsprintf(&schemata_path, "%s/schemata", alloc->path)
< 0)
> + goto cleanup;
> +
> + if (virFileMakePath(alloc->path) < 0) {
> + virReportSystemError(errno,
> + _("Cannot create resctrl directory
'%s'"),
> + alloc->path);
> + goto cleanup;
> + }
> +
> + VIR_DEBUG("Writing resctrl schemata '%s' into '%s'",
alloc_str, schemata_path);
> + if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) {
> + rmdir(alloc->path);
> + virReportSystemError(errno,
> + _("Cannot write into schemata file
'%s'"),
> + schemata_path);
> + goto cleanup;
> + }
> +
> + ret = 0;
> + cleanup:
> + virResctrlUnlock(lockfd);
> + VIR_FREE(alloc_str);
> + VIR_FREE(schemata_path);
> + return ret;
> +}
> +
> +
> +int
> +virResctrlAllocAddPID(virResctrlAllocPtr alloc,
> + pid_t pid)
> +{
> + char *tasks = NULL;
> + char *pidstr = NULL;
> + int ret = 0;
> +
> + if (!alloc->path) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Cannot add pid to non-existing resctrl
allocation"));
> + return -1;
> + }
> +
> + if (virAsprintf(&tasks, "%s/tasks", alloc->path) < 0)
> + return -1;
> +
> + if (virAsprintf(&pidstr, "%lld", (long long int) pid) < 0)
> + goto cleanup;
> +
> + if (virFileWriteStr(tasks, pidstr, 0) < 0) {
> + virReportSystemError(errno,
> + _("Cannot write pid in tasks file
'%s'"),
> + tasks);
> + goto cleanup;
> + }
> +
> + ret = 0;
> + cleanup:
> + VIR_FREE(tasks);
> + VIR_FREE(pidstr);
> + return ret;
> +}
> +
> +
> +int
> +virResctrlAllocRemove(virResctrlAllocPtr alloc)
> +{
> + int ret = 0;
> +
> + if (!alloc->path)
> + return 0;
> +
> + VIR_DEBUG("Removing resctrl allocation %s", alloc->path);
> + if (rmdir(alloc->path) != 0 && errno != ENOENT) {
> + ret = -errno;
> + VIR_ERROR(_("Unable to remove %s (%d)"), alloc->path, errno);
> + }
> +
> + return ret;
> +}
> diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
> index 471c02f28dcd..27793c86a745 100644
> --- a/src/util/virresctrl.h
> +++ b/src/util/virresctrl.h
> @@ -68,4 +68,53 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
> size_t *ncontrols,
> virResctrlInfoPerCachePtr **controls);
>
> +/* Alloc-related things */
> +typedef struct _virResctrlAlloc virResctrlAlloc;
> +typedef virResctrlAlloc *virResctrlAllocPtr;
> +
> +typedef int virResctrlAllocForeachSizeCallback(unsigned int level,
> + virCacheType type,
> + unsigned int cache,
> + unsigned long long size,
> + void *opaque);
> +
> +virResctrlAllocPtr
> +virResctrlAllocNew(void);
> +
> +bool
> +virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl);
> +
> +int
> +virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
> + unsigned int level,
> + virCacheType type,
> + unsigned int cache,
> + unsigned long long size);
> +
> +int
> +virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
> + virResctrlAllocForeachSizeCallback cb,
> + void *opaque);
> +
> +int
> +virResctrlAllocSetID(virResctrlAllocPtr alloc,
> + const char *id);
> +const char *
> +virResctrlAllocGetID(virResctrlAllocPtr alloc);
> +
> +char *
> +virResctrlAllocFormat(virResctrlAllocPtr alloc);
> +
> +int
> +virResctrlAllocCreate(virResctrlInfoPtr r_info,
> + virResctrlAllocPtr alloc,
> + const char *machinename);
> +
> +int
> +virResctrlAllocAddPID(virResctrlAllocPtr alloc,
> + pid_t pid);
> +
> +int
> +virResctrlAllocRemove(virResctrlAllocPtr alloc);
> +
> #endif /* __VIR_RESCTRL_H__ */
> diff --git a/src/util/virresctrlpriv.h b/src/util/virresctrlpriv.h
> new file mode 100644
> index 000000000000..72b115afd13e
> --- /dev/null
> +++ b/src/util/virresctrlpriv.h
> @@ -0,0 +1,27 @@
> +/*
> + * virresctrlpriv.h:
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> + * <
http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef __VIR_RESCTRL_PRIV_H__
> +# define __VIR_RESCTRL_PRIV_H__
> +
> +# include "virresctrl.h"
> +
> +virResctrlAllocPtr
> +virResctrlAllocGetUnused(virResctrlInfoPtr resctrl);
> +
> +#endif /* __VIR_RESCTRL_PRIV_H__ */
> --
> 2.16.1
>
> --
> libvir-list mailing list
> libvir-list(a)redhat.com
>
https://www.redhat.com/mailman/listinfo/libvir-list