Hi Jano,
Thanks a lot for the review! please see my reply below.
On 2018年06月02日 22:50, Ján Tomko wrote:
On Tue, May 29, 2018 at 06:58:02PM +0800, bing.niu(a)intel.com wrote:
> From: Bing Niu <bing.niu(a)intel.com>
>
> Add memory bandwidth allocation support basing on existing
> virresctrl implementation. Two new structures virResctrlInfoMB
> and virResctrlAllocMB are introduced.
>
> virResctrlInfoMB is used to record host system MBA supporting
> information, e.g., minimum bandwidth allowed, bandwidth
> granularity, number of bandwidth controller (same as number of
> last level cache).
>
> virResctrlAllocMB is used for allocating memory bandwidth.
> Following virResctrlAllocPerType, it also employs a nested
> sparse array to indicate whether allocation is available for
> particular llc.
>
> Overall, the memory bandwidth allocation follows the same sequence
> as existing virresctrl cache allocation model. It exposes
> interfaces for probing host's memory bandwidth capability on
> initialization time and memory bandwidth allocation on runtime.
>
> Signed-off-by: Bing Niu <bing.niu(a)intel.com>
> ---
> src/util/virresctrl.c | 415
> +++++++++++++++++++++++++++++++++++++++++++++++---
> src/util/virresctrl.h | 5 +
> 2 files changed, 401 insertions(+), 19 deletions(-)
>
> diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
> index fc11635..ad050c7 100644
> --- a/src/util/virresctrl.c
> +++ b/src/util/virresctrl.c
> @@ -86,6 +86,20 @@ struct _virResctrlInfoPerType {
> virResctrlInfoPerCache control;
> };
>
> +/* Information about memory bandwidth allocation */
> +typedef struct _virResctrlInfoMB virResctrlInfoMB;
> +typedef virResctrlInfoMB *virResctrlInfoMBPtr;
> +struct _virResctrlInfoMB {
> + /* minimum memory bandwidth allowed*/
> + unsigned int min_bandwidth;
> + /* bandwidth granularity */
> + unsigned int bandwidth_gran;
bandwidth_granularity is more descriptive
will revise in next version
> + /* level number of llc*/
> + unsigned int llc;
cryptic three-letter arconym, you can spell it out
will update in next version
> + /* number of last level cache */
Based on the comment, I don't understand the difference from the
previous value.
sorry for making confuse here, properly due to fogged comments.
The llc element will record the level of last level cache(usually it's
L3 cache, however theoretically we may have different level number of llc).
The max_id is used to track how many of llc host have. In a NUMA system,
we will have more than one llc.
Those two elements are populated inside virResctrlInfoGetCache() when
libvirt initialize host cache capability information by
virCapabilitiesInitCaches().
We use those information for:
1. Guarantee user allocating memory bandwidth at a valid llc.
2. Create memory allocation info only on valid llc when
virResctrlAllocNewFromInfo. Since the virResctrlAllocPtr allocated from
virResctrlAllocNewFromInfo is used to calculate unused resource.
> + unsigned int max_id;
> +};
> +
> typedef struct _virResctrlInfoPerLevel virResctrlInfoPerLevel;
> typedef virResctrlInfoPerLevel *virResctrlInfoPerLevelPtr;
> struct _virResctrlInfoPerLevel {
> @@ -97,6 +111,8 @@ struct _virResctrlInfo {
>
> virResctrlInfoPerLevelPtr *levels;
> size_t nlevels;
> +
> + virResctrlInfoMBPtr mb_info;
> };
>
> static virClassPtr virResctrlInfoClass;
> @@ -126,6 +142,7 @@ virResctrlInfoDispose(void *obj)
> VIR_FREE(level);
> }
>
> + VIR_FREE(resctrl->mb_info);
> VIR_FREE(resctrl->levels);
> }
>
> @@ -201,6 +218,18 @@ struct _virResctrlAllocPerType {
> size_t nmasks;
> };
>
> +/*
> + * virResctrlAllocMB represents one memory bandwidth allocation.
> Since it can have
> + * multiple last level caches in a NUMA system, it is also
> represented as a nested
> + * sparse arrays as virRestrlAllocPerLevel
> + */
> +typedef struct _virResctrlAllocMB virResctrlAllocMB;
> +typedef virResctrlAllocMB *virResctrlAllocMBPtr;
> +struct _virResctrlAllocMB {
> + unsigned int **bandwidth;
> + size_t nsizes;
> +};
> +
> typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
> typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
> struct _virResctrlAllocPerLevel {
> @@ -214,7 +243,7 @@ struct _virResctrlAlloc {
>
> virResctrlAllocPerLevelPtr *levels;
> size_t nlevels;
> -
> + virResctrlAllocMBPtr mba;
> /* The identifier (any unique string for now) */
> char *id;
> /* libvirt-generated path in /sys/fs/resctrl for this particular
> @@ -259,6 +288,13 @@ virResctrlAllocDispose(void *obj)
> VIR_FREE(level);
> }
>
> + if (resctrl->mba) {
> + virResctrlAllocMBPtr mba = resctrl->mba;
> + for (i = 0; i < mba->nsizes; i++)
> + VIR_FREE(mba->bandwidth[i]);
> + }
> +
> + VIR_FREE(resctrl->mba);
> VIR_FREE(resctrl->id);
> VIR_FREE(resctrl->path);
> VIR_FREE(resctrl->levels);
> @@ -364,6 +400,9 @@ virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
> if (!resctrl)
> return true;
>
> + if (resctrl->mb_info)
> + return false;
> +
> for (i = 0; i < resctrl->nlevels; i++) {
> virResctrlInfoPerLevelPtr i_level = resctrl->levels[i];
>
> @@ -379,11 +418,62 @@ virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
> return true;
> }
>
> +static int
> +virResctrlGetMemoryBandwidthInfo(virResctrlInfoPtr resctrl)
> +{
> + DIR *dirp = NULL;
> + int ret = -1;
> + int rv = -1;
> + virResctrlInfoMBPtr i_mb = NULL;
>
> -#ifdef __linux__
This ifdef removal looks suspicious
This should be caused by diff format. The
pragma is still here.
Hopefully we can get rid of that this by separate renaming to a
individual patch.
> + rv = virDirOpenIfExists(&dirp, SYSFS_RESCTRL_PATH "/info");
> + if (rv <= 0) {
> + ret = rv;
> + goto cleanup;
> + }
> + /* query memory bandwidth allocation info */
> + if (VIR_ALLOC(i_mb) < 0) {
> + ret = -1;
ret is initialized to -1, no need to set it again
Will update in v2
> + goto cleanup;
> + }
> + rv = virFileReadValueUint(&i_mb->bandwidth_gran,
> + SYSFS_RESCTRL_PATH
> "/info/MB/bandwidth_gran");
> + if (rv == -2) {
> + /* The file doesn't exist, so it's unusable for us,
> + * properly mba unsupported */
> + VIR_WARN("The path '" SYSFS_RESCTRL_PATH
> "/info/MB/bandwidth_gran'"
> + "does not exist");
Is there a chance the directory SYSFS_RESCTRL_PATH will exist, but not
this file? We should not spam the logs with warnings on older kernels.
Yes.
It's possible. if resctrl fs mount successfully. That means system
have CAT or MBA or CAT&MBT. Better we change VIR_WARN to VIR_DEBUG for
MBA and CAT information query part. How about this? ;)
> + ret = 0;
> + goto cleanup;
> + } else if (rv < 0) {
> + /* File no existing or other failures are fatal, so just quit */
This comment conflicts with the above statement.
Will update in v2
> + goto cleanup;
> + }
>
> -int
> -virResctrlGetInfo(virResctrlInfoPtr resctrl)
> + rv = virFileReadValueUint(&i_mb->min_bandwidth,
> + SYSFS_RESCTRL_PATH
> "/info/MB/min_bandwidth");
> + if (rv == -2) {
> + /* The file doesn't exist, so it's unusable for us,
> + * properly mba unsupported */
> + VIR_WARN("The path '" SYSFS_RESCTRL_PATH
> "/info/MB/min_bandwidth'"
> + "does not exist");
> + ret = 0;
> + goto cleanup;
> + } else if (rv < 0) {
> + /* File no existing or other failures are fatal, so just quit */
> + goto cleanup;
> + }
> +
> + VIR_STEAL_PTR(resctrl->mb_info, i_mb);
> + ret = 0;
> + cleanup:
> + VIR_DIR_CLOSE(dirp);
> + VIR_FREE(i_mb);
> + return ret;
> +}
> +
> +static int
> +virResctrlGetCacheInfo(virResctrlInfoPtr resctrl)
> {
> DIR *dirp = NULL;
> char *endptr = NULL;
> @@ -512,6 +602,23 @@ virResctrlGetInfo(virResctrlInfoPtr resctrl)
> return ret;
> }
>
> +#ifdef __linux__
> +int
> +virResctrlGetInfo(virResctrlInfoPtr resctrl)
> +{
> + int ret;
Our convention is to initialize ret to -1 and then overwrite it exactly
once before returning from the function, othrewise it gets hard to
follow.
Will update in v2
> +
> + ret = virResctrlGetMemoryBandwidthInfo(resctrl);
> + if (ret < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Get Memory Bandwidth fail"));
> + goto out;
> + }
> + ret = virResctrlGetCacheInfo(resctrl);
> + out:
> + return ret;
> +}
> +
> #else /* ! __linux__ */
>
> int
> @@ -534,12 +641,26 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
> {
> virResctrlInfoPerLevelPtr i_level = NULL;
> virResctrlInfoPerTypePtr i_type = NULL;
> + virResctrlInfoMBPtr mb_info = NULL;
> size_t i = 0;
> int ret = -1;
>
> if (virResctrlInfoIsEmpty(resctrl))
> return 0;
>
> + /* Let's take the opportunity to update the number of
> + * last level cache. This number is used to calculate
> + * free memory bandwidth */
> + if (resctrl->mb_info) {
> + mb_info = resctrl->mb_info;
> + if (level > mb_info->llc) {
> + mb_info->llc = level;
> + mb_info->max_id = 1;
> + } else if (mb_info->llc == level) {
> + mb_info->max_id++;
> + }
> + }
> +
> if (level >= resctrl->nlevels)
> return 0;
>
> @@ -600,6 +721,9 @@ virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl)
> if (!resctrl)
> return true;
>
> + if (resctrl->mba)
> + return false;
> +
> for (i = 0; i < resctrl->nlevels; i++) {
> virResctrlAllocPerLevelPtr a_level = resctrl->levels[i];
>
> @@ -855,16 +979,218 @@ virResctrlAllocGetID(virResctrlAllocPtr alloc)
> }
>
>
> -char *
> -virResctrlAllocFormat(virResctrlAllocPtr resctrl)
The rename should be in a separate patch, to make the diff easier to
read.
Will separate in v2.
> +int
> +virResctrlSetMemoryBandwidth(virResctrlAllocPtr resctrl,
> + unsigned int id,
> + unsigned int memory_bandwidth)
> +{
> + virResctrlAllocMBPtr mba = resctrl->mba;
> +
> + if (!mba) {
> + if (VIR_ALLOC(mba) < 0)
> + return -1;
> + resctrl->mba = mba;
> + }
> +
> + if (mba->nsizes <= id &&
> + VIR_EXPAND_N(mba->bandwidth, mba->nsizes,
> + id - mba->nsizes + 1) < 0)
> + return -1;
> +
> + if (mba->bandwidth[id]) {
> + virReportError(VIR_ERR_XML_ERROR,
> + _("Collision Memory Bandwidth on node %d"),
> + id);
> + return -1;
> + }
> +
> + if (VIR_ALLOC(mba->bandwidth[id]) < 0)
> + return -1;
> +
> + *(mba->bandwidth[id]) = memory_bandwidth;
> + return 0;
> +}
> +
> +
> +static int
> +virResctrlAllocMemoryBandwidthFormat(virResctrlAllocPtr alloc,
> virBufferPtr buf)
Usually we put the output variables first.
Will update in v2
> +{
> + int id;
> +
> + if (!alloc->mba)
> + goto out;
> +
> + virBufferAddLit(buf, "MB:");
> +
> + for (id = 0; id < alloc->mba->nsizes; id++) {
> + if (alloc->mba->bandwidth[id]) {
> + virBufferAsprintf(buf, "%u=%u;", id,
> + *(alloc->mba->bandwidth[id]));
> + }
> + }
> +
> + virBufferTrim(buf, ";", 1);
> + virBufferAddChar(buf, '\n');
> + virBufferCheckError(buf);
> + out:
pointless label
Will update in v2
> + return 0;
> +}
> +
> +static int
> +virResctrlAllocMemoryBandwidth(virResctrlInfoPtr resctrl,
> + virResctrlAllocPtr alloc,
> virResctrlAllocPtr free)
> +{
> + int id;
> + int ret;
> + virResctrlAllocMBPtr mb_alloc = alloc->mba;
> + virResctrlAllocMBPtr mb_free = free->mba;
> + virResctrlInfoMBPtr mb_info = resctrl->mb_info;
> +
> + if (!mb_alloc)
> + return 0;
> +
> + if (mb_alloc && !mb_info) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("RDT Memory Bandwidth allocationi"
allocation
Will update in v2
> + " unsupported"));
> + ret = -1;
ret should be initialized to -1
same as above.
> + goto out;
> + }
> +
> + for (id = 0; id < mb_alloc->nsizes; id ++) {
> + if (!mb_alloc->bandwidth[id])
> + continue;
> +
> + if (*(mb_alloc->bandwidth[id]) % mb_info->bandwidth_gran) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Memory Bandwidth allocation of size "
> + "%u is not divisible by granularity %u"),
> + *(mb_alloc->bandwidth[id]),
> + mb_info->bandwidth_gran);
> + ret = -1;
> + goto out;
> + }
> + if (*(mb_alloc->bandwidth[id]) < mb_info->min_bandwidth) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Memory Bandwidth allocation of size "
> + "%u is smaller than the minimum "
> + "allowed allocation %u"),
> + *(mb_alloc->bandwidth[id]),
> + mb_info->min_bandwidth);
> + ret = -1;
> + goto out;
> + }
> + if (id >= mb_info->max_id) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("bandwidth controller %u not exist,"
> + " max controller id %u"),
> + id, mb_info->max_id - 1);
> + return -1;
> + }
> + if (*(mb_alloc->bandwidth[id]) > *(mb_free->bandwidth[id])) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("Not enough room for allocation of %u "
> + "bandwidth for node %u%%, freed %u%%"),
> + id, *(mb_alloc->bandwidth[id]),
> + *(mb_free->bandwidth[id]));
> + return -1;
> + }
> + }
> + ret = 0;
> + out:
Pointless label.
Will remove in v2
> + return ret;
> +}
> +
> +
> +static int
> +virResctrlAllocParseMemoryBandwidthLine(virResctrlInfoPtr resctrl,
> + virResctrlAllocPtr alloc,
> + char *line)
> +{
> + char **mbs = NULL;
> + char *tmp = NULL;
> + unsigned int bandwidth;
> + size_t nmb = 0;
> + unsigned int id;
> + size_t i;
> + int ret;
> +
> + /* For no reason there can be spaces */
> + virSkipSpaces((const char **) &line);
> +
> + if (STRNEQLEN(line, "MB", 2))
> + return 0;
> +
> + if (!resctrl || !resctrl->mb_info ||
> + (!resctrl->mb_info->min_bandwidth)
|| should be at the end of the line
Will update in v2
> + || !(resctrl->mb_info->bandwidth_gran)) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Missing or inconsistent resctrl info for "
> + "memory bandwidth allocation"));
> + }
> +
> + if (!alloc->mba)
> + if (VIR_ALLOC(alloc->mba) < 0)
> + return -1;
The outer if will need {} around the body.
Will fix in v2.
Is there a chance of alloc->mba being non-NULL here?Theoretically yes.
This
function is used to pars string line queried from resctrl file
schemata. Each resource is in a individual line. So the function will be
invoked for each line. Only first time, We need allocate mba structure.
I take a look at RDT kernel driver. In current implementation, MBA
information will be formated into one single line, so this function is
validly executed only once. If removing above if(!alloc->mba) also work.
However, I prefer to keep it to get rid of any assumptions of kernel
driver's behavior. How do you think, Jano? :)
> +
> + tmp = strchr(line, ':');
> + if (!tmp)
> + return 0;
> + tmp++;
> +
> + mbs = virStringSplitCount(tmp, ";", 0, &nmb);
> + if (!nmb)
> + return 0;
> +
> + for (i = 0; i < nmb; i++) {
> + tmp = strchr(mbs[i], '=');
> + *tmp = '\0';
> + tmp++;
> +
> + if (virStrToLong_uip(mbs[i], NULL, 10, &id) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Invalid node id %u "), id);
> + ret = -1;
ret should be initialized to -1 instead of setting it to -1 after every
error
Will fix in v2.
> + goto clean;
cleanup is the standard label name
> + }
> + if (virStrToLong_uip(tmp, NULL, 10, &bandwidth) < 0) {
> + virReportError(VIR_ERR_INTERNAL_ERROR,
> + _("Invalid bandwidth %u"), bandwidth);
> + ret = -1;
> + goto clean;
> + }
> + if (alloc->mba->nsizes <= id &&
> + VIR_EXPAND_N(alloc->mba->bandwidth, alloc->mba->nsizes,
> + id - alloc->mba->nsizes + 1) < 0) {
> + ret = -1;
> + goto clean;
> + }
> + if (!alloc->mba->bandwidth[id]) {
> + if (VIR_ALLOC(alloc->mba->bandwidth[id]) < 0) {
> + ret = -1;
> + goto clean;
> + }
> + }
> +
> + *(alloc->mba->bandwidth[id]) = bandwidth;
Why does the array contain pointer to the value, instead of the value
itself?
This is referenced from virresctrl CAT part. It is possible allocation
only happened in some of llc. If the pointer is NULL, that means there
is no memory allocation on that llc.
Also, VIR_APPEND_ELEMENT is nicer than VIR_EXPAND + manual assignment.
Will fix in
v2.
> + }
> + ret = 0;
> + clean:
> + virStringListFree(mbs);
> + return ret;
> +}
> +
> +
> +static int
> +virResctrlAllocCacheFormat(virResctrlAllocPtr resctrl, virBufferPtr buf)
> {
> - virBuffer buf = VIR_BUFFER_INITIALIZER;
> unsigned int level = 0;
> unsigned int type = 0;
> unsigned int cache = 0;
> + int ret = 0;
-1 is the default
>
> if (!resctrl)
> - return NULL;
> + return 0;
The return type change should be in a separate patch.
Will fix in v2.
>
> for (level = 0; level < resctrl->nlevels; level++) {
> virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
> @@ -878,7 +1204,7 @@ virResctrlAllocFormat(virResctrlAllocPtr resctrl)
> if (!a_type)
> continue;
>
> - virBufferAsprintf(&buf, "L%u%s:", level,
> virResctrlTypeToString(type));
> + virBufferAsprintf(buf, "L%u%s:", level,
> virResctrlTypeToString(type));
And so should the switch from local virBuffer to virBufferPtr
>
> for (cache = 0; cache < a_type->nmasks; cache++) {
> virBitmapPtr mask = a_type->masks[cache];
> @@ -889,18 +1215,49 @@ virResctrlAllocFormat(virResctrlAllocPtr resctrl)
>
> mask_str = virBitmapToString(mask, false, true);
> if (!mask_str) {
> - virBufferFreeAndReset(&buf);
> - return NULL;
> + ret = -1;
> + goto out;
> }
>
> - virBufferAsprintf(&buf, "%u=%s;", cache, mask_str);
> + virBufferAsprintf(buf, "%u=%s;", cache, mask_str);
> VIR_FREE(mask_str);
> }
>
> - virBufferTrim(&buf, ";", 1);
> - virBufferAddChar(&buf, '\n');
> + virBufferTrim(buf, ";", 1);
> + virBufferAddChar(buf, '\n');
> }
> }
> + out:
pointless label
Will fix in v2.
> + return ret;
> +}
> +
> +
> +static void
> +virResctrlMemoryBandwidthSubstract(virResctrlAllocPtr free,
> + virResctrlAllocPtr used)
> +{
> + size_t i;
> +
> + if (used->mba) {
> + for (i = 0; i < used->mba->nsizes; i++) {
> + if (used->mba->bandwidth[i])
> + *(free->mba->bandwidth[i]) -=
> *(used->mba->bandwidth[i]);
> + }
> + }
> +}
> +
> +
> @@ -1389,7 +1765,6 @@ virResctrlAllocFindUnused(virResctrlAllocPtr alloc,
> return ret;
> }
>
> -
Unrelated whitespace change.
Will fix in v2.
> static int
> virResctrlAllocCopyMasks(virResctrlAllocPtr dst,
> virResctrlAllocPtr src)
> @@ -1451,6 +1826,9 @@ virResctrlAllocMasksAssign(virResctrlInfoPtr
> resctrl,
> if (!alloc_default)
> goto cleanup;
>
> + if (virResctrlAllocMemoryBandwidth(resctrl, alloc, alloc_free) < 0)
> + goto cleanup;
> +
> if (virResctrlAllocCopyMasks(alloc, alloc_default) < 0)
> goto cleanup;
>
> @@ -1524,7 +1902,6 @@ virResctrlAllocDeterminePath(virResctrlAllocPtr
> alloc,
> return 0;
> }
>
> -
Same here
Will fix in v2.
> /* This checks if the directory for the alloc exists. If not it tries
> to create
> * it and apply appropriate alloc settings. */
> int
Jano