[libvirt] [V2 0/5] Support cache tune in libvirt

This series patches are for supportting CAT featues, which also called cache tune in libvirt. First to expose cache information which could be tuned in capabilites XML. Then add new domain xml element support to add cacahe bank which will apply on this libvirt domain. This series patches add a util file `resctrl.c/h`, an interface to talk with linux kernel's sys fs. There are some TODO work such as logic of supportting l3code/l3data, expose new public interface to get free cache information. Some discussion about this feature support can be found from: https://www.redhat.com/archives/libvir-list/2017-January/msg00644.html Eli Qiao (5): Resctrl: Add some utils functions Resctrl: expose cache information to capabilities Resctrl: Add new xml element to support cache tune Resctrl: Add private interface to set cachebanks Qemu: Set cache banks docs/schemas/domaincommon.rng | 41 ++ include/libvirt/virterror.h | 1 + src/Makefile.am | 1 + src/conf/capabilities.c | 56 +++ src/conf/capabilities.h | 23 + src/conf/domain_conf.c | 134 ++++++ src/conf/domain_conf.h | 18 + src/libvirt_private.syms | 10 + src/qemu/qemu_capabilities.c | 68 +++ src/qemu/qemu_driver.c | 4 + src/qemu/qemu_process.c | 10 + src/util/virerror.c | 1 + src/util/virresctrl.c | 994 ++++++++++++++++++++++++++++++++++++++++++ src/util/virresctrl.h | 135 ++++++ 14 files changed, 1496 insertions(+) create mode 100644 src/util/virresctrl.c create mode 100644 src/util/virresctrl.h -- 1.9.1

This patch adds some utils struct and functions to expose resctrl information. virResCtrlAvailable: If resctrl interface exist on host virResCtrlGet: get specify type resource contral information virResCtrlInit: initialize resctrl struct from the host's sys fs. ResCtrlAll[]: an array to maintain resource control information. Signed-off-by: Eli Qiao <liyong.qiao@intel.com> --- include/libvirt/virterror.h | 1 + src/Makefile.am | 1 + src/libvirt_private.syms | 4 + src/util/virerror.c | 1 + src/util/virresctrl.c | 330 ++++++++++++++++++++++++++++++++++++++++++++ src/util/virresctrl.h | 89 ++++++++++++ 6 files changed, 426 insertions(+) create mode 100644 src/util/virresctrl.c create mode 100644 src/util/virresctrl.h diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h index 2efee8f..3dd2d08 100644 --- a/include/libvirt/virterror.h +++ b/include/libvirt/virterror.h @@ -132,6 +132,7 @@ typedef enum { VIR_FROM_PERF = 65, /* Error from perf */ VIR_FROM_LIBSSH = 66, /* Error from libssh connection transport */ + VIR_FROM_RESCTRL = 67, /* Error from resource control */ # ifdef VIR_ENUM_SENTINELS VIR_ERR_DOMAIN_LAST diff --git a/src/Makefile.am b/src/Makefile.am index 2f32d41..b626f29 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -162,6 +162,7 @@ UTIL_SOURCES = \ util/virprocess.c util/virprocess.h \ util/virqemu.c util/virqemu.h \ util/virrandom.h util/virrandom.c \ + util/virresctrl.h util/virresctrl.c \ util/virrotatingfile.h util/virrotatingfile.c \ util/virscsi.c util/virscsi.h \ util/virscsivhost.c util/virscsivhost.h \ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 8e994c7..743e5ac 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2313,6 +2313,10 @@ virRandomGenerateWWN; virRandomInt; +# util/virresctrl.h +virResCtrlAvailable; +virResCtrlInit; + # util/virrotatingfile.h virRotatingFileReaderConsume; virRotatingFileReaderFree; diff --git a/src/util/virerror.c b/src/util/virerror.c index ef17fb5..93dfd4f 100644 --- a/src/util/virerror.c +++ b/src/util/virerror.c @@ -139,6 +139,7 @@ VIR_ENUM_IMPL(virErrorDomain, VIR_ERR_DOMAIN_LAST, "Perf", /* 65 */ "Libssh transport layer", + "Rescouce Control", ) diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c new file mode 100644 index 0000000..63bc808 --- /dev/null +++ b/src/util/virresctrl.c @@ -0,0 +1,330 @@ +/* + * virresctrl.c: methods for managing resource contral + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + * + * Authors: + * Eli Qiao <liyong.qiao@intel.com> + */ +#include <config.h> + +#include <sys/ioctl.h> +#if defined HAVE_SYS_SYSCALL_H +# include <sys/syscall.h> +#endif +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "virresctrl.h" +#include "viralloc.h" +#include "virerror.h" +#include "virfile.h" +#include "virhostcpu.h" +#include "virlog.h" +#include "virstring.h" +#include "nodeinfo.h" + +VIR_LOG_INIT("util.resctrl"); + +#define VIR_FROM_THIS VIR_FROM_RESCTRL + +static unsigned int host_id = 0; + +static virResCtrl ResCtrlAll[] = { + { + .name = "L3", + .cache_level = "l3", + }, + { + .name = "L3DATA", + .cache_level = "l3", + }, + { + .name = "L3CODE", + .cache_level = "l3", + }, + { + .name = "L2", + .cache_level = "l2", + }, +}; + +static int virResCtrlGetInfoStr(const int type, const char *item, char **str) +{ + int ret = 0; + char *tmp; + char *path; + + if (asprintf(&path, "%s/%s/%s", RESCTRL_INFO_DIR, ResCtrlAll[type].name, item) < 0) + return -1; + if (virFileReadAll(path, 10, str) < 0) { + ret = -1; + goto cleanup; + } + + if ((tmp = strchr(*str, '\n'))) { + *tmp = '\0'; + } + +cleanup: + VIR_FREE(path); + return ret; +} + + +static int virResCtrlGetCPUValue(const char* path, char** value) +{ + int ret = -1; + char* tmp; + + if(virFileReadAll(path, 10, value) < 0) { + goto cleanup; + } + if ((tmp = strchr(*value, '\n'))) { + *tmp = '\0'; + } + ret = 0; +cleanup: + return ret; +} + +static int virResctrlGetCPUSocketID(const size_t cpu, int* socket_id) +{ + int ret = -1; + char* physical_package_path = NULL; + char* physical_package = NULL; + if (virAsprintf(&physical_package_path, + "%s/cpu/cpu%zu/topology/physical_package_id", + SYSFS_SYSTEM_PATH, cpu) < 0) { + return -1; + } + + if(virResCtrlGetCPUValue(physical_package_path, + &physical_package) < 0) + goto cleanup; + + if (virStrToLong_i(physical_package, NULL, 0, socket_id) < 0) + goto cleanup; + + ret = 0; +cleanup: + VIR_FREE(physical_package); + VIR_FREE(physical_package_path); + return ret; +} + +static int virResCtrlGetCPUCache(const size_t cpu, int type, int *cache) +{ + int ret = -1; + char* cache_dir = NULL; + char* cache_str = NULL; + char* tmp; + int carry = -1; + + if (virAsprintf(&cache_dir, + "%s/cpu/cpu%zu/cache/index%d/size", + SYSFS_SYSTEM_PATH, cpu, type) < 0) + return -1; + + if(virResCtrlGetCPUValue(cache_dir, &cache_str) < 0) + goto cleanup; + + tmp = cache_str; + + while (*tmp != '\0') + tmp++; + if (*(tmp - 1) == 'K') { + *(tmp - 1) = '\0'; + carry = 1; + } + else if (*(tmp - 1) == 'M') { + *(tmp - 1) = '\0'; + carry = 1024; + } + + if (virStrToLong_i(cache_str, NULL, 0, cache) < 0) + goto cleanup; + + *cache = (*cache) * carry; + + if (*cache < 0) + goto cleanup; + + ret = 0; +cleanup: + VIR_FREE(cache_dir); + VIR_FREE(cache_str); + return ret; +} + +/* Fill all cache bank informations */ +static virResCacheBankPtr virResCtrlGetCacheBanks(int type, int* n_sockets) +{ + int npresent_cpus; + int index = -1; + virResCacheBankPtr bank; + + *n_sockets = 1; + if ((npresent_cpus = virHostCPUGetCount()) < 0) + return NULL; + + if (type == RDT_RESOURCE_L3 + || type == RDT_RESOURCE_L3DATA + || type == RDT_RESOURCE_L3CODE) + index = 3; + else if (type == RDT_RESOURCE_L2) { + index = 2; + } + + if (index == -1) + return NULL; + + if(VIR_ALLOC_N(bank, *n_sockets) < 0) + { + *n_sockets = 0; + return NULL; + } + + for( size_t i = 0; i < npresent_cpus ; i ++) { + int s_id; + int cache_size; + + if (virResctrlGetCPUSocketID(i, &s_id) < 0) { + goto error; + } + + if(s_id > (*n_sockets - 1)) { + size_t cur = *n_sockets; + size_t exp = s_id - (*n_sockets) + 1; + if(VIR_EXPAND_N(bank, cur, exp) < 0) { + goto error; + } + } + *n_sockets = s_id + 1; + if (bank[s_id].cpu_mask == NULL) { + if (!(bank[s_id].cpu_mask = virBitmapNew(npresent_cpus))) + goto error; + } + + ignore_value(virBitmapSetBit(bank[s_id].cpu_mask, i)); + + if (bank[s_id].cache_size == 0) { + if (virResCtrlGetCPUCache(i, index, &cache_size) < 0) { + goto error; + } + bank[s_id].cache_size = cache_size; + bank[s_id].cache_min = cache_size / ResCtrlAll[type].cbm_len; + } + } + return bank; + +error: + *n_sockets = 0; + VIR_FREE(bank); + return NULL; +} + +static int virResCtrlGetConfig(int type) +{ + int ret; + int i; + char *str; + + /* Read min_cbm_bits from resctrl. + eg: /sys/fs/resctrl/info/L3/num_closids + */ + if ((ret = virResCtrlGetInfoStr(type, "num_closids", &str)) < 0) { + return ret; + } + if (virStrToLong_i(str, NULL, 10, &ResCtrlAll[type].num_closid) < 0) { + return -1; + } + VIR_FREE(str); + + /* Read min_cbm_bits from resctrl. + eg: /sys/fs/resctrl/info/L3/cbm_mask + */ + if ((ret = virResCtrlGetInfoStr(type, "min_cbm_bits", &str)) < 0) { + return ret; + } + if (virStrToLong_i(str, NULL, 10, &ResCtrlAll[type].min_cbm_bits) < 0) { + return -1; + } + VIR_FREE(str); + + /* Read cbm_mask string from resctrl. + eg: /sys/fs/resctrl/info/L3/cbm_mask + */ + if ((ret = virResCtrlGetInfoStr(type, "cbm_mask", &str)) < 0) { + return ret; + } + + /* Calculate cbm length from the default cbm_mask. */ + ResCtrlAll[type].cbm_len = strlen(str) * 4; + VIR_FREE(str); + + /* Get all cache bank informations */ + ResCtrlAll[type].cache_banks = virResCtrlGetCacheBanks(type, + &(ResCtrlAll[type].num_banks)); + + if(ResCtrlAll[type].cache_banks == NULL) + return -1; + + for( i = 0; i < ResCtrlAll[type].num_banks; i++) + { + /*L3CODE and L3DATA shares same L3 resource, so they should + * have same host_id. */ + if (type == RDT_RESOURCE_L3CODE) { + ResCtrlAll[type].cache_banks[i].host_id = ResCtrlAll[RDT_RESOURCE_L3DATA].cache_banks[i].host_id; + } + else { + ResCtrlAll[type].cache_banks[i].host_id = host_id++; + } + } + + ResCtrlAll[type].enabled = true; + + return ret; +} + +int virResCtrlInit(void) { + int i = 0; + char *tmp; + int rc = 0; + + for(i = 0; i < RDT_NUM_RESOURCES; i++) { + if ((rc = asprintf(&tmp, "%s/%s", RESCTRL_INFO_DIR, ResCtrlAll[i].name)) < 0) { + continue; + } + if (virFileExists(tmp)) { + if ((rc = virResCtrlGetConfig(i)) < 0 ) + VIR_WARN("Ignor error while get config for %d", i); + } + + VIR_FREE(tmp); + } + return rc; +} + +bool virResCtrlAvailable(void) { + if (!virFileExists(RESCTRL_INFO_DIR)) + return false; + return true; +} + +virResCtrlPtr virResCtrlGet(int type) { + return &ResCtrlAll[type]; +} diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h new file mode 100644 index 0000000..f713e66 --- /dev/null +++ b/src/util/virresctrl.h @@ -0,0 +1,89 @@ +/* + * * virrscctrl.h: methods for managing rscctrl + * * + * * Copyright (C) 2016 Intel, Inc. + * * + * * This library is free software; you can redistribute it and/or + * * modify it under the terms of the GNU Lesser General Public + * * License as published by the Free Software Foundation; either + * * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + * + * Authors: + * Eli Qiao <liyong.qiao@intel.com> + */ + +#ifndef __VIR_RESCTRL_H__ +# define __VIR_RESCTRL_H__ + +# include "virutil.h" +# include "virbitmap.h" + +#define RESCTRL_DIR "/sys/fs/resctrl" +#define RESCTRL_INFO_DIR "/sys/fs/resctrl/info" +#define SYSFS_SYSTEM_PATH "/sys/devices/system" + +#define MAX_CPU_SOCKET_NUM 8 +#define MAX_CBM_BIT_LEN 32 +#define MAX_SCHEMATA_LEN 1024 +#define MAX_FILE_LEN ( 10 * 1024 * 1024) + +enum { + RDT_RESOURCE_L3, + RDT_RESOURCE_L3DATA, + RDT_RESOURCE_L3CODE, + RDT_RESOURCE_L2, + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + + +typedef struct _virResCacheBank virResCacheBank; +typedef virResCacheBank *virResCacheBankPtr; +struct _virResCacheBank { + unsigned int host_id; + unsigned long long cache_size; + unsigned long long cache_left; + unsigned long long cache_min; + virBitmapPtr cpu_mask; +}; + +/** + * struct rdt_resource - attributes of an RDT resource + * @enabled: Is this feature enabled on this machine + * @name: Name to use in "schemata" file + * @num_closid: Number of CLOSIDs available + * @max_cbm: Largest Cache Bit Mask allowed + * @min_cbm_bits: Minimum number of consecutive bits to be set + * in a cache bit mask + * @cache_level: Which cache level defines scope of this domain + * @num_banks: Number of cache bank on this machine. + * @cache_banks: Array of cache bank + */ +typedef struct _virResCtrl virResCtrl; +typedef virResCtrl *virResCtrlPtr; +struct _virResCtrl { + bool enabled; + const char *name; + int num_closid; + int cbm_len; + int min_cbm_bits; + const char* cache_level; + int num_banks; + virResCacheBankPtr cache_banks; +}; + + +bool virResCtrlAvailable(void); +int virResCtrlInit(void); +virResCtrlPtr virResCtrlGet(int); + +#endif -- 1.9.1

This patch expose cache information to host's capabilites xml. For l3 cache allocation <cache> <bank id='0' type='l3' size='56320' unit='KiB' cpus='0-21,44-65'> <control min='2816' reserved='2816' unit='KiB' scope='L3'/> </bank> <bank id='1' type='l3' size='56320' unit='KiB' cpus='22-43,66-87'> <control min='2816' reserved='2816' unit='KiB' scope='L3'/> </bank> </cache> For l3 cache allocation supported cdp(seperate data/code): <cache> <bank id='0' type='l3' size='56320' unit='KiB' cpus='0-21,44-65'> <control min='2816' reserved='2816' unit='KiB' scope='L3DATA'/> <control min='2816' reserved='2816' unit='KiB' scope='L3CODE'/> </bank> <bank id='1' type='l3' size='56320' unit='KiB' cpus='22-43,66-87'> <control min='2816' reserved='2816' unit='KiB' scope='L3DATA'/> <control min='2816' reserved='2816' unit='KiB' scope='L3CODE'/> </bank> </cache> RFC on mailing list. https://www.redhat.com/archives/libvir-list/2017-January/msg00644.html Signed-off-by: Eli Qiao <liyong.qiao@intel.com> --- src/conf/capabilities.c | 56 ++++++++++++++++++++++++++++++++++++ src/conf/capabilities.h | 23 +++++++++++++++ src/libvirt_private.syms | 1 + src/qemu/qemu_capabilities.c | 68 ++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_driver.c | 4 +++ 5 files changed, 152 insertions(+) diff --git a/src/conf/capabilities.c b/src/conf/capabilities.c index 9ab343b..290c25f 100644 --- a/src/conf/capabilities.c +++ b/src/conf/capabilities.c @@ -198,6 +198,18 @@ virCapabilitiesClearSecModel(virCapsHostSecModelPtr secmodel) } static void +virCapabilitiesClearCacheBank(virCapsHostCacheBankPtr cachebank) +{ + size_t i; + for (i = 0; i < cachebank->ncontrol; i++) { + VIR_FREE(cachebank->control[i].scope); + } + VIR_FREE(cachebank->type); + VIR_FREE(cachebank->cpus); +} + + +static void virCapabilitiesDispose(void *object) { virCapsPtr caps = object; @@ -221,6 +233,10 @@ virCapabilitiesDispose(void *object) virCapabilitiesClearSecModel(&caps->host.secModels[i]); VIR_FREE(caps->host.secModels); + for (i = 0; i < caps->host.ncachebank; i++) + virCapabilitiesClearCacheBank(caps->host.cachebank[i]); + VIR_FREE(caps->host.cachebank); + VIR_FREE(caps->host.netprefix); VIR_FREE(caps->host.pagesSize); virCPUDefFree(caps->host.cpu); @@ -844,6 +860,41 @@ virCapabilitiesFormatNUMATopology(virBufferPtr buf, return 0; } +static int +virCapabilitiesFormatCache(virBufferPtr buf, + size_t ncachebank, + virCapsHostCacheBankPtr *cachebank) +{ + size_t i; + size_t j; + + virBufferAddLit(buf, "<cache>\n"); + virBufferAdjustIndent(buf, 2); + + for( i = 0 ; i < ncachebank; i++) { + virBufferAsprintf(buf, + "<bank id='%u' type='%s' size='%llu' unit='KiB' cpus='%s'>\n", + cachebank[i]->id, + cachebank[i]->type, + cachebank[i]->size, + cachebank[i]->cpus); + + virBufferAdjustIndent(buf, 2); + for( j = 0; j < cachebank[i]->ncontrol; j ++) { + virBufferAsprintf(buf, + "<control min='%llu' reserved='%llu' unit='KiB' scope='%s'/>\n", + cachebank[i]->control[j].min, + cachebank[i]->control[j].reserved, + cachebank[i]->control[j].scope); + } + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</bank>\n"); + } + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</cache>\n"); + return 0; +} + /** * virCapabilitiesFormatXML: * @caps: capabilities to format @@ -931,6 +982,11 @@ virCapabilitiesFormatXML(virCapsPtr caps) virBufferAddLit(&buf, "</migration_features>\n"); } + if (caps->host.ncachebank && + virCapabilitiesFormatCache(&buf, caps->host.ncachebank, + caps->host.cachebank) < 0) + return NULL; + if (caps->host.netprefix) virBufferAsprintf(&buf, "<netprefix>%s</netprefix>\n", caps->host.netprefix); diff --git a/src/conf/capabilities.h b/src/conf/capabilities.h index cfdc34a..b446de5 100644 --- a/src/conf/capabilities.h +++ b/src/conf/capabilities.h @@ -138,6 +138,25 @@ struct _virCapsHostSecModel { virCapsHostSecModelLabelPtr labels; }; +typedef struct _virCapsHostCacheControl virCapsHostCacheControl; +typedef virCapsHostCacheControl *virCapsHostCacheControlPtr; +struct _virCapsHostCacheControl { + unsigned long long min; + unsigned long long reserved; + char* scope; +}; + +typedef struct _virCapsHostCacheBank virCapsHostCacheBank; +typedef virCapsHostCacheBank *virCapsHostCacheBankPtr; +struct _virCapsHostCacheBank { + unsigned int id; + char* type; + char* cpus; + unsigned long long size; + size_t ncontrol; + virCapsHostCacheControlPtr control; +}; + typedef struct _virCapsHost virCapsHost; typedef virCapsHost *virCapsHostPtr; struct _virCapsHost { @@ -160,6 +179,10 @@ struct _virCapsHost { size_t nsecModels; virCapsHostSecModelPtr secModels; + size_t ncachebank; + size_t ncachebank_max; + virCapsHostCacheBankPtr *cachebank; + char *netprefix; virCPUDefPtr cpu; int nPagesSize; /* size of pagesSize array */ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 743e5ac..d93b775 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2316,6 +2316,7 @@ virRandomInt; # util/virresctrl.h virResCtrlAvailable; virResCtrlInit; +virResCtrlGet; # util/virrotatingfile.h virRotatingFileReaderConsume; diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 3247d25..23f416d 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -45,6 +45,7 @@ #include "qemu_domain.h" #define __QEMU_CAPSRIV_H_ALLOW__ #include "qemu_capspriv.h" +#include "virresctrl.h" #include <fcntl.h> #include <sys/stat.h> @@ -1098,7 +1099,71 @@ virQEMUCapsInitCPU(virCapsPtr caps, goto cleanup; } +static int +virQEMUCapsInitCache(virCapsPtr caps) +{ + int i, j; + virResCtrlPtr resctrl; + virCapsHostCacheBankPtr bank; + + for (i = 0; i < RDT_NUM_RESOURCES; i ++) + { + /* L3DATA and L3CODE share L3 resources */ + if ( i == RDT_RESOURCE_L3CODE ) + continue; + resctrl = virResCtrlGet(i); + + if(resctrl->enabled) { + for( j = 0; j < resctrl->num_banks; j++) + { + if(VIR_RESIZE_N(caps->host.cachebank, caps->host.ncachebank_max, + caps->host.ncachebank, 1) < 0) + return -1; + + if(VIR_ALLOC(bank) < 0) + return -1; + + bank->id = resctrl->cache_banks[j].host_id; + if(VIR_STRDUP(bank->type, resctrl->cache_level) < 0) + goto err; + if(VIR_STRDUP(bank->cpus, virBitmapFormat(resctrl->cache_banks[j].cpu_mask)) < 0) + goto err; + bank->size = resctrl->cache_banks[j].cache_size; + /*L3DATA and L3CODE shares L3 cache resources, so fill them to the control element*/ + if ( i == RDT_RESOURCE_L3DATA ) { + if(VIR_EXPAND_N(bank->control, bank->ncontrol, 2) < 0) + goto err; + + bank->control[0].min = virResCtrlGet(RDT_RESOURCE_L3DATA)->cache_banks[j].cache_min; + bank->control[0].reserved = bank->control[0].min * (virResCtrlGet(RDT_RESOURCE_L3DATA)->min_cbm_bits); + if(VIR_STRDUP(bank->control[0].scope, + virResCtrlGet(RDT_RESOURCE_L3DATA)->name) < 0) + goto err; + + bank->control[1].min = virResCtrlGet(RDT_RESOURCE_L3CODE)->cache_banks[j].cache_min; + bank->control[1].reserved = bank->control[1].min * (virResCtrlGet(RDT_RESOURCE_L3CODE)->min_cbm_bits); + if(VIR_STRDUP(bank->control[1].scope, + virResCtrlGet(RDT_RESOURCE_L3CODE)->name) < 0) + goto err; + } + else { + if(VIR_EXPAND_N(bank->control, bank->ncontrol, 1) < 0) + goto err; + bank->control[0].min = resctrl->cache_banks[j].cache_min; + bank->control[0].reserved = bank->control[0].min * resctrl->min_cbm_bits; + if(VIR_STRDUP(bank->control[0].scope, resctrl->name) < 0) + goto err; + } + caps->host.cachebank[caps->host.ncachebank++] = bank; + } + } + } + return 0; +err: + VIR_FREE(bank); + return -1; +} static int virQEMUCapsInitPages(virCapsPtr caps) { @@ -1144,6 +1209,9 @@ virCapsPtr virQEMUCapsInit(virQEMUCapsCachePtr cache) if (virQEMUCapsInitCPU(caps, hostarch) < 0) VIR_WARN("Failed to get host CPU"); + if (virQEMUCapsInitCache(caps) < 0) + VIR_WARN("Failed to get host cache"); + /* Add the power management features of the host */ if (virNodeSuspendGetTargetMask(&caps->host.powerMgmt) < 0) VIR_WARN("Failed to get host power management capabilities"); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 37ccfdf..520b74d 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -105,6 +105,7 @@ #include "vircgroup.h" #include "virperf.h" #include "virnuma.h" +#include "virresctrl.h" #include "dirname.h" #include "network/bridge_driver.h" @@ -849,6 +850,9 @@ qemuStateInitialize(bool privileged, run_gid = cfg->group; } + if(virResCtrlAvailable() && virResCtrlInit() < 0) + VIR_WARN("Faild to initialize resource control."); + qemu_driver->qemuCapsCache = virQEMUCapsCacheNew(cfg->libDir, cfg->cacheDir, run_uid, -- 1.9.1

This patch adds new xml element to support cache tune as: <cputune> ... <cachetune id='1' host_id='0' type='l3' size='2816' unit='KiB'/> ... </cputune> id: any non-minus number host_id: reference of the host's cache banks id, it's from capabilities type: cache bank type size: should be multiples of the min_size of the bank on host. --- docs/schemas/domaincommon.rng | 41 +++++++++++++ src/conf/domain_conf.c | 134 ++++++++++++++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 18 ++++++ src/libvirt_private.syms | 3 + src/util/virresctrl.c | 25 ++++++++ src/util/virresctrl.h | 3 +- 6 files changed, 223 insertions(+), 1 deletion(-) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index cc6e0d0..d0f9e54 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -795,6 +795,27 @@ </attribute> </element> </zeroOrMore> + <zeroOrMore> + <element name="cachetune"> + <attribute name="id"> + <ref name="cacheid"/> + </attribute> + <attribute name="host_id"> + <ref name="hostid"/> + </attribute> + <attribute name="type"> + <ref name="cachetype"/> + </attribute> + <attribute name="size"> + <ref name="unsignedInt"/> + </attribute> + <optional> + <attribute name="unit"> + <ref name="cacheunit"/> + </attribute> + </optional> + </element> + </zeroOrMore> <optional> <element name="emulatorpin"> <attribute name="cpuset"> @@ -5451,6 +5472,26 @@ <param name="minInclusive">-1</param> </data> </define> + <define name="cacheid"> + <data type="unsignedShort"> + <param name="pattern">[0-9]+</param> + </data> + </define> + <define name="hostid"> + <data type="unsignedShort"> + <param name="pattern">[0-9]+</param> + </data> + </define> + <define name="cachetype"> + <data type="string"> + <param name="pattern">(l3)</param> + </data> + </define> + <define name="cacheunit"> + <data type="string"> + <param name="pattern">KiB</param> + </data> + </define> <!-- weight currently is in range [100, 1000] --> <define name="weight"> <data type="unsignedInt"> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index c06b128..0304b36 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -56,6 +56,7 @@ #include "virstring.h" #include "virnetdev.h" #include "virhostdev.h" +#include "virresctrl.h" #define VIR_FROM_THIS VIR_FROM_DOMAIN @@ -15604,6 +15605,116 @@ virDomainVcpuPinDefParseXML(virDomainDefPtr def, return ret; } +/* Parse the XML definition for cachetune + * and a cachetune has the form + * <cachetune id='0' host_id='0' type='l3' size='1024' unit='KiB'/> + */ +static int +virDomainCacheTuneDefParseXML(virDomainDefPtr def, + int n, + xmlNodePtr* nodes) +{ + char* tmp = NULL; + int i; + size_t j; + int type = -1 ; + virDomainCacheBankPtr bank = NULL; + virResCtrlPtr resctrl; + + if (VIR_ALLOC_N(bank, n) < 0) + goto cleanup; + + for (i = 0; i < n; i++) { + if (!(tmp = virXMLPropString(nodes[i], "id"))) { + virReportError(VIR_ERR_XML_ERROR, "%s", _("missing id in cache tune")); + goto cleanup; + } + if (virStrToLong_uip(tmp, NULL, 10, &(bank[i].id)) < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid setting for cache id '%s'"), tmp); + goto cleanup; + } + + VIR_FREE(tmp); + if (!(tmp = virXMLPropString(nodes[i], "host_id"))) { + virReportError(VIR_ERR_XML_ERROR, "%s", _("missing host id in cache tune")); + goto cleanup; + } + if (virStrToLong_uip(tmp, NULL, 10, &(bank[i].host_id)) < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid setting for cache host id '%s'"), tmp); + goto cleanup; + } + VIR_FREE(tmp); + + if (!(tmp = virXMLPropString(nodes[i], "size"))) { + virReportError(VIR_ERR_XML_ERROR, "%s", _("missing size in cache tune")); + goto cleanup; + } + if (virStrToLong_ull(tmp, NULL, 10, &(bank[i].size)) < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid setting for cache size '%s'"), tmp); + goto cleanup; + } + VIR_FREE(tmp); + + if (!(tmp = virXMLPropString(nodes[i], "type"))) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("missing cache type")); + goto cleanup; + } + + if ((type = virResCtrlTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("'unsupported cache type '%s'"), tmp); + goto cleanup; + } + + resctrl = virResCtrlGet(type); + + if(resctrl == NULL || !resctrl->enabled) { + virReportError(VIR_ERR_XML_ERROR, + _("'host doesn't enabled cache type '%s'"), tmp); + goto cleanup; + } + + bool found_host_id = false; + /* Loop for banks to search host_id */ + for(j = 0; j < resctrl->num_banks; j++) { + if (resctrl->cache_banks[j].host_id == bank[i].host_id) { + found_host_id = true; + break; + } + } + + if (! found_host_id) { + virReportError(VIR_ERR_XML_ERROR, + _("'cache bank's host id %u not found on the host"), + bank[i].host_id); + goto cleanup; + } + + if (bank[i].size == 0 || + bank[i].size % resctrl->cache_banks[j].cache_min != 0) { + virReportError(VIR_ERR_XML_ERROR, + _("'the size shoud be multiplies of '%llu'"), + resctrl->cache_banks[j].cache_min); + goto cleanup; + } + + if (VIR_STRDUP(bank[i].type, tmp) < 0) + goto cleanup; + def->cachetune.cache_banks = bank; + } + + def->cachetune.n_banks = n; + return 0; + +cleanup: + VIR_FREE(bank); + VIR_FREE(tmp); + return -1; +} /* Parse the XML definition for a iothreadpin * and an iothreadspin has the form @@ -16882,6 +16993,13 @@ virDomainDefParseXML(xmlDocPtr xml, } VIR_FREE(nodes); + if ((n = virXPathNodeSet("./cputune/cachetune", ctxt, &nodes)) < 0) + goto error; + + if (virDomainCacheTuneDefParseXML(def, n ,nodes) < 0) + goto error; + VIR_FREE(nodes); + if ((n = virXPathNodeSet("./cputune/emulatorpin", ctxt, &nodes)) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("cannot extract emulatorpin nodes")); @@ -23398,6 +23516,20 @@ virDomainSchedulerFormat(virBufferPtr buf, } +static void +virDomainCacheTuneDefFormat(virBufferPtr buf, + virDomainCachetunePtr cache) +{ + size_t i; + for(i = 0; i < cache->n_banks; i ++) { + virBufferAsprintf(buf, "<cachetune id='%u' host_id='%u' " + "type='%s' size='%llu' unit='KiB'/>\n", + cache->cache_banks[i].id, + cache->cache_banks[i].host_id, + cache->cache_banks[i].type, + cache->cache_banks[i].size); + } +} static int virDomainCputuneDefFormat(virBufferPtr buf, @@ -23461,6 +23593,8 @@ virDomainCputuneDefFormat(virBufferPtr buf, VIR_FREE(cpumask); } + virDomainCacheTuneDefFormat(&childrenBuf, &def->cachetune); + if (def->cputune.emulatorpin) { char *cpumask; virBufferAddLit(&childrenBuf, "<emulatorpin "); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 507ace8..49e01ac 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2132,6 +2132,22 @@ struct _virDomainMemtune { unsigned long long swap_hard_limit; /* in kibibytes, limit at off_t bytes */ }; +typedef struct _virDomainCacheBank virDomainCacheBank; +typedef virDomainCacheBank *virDomainCacheBankPtr; +struct _virDomainCacheBank { + unsigned int id; + unsigned int host_id; + unsigned long long size; + char* type; +}; + +typedef struct _virDomainCachetune virDomainCachetune; +typedef virDomainCachetune *virDomainCachetunePtr; +struct _virDomainCachetune { + size_t n_banks; + virDomainCacheBankPtr cache_banks; +}; + typedef struct _virDomainPowerManagement virDomainPowerManagement; typedef virDomainPowerManagement *virDomainPowerManagementPtr; @@ -2196,6 +2212,8 @@ struct _virDomainDef { virDomainCputune cputune; + virDomainCachetune cachetune; + virDomainNumaPtr numa; virDomainResourceDefPtr resource; virDomainIdMapDef idmap; diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d93b775..08a0bc4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2317,6 +2317,9 @@ virRandomInt; virResCtrlAvailable; virResCtrlInit; virResCtrlGet; +virResCtrlTypeFromString; +virResCtrlTypeToString; + # util/virrotatingfile.h virRotatingFileReaderConsume; diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c index 63bc808..76d4ab8 100644 --- a/src/util/virresctrl.c +++ b/src/util/virresctrl.c @@ -41,6 +41,31 @@ VIR_LOG_INIT("util.resctrl"); #define VIR_FROM_THIS VIR_FROM_RESCTRL +VIR_ENUM_IMPL(virResCtrl, RDT_NUM_RESOURCES, + "l3", "l3data", "l3code", "l2"); + +#define CONSTRUCT_RESCTRL_PATH(domain_name, item_name) \ +do { \ + if (NULL == domain_name) { \ + if (asprintf(&path, "%s/%s", RESCTRL_DIR, item_name) < 0)\ + return -1; \ + } \ + else { \ + if (asprintf(&path, "%s/%s/%s", RESCTRL_DIR, \ + domain_name, \ + item_name) < 0) \ + return -1; \ + } \ +} while(0) + +#define VIR_RESCTRL_ENABLED(type) \ + ResCtrlAll[type].enabled + +#define VIR_RESCTRL_GET_SCHEMATA(count) ((1 << count) - 1) + +#define VIR_RESCTRL_SET_SCHEMATA(p, type, pos, val) \ + p->schematas[type]->schemata_items[pos] = val + static unsigned int host_id = 0; static virResCtrl ResCtrlAll[] = { diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h index f713e66..3cc41da 100644 --- a/src/util/virresctrl.h +++ b/src/util/virresctrl.h @@ -42,9 +42,10 @@ enum { RDT_RESOURCE_L3CODE, RDT_RESOURCE_L2, /* Must be the last */ - RDT_NUM_RESOURCES, + RDT_NUM_RESOURCES }; +VIR_ENUM_DECL(virResCtrl); typedef struct _virResCacheBank virResCacheBank; typedef virResCacheBank *virResCacheBankPtr; -- 1.9.1

virResCtrlSetCacheBanks: Set cache banks of a libvirt domain. It will create new resource domain under `/sys/fs/resctrl` and fill the schemata according the cache banks configration. virResCtrlUpdate: Update the schemata after libvirt domain destroy. Signed-off-by: Eli Qiao <liyong.qiao@intel.com> --- src/libvirt_private.syms | 2 + src/util/virresctrl.c | 641 ++++++++++++++++++++++++++++++++++++++++++++++- src/util/virresctrl.h | 47 +++- 3 files changed, 688 insertions(+), 2 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 08a0bc4..2b3278a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2319,6 +2319,8 @@ virResCtrlInit; virResCtrlGet; virResCtrlTypeFromString; virResCtrlTypeToString; +virResCtrlSetCacheBanks; +virResCtrlUpdate; # util/virrotatingfile.h diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c index 76d4ab8..6be581c 100644 --- a/src/util/virresctrl.c +++ b/src/util/virresctrl.c @@ -68,6 +68,11 @@ do { \ static unsigned int host_id = 0; +/* Global static struct to be maintained which is a interface */ +static virResCtrlDomain DomainAll; + +/* Global static struct array to be maintained which indicate + * resource status on a host */ static virResCtrl ResCtrlAll[] = { { .name = "L3", @@ -87,6 +92,64 @@ static virResCtrl ResCtrlAll[] = { }, }; +/* + * How many bits is set in schemata + * eg: + * virResCtrlBitsNum(1011) = 2 */ +static int virResCtrlBitsContinuesNum(unsigned schemata) { + int ret = 0; + for (int i = 0; i < MAX_CBM_BIT_LEN; i ++) { + if ((schemata & 0x1) == 0x1) + ret++; + else + break; + schemata = schemata >> 1; + if (schemata == 0) break; + } + return ret; +} + +static int virResCtrlGetStr(const char *domain_name, const char *item_name, char **ret) +{ + char *path; + int rc = 0; + + CONSTRUCT_RESCTRL_PATH(domain_name, item_name); + + if (virFileReadAll(path, MAX_FILE_LEN, ret) < 0) { + rc = -1; + goto cleanup; + } + +cleanup: + VIR_FREE(path); + return rc; +} + +static int virResCtrlGetTasks(const char *domain_name, char **pids) +{ + return virResCtrlGetStr(domain_name, "tasks", pids); +} + +static int virResCtrlGetSchemata(const int type, const char *name, char **schemata) +{ + int rc; + char *tmp, *end; + char *buf; + + if ((rc = virResCtrlGetStr(name, "schemata", &buf)) < 0) + return rc; + + tmp = strstr(buf, ResCtrlAll[type].name); + end = strchr(tmp, '\n'); + *end = '\0'; + if(VIR_STRDUP(*schemata, tmp) < 0) + rc = -1; + + VIR_FREE(buf); + return rc; +} + static int virResCtrlGetInfoStr(const int type, const char *item, char **str) { int ret = 0; @@ -109,6 +172,70 @@ cleanup: return ret; } +/* Return pointer of and ncount of schemata*/ +static virResSchemataPtr virParseSchemata(const char* schemata_str, int* ncount) +{ + const char *p, *q; + int pos; + int ischemata; + virResSchemataPtr schemata; + virResSchemataItemPtr schemataitems, tmpitem; + unsigned int socket_no = 0; + char *tmp; + + if(VIR_ALLOC(schemata) < 0) + goto cleanup; + + p = q = schemata_str; + pos = strchr(schemata_str, ':') - p; + + /* calculate cpu socket count */ + *ncount = 1; + while((q = strchr(p, ';')) != 0) { + p = q + 1; + (*ncount)++; + } + + /* allocat an arrry to store schemata for each socket*/ + if(VIR_ALLOC_N_QUIET(tmpitem, *ncount) < 0) + goto cleanup; + + schemataitems = tmpitem; + + p = q = schemata_str + pos + 1; + + while(*p != '\0'){ + if (*p == '='){ + q = p + 1; + + tmpitem->socket_no = socket_no++; + + while(*p != ';' && *p != '\0') p++; + + if (VIR_STRNDUP(tmp, q, p-q) < 0) + goto cleanup; + + if (virStrToLong_i(tmp, NULL, 16, &ischemata) < 0) + goto cleanup; + + VIR_FREE(tmp); + tmp = NULL; + tmpitem->schemata = ischemata; + tmpitem ++; + schemata->n_schemata_items +=1; + } + p++; + } + + schemata->schemata_items = schemataitems; + return schemata; + +cleanup: + VIR_FREE(schemata); + VIR_FREE(tmpitem); + return NULL; +} + static int virResCtrlGetCPUValue(const char* path, char** value) { @@ -252,6 +379,7 @@ static virResCacheBankPtr virResCtrlGetCacheBanks(int type, int* n_sockets) } bank[s_id].cache_size = cache_size; bank[s_id].cache_min = cache_size / ResCtrlAll[type].cbm_len; + bank[s_id].cache_left = cache_size - (bank[s_id].cache_min * ResCtrlAll[type].min_cbm_bits); } } return bank; @@ -268,7 +396,7 @@ static int virResCtrlGetConfig(int type) int i; char *str; - /* Read min_cbm_bits from resctrl. + /* Read num_closids from resctrl. eg: /sys/fs/resctrl/info/L3/num_closids */ if ((ret = virResCtrlGetInfoStr(type, "num_closids", &str)) < 0) { @@ -325,6 +453,512 @@ static int virResCtrlGetConfig(int type) return ret; } +/* Remove the Domain from sysfs, this should only success no pids in tasks + * of a partition. + */ +static +int virRscctrlRemoveDomain(const char *name) +{ + char *path = NULL; + int rc = 0; + + if ((rc = asprintf(&path, "%s/%s", RESCTRL_DIR, name)) < 0) + return rc; + rc = rmdir(path); + VIR_FREE(path); + return rc; +} + +/* assemble schemata string*/ +static +char* virResCtrlAssembleSchemata(virResSchemataPtr schemata, int type) +{ + virBuffer buf = VIR_BUFFER_INITIALIZER; + + virBufferAsprintf(&buf, "%s:%u=%x", ResCtrlAll[type].name, + schemata->schemata_items[0].socket_no, + schemata->schemata_items[0].schemata); + + for(int i = 1; i < schemata->n_schemata_items; i++) { + virBufferAsprintf(&buf, ";%u=%x", + schemata->schemata_items[i].socket_no, + schemata->schemata_items[i].schemata); + } + + return virBufferContentAndReset(&buf); +} + +/* Refresh default domains' schemata + */ +static +int virResCtrlRefreshSchemata(void) +{ + int i; + int j; + int k; + unsigned int tmp_schemata; + unsigned int default_schemata; + + virResDomainPtr header, p; + + header = DomainAll.domains; + + for (i = 0; i < RDT_NUM_RESOURCES; i++) { + if (VIR_RESCTRL_ENABLED(i)) { + for(j = 0; j < header->schematas[i]->n_schemata_items; j ++) { + p = header->next; + default_schemata = VIR_RESCTRL_GET_SCHEMATA(ResCtrlAll[i].cbm_len); + tmp_schemata = 0; + /* NOTEs: if only header domain, the schemata will be set to default one*/ + for (k = 1; k < DomainAll.num_domains; k++) { + tmp_schemata |= p->schematas[i]->schemata_items[j].schemata; + p = p->next; + } + /* sys fs doens't let us use 0 */ + int min_bits = VIR_RESCTRL_GET_SCHEMATA(ResCtrlAll[i].min_cbm_bits); + if((tmp_schemata & min_bits) == min_bits) + tmp_schemata -= min_bits; + + default_schemata ^= tmp_schemata; + + int bitsnum = virResCtrlBitsContinuesNum(default_schemata); + // calcuate header's schemata + // NOTES: resctrl sysfs only allow us to set a continues schemata + header->schematas[i]->schemata_items[j].schemata = VIR_RESCTRL_GET_SCHEMATA(bitsnum); + ResCtrlAll[i].cache_banks[j].cache_left = + (bitsnum - ResCtrlAll[i].min_cbm_bits) * ResCtrlAll[i].cache_banks[j].cache_min; + } + } + } + + return 0; + +} + +/* Refresh all domains', remove the domains which has no task ids. + * This will be used after VM pause, restart, destroy etc. + */ +static int +virResCtrlRefresh(void) +{ + int i; + char* tasks; + unsigned int origin_count = DomainAll.num_domains; + virResDomainPtr p, pre, del=NULL; + pre = DomainAll.domains; + p = pre->next; + + for (i = 1; i < origin_count; i++) { + if(virResCtrlGetTasks(p->name, &tasks) < 0) { + VIR_WARN("Failed to get tasks from %s", p->name); + pre = p; + p = p->next; + } + if(virStringIsEmpty(tasks)) { + pre->next = p->next; + if(p->next != NULL) + p->next->pre = pre; + + del = p; + p = p->next; + if(virRscctrlRemoveDomain(del->name) < 0) + VIR_WARN("Failed to remove partition %s", p->name); + + VIR_DEBUG("Remove partition %s", del->name); + + VIR_FREE(del->name); + VIR_FREE(del->tasks); + VIR_FREE(del); + del = NULL; + + DomainAll.num_domains -=1; + } else { + pre = p; + p = p->next; + } + VIR_FREE(tasks); + + } + + return virResCtrlRefreshSchemata(); +} + +/* Get a domain ptr by domain's name*/ +static +virResDomainPtr virResCtrlGetDomain(const char* name) { + int i; + virResDomainPtr p = DomainAll.domains; + for(i = 0; i < DomainAll.num_domains; i++) + { + if((p->name) && (strcmp(name, p->name) == 0)) { + return p; + } + p = p->next; + } + return NULL; +} + +static int +virResCtrlAddTask(virResDomainPtr dom, pid_t pid) +{ + virBuffer buf = VIR_BUFFER_INITIALIZER; + if(dom->tasks == NULL) { + virBufferAsprintf(&buf, "%lld\n", (long long)pid); + } else { + virBufferAsprintf(&buf, "%s%lld\n", dom->tasks, (long long)pid); + VIR_FREE(dom->tasks); + } + dom->tasks = virBufferContentAndReset(&buf); + return 0; +} + +static int +virResCtrlWrite(const char* name, const char* item, const char* content) +{ + char* path; + int writefd; + int rc = -1; + + CONSTRUCT_RESCTRL_PATH(name, item); + + if (!virFileExists(path)) + goto cleanup; + + if ((writefd = open(path, O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR)) < 0) + goto cleanup; + + if (safewrite(writefd, content, strlen(content)) < 0) + goto cleanup; + + rc = 0; +cleanup: + VIR_FREE(path); + VIR_FORCE_CLOSE(writefd); + return rc; +} + +/* if name == NULL we load default schemata */ +static +virResDomainPtr virResCtrlLoadDomain(const char* name) +{ + char* schematas; + char* tasks = NULL; + virResDomainPtr p; + + if(VIR_ALLOC(p) < 0) + goto cleanup; + + if(name != NULL && virResCtrlGetTasks(name, &tasks) < 0) + goto cleanup; + + for(int i = 0; i < RDT_NUM_RESOURCES; i++) { + if(VIR_RESCTRL_ENABLED(i)) { + if (virResCtrlGetSchemata(i, name, &schematas) < 0) + goto cleanup; + p->schematas[i] = virParseSchemata(schematas, &(p->n_sockets)); + VIR_FREE(schematas); + } + } + + p->tasks = tasks; + + if((name != NULL) && (VIR_STRDUP(p->name, name)) < 0) + goto cleanup; + + return p; + +cleanup: + VIR_FREE(p); + return NULL; +} + +static +virResDomainPtr virResCtrlCreateDomain(const char* name) +{ + char *path; + mode_t mode = 0755; + virResDomainPtr p; + if (asprintf(&path, "%s/%s", RESCTRL_DIR, name) < 0) + return NULL; + if (virDirCreate(path, mode, 0, 0, 0) < 0) + goto cleanup; + + if((p = virResCtrlLoadDomain(name)) == NULL) + return p; + + /* sys fs doens't let us use 0. + * reset schemata to min_bits*/ + for(int i = 0; i < RDT_NUM_RESOURCES; i++) { + if(VIR_RESCTRL_ENABLED(i)) { + int min_bits = VIR_RESCTRL_GET_SCHEMATA(ResCtrlAll[i].min_cbm_bits); + for(int j = 0; j < p->n_sockets; j++) + p->schematas[i]->schemata_items[j].schemata = min_bits; + } + } + + VIR_FREE(path); + return p; + +cleanup: + VIR_FREE(path); + return NULL; +} + +static +int virResCtrlDestroyDomain(virResDomainPtr p) +{ + char *path; + if (asprintf(&path, "%s/%s", RESCTRL_DIR, p->name) < 0) + return -1; + rmdir(path); + + VIR_FREE(p->name); + p->name = NULL; + VIR_FREE(p->tasks); + VIR_FREE(p); + p = NULL; + return 0; +} + +/* flush domains's information to sysfs*/ +static int +virResCtrlFlushDomainToSysfs(virResDomainPtr dom) +{ + int i; + char* schemata; + char* tmp; + int rc = -1; + virBuffer buf = VIR_BUFFER_INITIALIZER; + + for(i = 0; i < RDT_NUM_RESOURCES; i++) { + if(VIR_RESCTRL_ENABLED(i)) { + tmp = virResCtrlAssembleSchemata(dom->schematas[i], i); + virBufferAsprintf(&buf, "%s\n", tmp); + VIR_FREE(tmp); + } + } + + schemata = virBufferContentAndReset(&buf); + + if(virResCtrlWrite(dom->name, "schemata", schemata) < 0) + goto cleanup; + if(!virStringIsEmpty(dom->tasks) + && virResCtrlWrite(dom->name, "tasks", dom->tasks) < 0) + goto cleanup; + rc = 0; + +cleanup: + VIR_FREE(schemata); + return rc; +} + +static virResDomainPtr virResCtrlGetAllDomains(unsigned int *len) +{ + struct dirent *ent; + DIR *dp = NULL; + int direrr; + + *len = 0; + virResDomainPtr header, tmp, tmp_pre; + header = tmp = tmp_pre = NULL; + if (virDirOpenQuiet(&dp, RESCTRL_DIR) < 0) { + if (errno == ENOENT) + return NULL; + VIR_ERROR(_("Unable to open %s (%d)"), RESCTRL_DIR, errno); + goto cleanup; + } + + header = virResCtrlLoadDomain(NULL); + if (header == NULL) + goto cleanup; + + header->next = NULL; + + *len = 1; + + while ((direrr = virDirRead(dp, &ent, NULL)) > 0) { + if ((ent->d_type != DT_DIR) || STREQ(ent->d_name, "info")) + continue; + + tmp = virResCtrlLoadDomain(ent->d_name); + if (tmp == NULL) + goto cleanup; + + tmp->next = NULL; + + if(header->next == NULL) + header->next = tmp; + + if(tmp_pre == NULL) + tmp->pre = header; + else { + tmp->pre = tmp_pre; + tmp_pre->next = tmp; + } + + tmp_pre = tmp; + (*len) ++; + } + return header; + +cleanup: + VIR_DIR_CLOSE(dp); + tmp_pre = tmp = header; + while(tmp) { + tmp_pre = tmp; + tmp = tmp->next; + VIR_FREE(tmp_pre); + } + return NULL; +} + +static int +virResCtrlAppendDomain(virResDomainPtr dom) +{ + virResDomainPtr p = DomainAll.domains; + while(p->next != NULL) p=p->next; + p->next = dom; + dom->pre = p; + DomainAll.num_domains +=1; + return 0; +} + +static int +virResCtrlGetSocketIdByHostID(int type, unsigned int hostid) +{ + int i; + for( i = 0; i < ResCtrlAll[type].num_banks; i++) { + if(ResCtrlAll[type].cache_banks[i].host_id == hostid) + return i; + } + return -1; +} + +static int +virResCtrlCalculateSchemata(int type, + int sid, + unsigned hostid, + unsigned long long size) +{ + int i; + int count; + virResDomainPtr p; + unsigned int tmp_schemata; + unsigned int schemata_sum = 0; + + if(ResCtrlAll[type].cache_banks[sid].cache_left < size) { + VIR_ERROR("Note enough cache left on bank %u", hostid); + return -1; + } + if ((count = size / ResCtrlAll[type].cache_banks[sid].cache_min) <= 0) { + VIR_ERROR("Error cache size %llu", size); + return -1; + } + + tmp_schemata = VIR_RESCTRL_GET_SCHEMATA(count); + + p = DomainAll.domains; + p = p->next; + for (i = 1; i < DomainAll.num_domains; i ++) { + schemata_sum |= p->schematas[type]->schemata_items[sid].schemata; + p = p->next; + } + + tmp_schemata = tmp_schemata << (ResCtrlAll[type].cbm_len - count); + + while ((tmp_schemata & schemata_sum) != 0) + tmp_schemata = tmp_schemata >> 1; + return tmp_schemata; +} + +int virResCtrlSetCacheBanks(virDomainCachetunePtr cachetune, + unsigned char* uuid, pid_t pid) +{ + size_t i; + char name[VIR_UUID_STRING_BUFLEN]; + virResDomainPtr p; + int type; + int sid; + int schemata; + + virUUIDFormat(uuid, name); + + for(i = 0 ; i < cachetune->n_banks; i++) { + VIR_DEBUG("cache_banks %u, %u, %llu, %s", + cachetune->cache_banks[i].id, + cachetune->cache_banks[i].host_id, + cachetune->cache_banks[i].size, + cachetune->cache_banks[i].type); + } + + p = virResCtrlGetDomain(name); + if (p == NULL) { + VIR_DEBUG("no domain name %s found, create new one!", name); + p = virResCtrlCreateDomain(name); + } + + if (p != NULL) { + for(i = 0 ; i < cachetune->n_banks; i++) { + if ((type = virResCtrlTypeFromString( + cachetune->cache_banks[i].type)) < 0 ) { + VIR_WARN("Ignore unknown cache type %s.", + cachetune->cache_banks[i].type); + continue; + } + + if((sid = virResCtrlGetSocketIdByHostID( + type, cachetune->cache_banks[i].host_id)) < 0) { + VIR_WARN("Can not find cache bank host id %u.", + cachetune->cache_banks[i].host_id); + continue; + } + + if((schemata = virResCtrlCalculateSchemata( + type, sid, cachetune->cache_banks[i].host_id, + cachetune->cache_banks[i].size)) < 0) { + VIR_WARN("Failed to set schemata for cache bank id %u", + cachetune->cache_banks[i].id); + continue; + } + + p->schematas[type]->schemata_items[sid].schemata = schemata; + } + + virResCtrlAddTask(p, pid); + + if(virResCtrlFlushDomainToSysfs(p) < 0) { + VIR_WARN("failed to flush domain %s to sysfs", name); + virResCtrlDestroyDomain(p); + return -1; + } + virResCtrlAppendDomain(p); + } else { + VIR_ERROR("Failed to create a domain in sysfs"); + return -1; + } + + virResCtrlRefresh(); + /* after refresh, flush header's schemata changes to sys fs */ + if(virResCtrlFlushDomainToSysfs(DomainAll.domains) < 0) + VIR_WARN("failed to flush domain to sysfs"); + + return 0; +} + +/* Should be called after pid disappeared, we recalculate + * schemata of default and flush it to sys fs. + */ +int virResCtrlUpdate(void) { + int rc; + if ((rc = virResCtrlRefresh()) < 0) + VIR_WARN("failed to refresh rescontrol"); + + if ((rc = virResCtrlFlushDomainToSysfs(DomainAll.domains)) < 0) + VIR_WARN("failed to flush domain to sysfs"); + + return rc; +} + int virResCtrlInit(void) { int i = 0; char *tmp; @@ -341,6 +975,11 @@ int virResCtrlInit(void) { VIR_FREE(tmp); } + + DomainAll.domains = virResCtrlGetAllDomains(&(DomainAll.num_domains)); + + if((rc = virResCtrlRefresh()) < 0) + VIR_WARN("failed to refresh resource control"); return rc; } diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h index 3cc41da..11f43d8 100644 --- a/src/util/virresctrl.h +++ b/src/util/virresctrl.h @@ -26,6 +26,7 @@ # include "virutil.h" # include "virbitmap.h" +# include "domain_conf.h" #define RESCTRL_DIR "/sys/fs/resctrl" #define RESCTRL_INFO_DIR "/sys/fs/resctrl/info" @@ -82,9 +83,53 @@ struct _virResCtrl { virResCacheBankPtr cache_banks; }; +/** + * a virResSchemata represents a schemata object under a resource control + * domain. + */ +typedef struct _virResSchemataItem virResSchemataItem; +typedef virResSchemataItem *virResSchemataItemPtr; +struct _virResSchemataItem { + unsigned int socket_no; + unsigned schemata; +}; + +typedef struct _virResSchemata virResSchemata; +typedef virResSchemata *virResSchemataPtr; +struct _virResSchemata { + unsigned int n_schemata_items; + virResSchemataItemPtr schemata_items; +}; + +/** + * a virResDomain represents a resource control domain. It's a double linked + * list. + */ + +typedef struct _virResDomain virResDomain; +typedef virResDomain *virResDomainPtr; + +struct _virResDomain { + char* name; + virResSchemataPtr schematas[RDT_NUM_RESOURCES]; + char* tasks; + int n_sockets; + virResDomainPtr pre; + virResDomainPtr next; +}; + +/* All resource control domains on this host*/ + +typedef struct _virResCtrlDomain virResCtrlDomain; +typedef virResCtrlDomain *virResCtrlDomainPtr; +struct _virResCtrlDomain { + unsigned int num_domains; + virResDomainPtr domains; +}; bool virResCtrlAvailable(void); int virResCtrlInit(void); virResCtrlPtr virResCtrlGet(int); - +int virResCtrlSetCacheBanks(virDomainCachetunePtr, unsigned char*, pid_t); +int virResCtrlUpdate(void); #endif -- 1.9.1

Set cache banks while booting a new domain. Signed-off-by: Eli Qiao <liyong.qiao@intel.com> --- src/qemu/qemu_process.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 184440d..ec3d3c2 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -76,6 +76,7 @@ #include "configmake.h" #include "nwfilter_conf.h" #include "netdev_bandwidth_conf.h" +#include "virresctrl.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -5714,6 +5715,11 @@ qemuProcessLaunch(virConnectPtr conn, qemuProcessAutoDestroyAdd(driver, vm, conn) < 0) goto cleanup; + VIR_DEBUG("Cache allocation"); + + if (virResCtrlSetCacheBanks(&(vm->def->cachetune), vm->def->uuid, vm->pid) < 0) + goto cleanup; + ret = 0; cleanup: @@ -6216,6 +6222,10 @@ void qemuProcessStop(virQEMUDriverPtr driver, virPerfFree(priv->perf); priv->perf = NULL; + if(virResCtrlUpdate() < 0) + VIR_WARN("Failed to update resource control for %s", + vm->def->name); + qemuProcessRemoveDomainStatus(driver, vm); /* Remove VNC and Spice ports from port reservation bitmap, but only if -- 1.9.1
participants (1)
-
Eli Qiao