[libvirt] [PATCH 1/1] perf: add support to perf event for MBM

Some Intel processor families (e.g. the Intel Xeon processor E5 v3 family) introduced some RDT (Resource Director Technology) features to monitor or control shared resource. Among these features, MBM (Memory Bandwidth Monitoring), which is build on the CMT (Cache Monitoring Technology) infrastructure, provides OS/VMM a way to monitor bandwidth from one level of cache to another. With current perf framework, this patch adds support to perf event for MBM. Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> --- include/libvirt/libvirt-domain.h | 18 ++++++++++++ src/qemu/qemu_driver.c | 35 +++++++++++++++++++---- src/util/virperf.c | 60 ++++++++++++++++++++++++---------------- src/util/virperf.h | 2 ++ 4 files changed, 85 insertions(+), 30 deletions(-) diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 160f20f..e4594f0 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -1904,6 +1904,24 @@ void virDomainStatsRecordListFree(virDomainStatsRecordPtr *stats); */ # define VIR_PERF_PARAM_CMT "cmt" +/** + * VIR_PERF_PARAM_MBMT: + * + * Macro for typed parameter name that represents MBMT perf event + * which can be used to monitor total system bandwidth (bytes/s) + * from one level of cache to another. + */ +# define VIR_PERF_PARAM_MBMT "mbmt" + +/** + * VIR_PERF_PARAM_MBML: + * + * Macro for typed parameter name that represents MBML perf event + * which can be used to monitor the amount of data (bytes/s) sent + * through the memory controller on the socket. + */ +# define VIR_PERF_PARAM_MBML "mbml" + int virDomainGetPerfEvents(virDomainPtr dom, virTypedParameterPtr *params, int *nparams, diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c4c4968..a3bd7ec 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -10051,6 +10051,8 @@ qemuDomainSetPerfEvents(virDomainPtr dom, if (virTypedParamsValidate(params, nparams, VIR_PERF_PARAM_CMT, VIR_TYPED_PARAM_BOOLEAN, + VIR_PERF_PARAM_MBMT, VIR_TYPED_PARAM_BOOLEAN, + VIR_PERF_PARAM_MBML, VIR_TYPED_PARAM_BOOLEAN, NULL) < 0) return -1; @@ -19495,20 +19497,38 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver, #undef QEMU_ADD_COUNT_PARAM static int -qemuDomainGetStatsPerfCmt(virPerfPtr perf, +qemuDomainGetStatsPerfRdt(virPerfPtr perf, + virPerfEventType type, virDomainStatsRecordPtr record, int *maxparams) { - uint64_t cache = 0; + uint64_t value = 0; - if (virPerfReadEvent(perf, VIR_PERF_EVENT_CMT, &cache) < 0) + if (virPerfReadEvent(perf, type, &value) < 0) return -1; - if (virTypedParamsAddULLong(&record->params, + if (type == VIR_PERF_EVENT_CMT && + virTypedParamsAddULLong(&record->params, &record->nparams, maxparams, "perf.cache", - cache) < 0) + value) < 0) + return -1; + + if (type == VIR_PERF_EVENT_MBMT && + virTypedParamsAddULLong(&record->params, + &record->nparams, + maxparams, + "perf.total_bytes", + value) < 0) + return -1; + + if (type == VIR_PERF_EVENT_MBML && + virTypedParamsAddULLong(&record->params, + &record->nparams, + maxparams, + "perf.local_bytes", + value) < 0) return -1; return 0; @@ -19531,7 +19551,10 @@ qemuDomainGetStatsPerf(virQEMUDriverPtr driver ATTRIBUTE_UNUSED, switch (i) { case VIR_PERF_EVENT_CMT: - if (qemuDomainGetStatsPerfCmt(priv->perf, record, maxparams) < 0) + case VIR_PERF_EVENT_MBMT: + case VIR_PERF_EVENT_MBML: + if (qemuDomainGetStatsPerfRdt(priv->perf, i, + record, maxparams) < 0) goto cleanup; break; } diff --git a/src/util/virperf.c b/src/util/virperf.c index bd65587..826d36a 100644 --- a/src/util/virperf.c +++ b/src/util/virperf.c @@ -38,7 +38,7 @@ VIR_LOG_INIT("util.perf"); #define VIR_FROM_THIS VIR_FROM_PERF VIR_ENUM_IMPL(virPerfEvent, VIR_PERF_EVENT_LAST, - "cmt"); + "cmt", "mbmt", "mbml"); struct virPerfEvent { int type; @@ -110,10 +110,10 @@ virPerfGetEvent(virPerfPtr perf, } static int -virPerfCmtEnable(virPerfEventPtr event, +virPerfRdtEnable(virPerfEventPtr event, pid_t pid) { - struct perf_event_attr cmt_attr; + struct perf_event_attr rdt_attr; char *buf = NULL; char *tmp = NULL; unsigned int event_type, scale; @@ -127,32 +127,42 @@ virPerfCmtEnable(virPerfEventPtr event, if (virStrToLong_ui(buf, NULL, 10, &event_type) < 0) { virReportSystemError(errno, "%s", - _("failed to get cmt event type")); + _("failed to get rdt event type")); goto error; } VIR_FREE(buf); - if (virFileReadAll("/sys/devices/intel_cqm/events/llc_occupancy.scale", - 10, &buf) < 0) - goto error; + memset(&rdt_attr, 0, sizeof(rdt_attr)); + rdt_attr.size = sizeof(rdt_attr); + rdt_attr.type = event_type; + rdt_attr.inherit = 1; + rdt_attr.disabled = 1; + rdt_attr.enable_on_exec = 0; - if (virStrToLong_ui(buf, NULL, 10, &scale) < 0) { - virReportSystemError(errno, "%s", - _("failed to get cmt scaling factor")); - goto error; + switch (event->type) { + case VIR_PERF_EVENT_CMT: + if (virFileReadAll("/sys/devices/intel_cqm/events/llc_occupancy.scale", + 10, &buf) < 0) + goto error; + + if (virStrToLong_ui(buf, NULL, 10, &scale) < 0) { + virReportSystemError(errno, "%s", + _("failed to get cmt scaling factor")); + goto error; + } + event->efields.cmt.scale = scale; + + rdt_attr.config = 1; + break; + case VIR_PERF_EVENT_MBMT: + rdt_attr.config = 2; + break; + case VIR_PERF_EVENT_MBML: + rdt_attr.config = 3; + break; } - event->efields.cmt.scale = scale; - - memset(&cmt_attr, 0, sizeof(cmt_attr)); - cmt_attr.size = sizeof(cmt_attr); - cmt_attr.type = event_type; - cmt_attr.config = 1; - cmt_attr.inherit = 1; - cmt_attr.disabled = 1; - cmt_attr.enable_on_exec = 0; - - event->fd = syscall(__NR_perf_event_open, &cmt_attr, pid, -1, -1, 0); + event->fd = syscall(__NR_perf_event_open, &rdt_attr, pid, -1, -1, 0); if (event->fd < 0) { virReportSystemError(errno, _("Unable to open perf type=%d for pid=%d"), @@ -162,7 +172,7 @@ virPerfCmtEnable(virPerfEventPtr event, if (ioctl(event->fd, PERF_EVENT_IOC_ENABLE) < 0) { virReportSystemError(errno, "%s", - _("Unable to enable perf event for CMT")); + _("Unable to enable perf event for RDT")); goto error; } @@ -186,7 +196,9 @@ virPerfEventEnable(virPerfPtr perf, switch (type) { case VIR_PERF_EVENT_CMT: - if (virPerfCmtEnable(event, pid) < 0) + case VIR_PERF_EVENT_MBMT: + case VIR_PERF_EVENT_MBML: + if (virPerfRdtEnable(event, pid) < 0) return -1; break; case VIR_PERF_EVENT_LAST: diff --git a/src/util/virperf.h b/src/util/virperf.h index 8ec8753..769e85a 100644 --- a/src/util/virperf.h +++ b/src/util/virperf.h @@ -26,6 +26,8 @@ typedef enum { VIR_PERF_EVENT_CMT, + VIR_PERF_EVENT_MBMT, + VIR_PERF_EVENT_MBML, VIR_PERF_EVENT_LAST } virPerfEventType; -- 1.9.1

On Thu, May 12, 2016 at 14:55:16 +0800, Qiaowei Ren wrote:
Some Intel processor families (e.g. the Intel Xeon processor E5 v3 family) introduced some RDT (Resource Director Technology) features to monitor or control shared resource. Among these features, MBM (Memory Bandwidth Monitoring), which is build on the CMT (Cache Monitoring Technology) infrastructure, provides OS/VMM a way to monitor bandwidth from one level of cache to another.
With current perf framework, this patch adds support to perf event for MBM.
Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> --- include/libvirt/libvirt-domain.h | 18 ++++++++++++ src/qemu/qemu_driver.c | 35 +++++++++++++++++++---- src/util/virperf.c | 60 ++++++++++++++++++++++++---------------- src/util/virperf.h | 2 ++ 4 files changed, 85 insertions(+), 30 deletions(-)
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 160f20f..e4594f0 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -1904,6 +1904,24 @@ void virDomainStatsRecordListFree(virDomainStatsRecordPtr *stats); */ # define VIR_PERF_PARAM_CMT "cmt"
+/** + * VIR_PERF_PARAM_MBMT: + * + * Macro for typed parameter name that represents MBMT perf event + * which can be used to monitor total system bandwidth (bytes/s) + * from one level of cache to another. + */ +# define VIR_PERF_PARAM_MBMT "mbmt" + +/** + * VIR_PERF_PARAM_MBML: + * + * Macro for typed parameter name that represents MBML perf event + * which can be used to monitor the amount of data (bytes/s) sent + * through the memory controller on the socket. + */ +# define VIR_PERF_PARAM_MBML "mbml"
So I was a little bit mistaken what this is documenting. So these document the arguments for virDomainSetPerfEvents where the perf stuff is enabled. I was actually thinking this is documenting the values that are returned via the virConnectGetAllDomainStats/virDomainListGetStats APIs. I've just noticed that the perf events are not documented at all there. This should be fixed also for the CMT event. This part of the documentation should then state which fields in the *Stats APIs the perf events correspond to. In the documentation for the fields returned bulk Stats API the individual fields should be marked by which event they are produced.
+ int virDomainGetPerfEvents(virDomainPtr dom, virTypedParameterPtr *params, int *nparams, diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c4c4968..a3bd7ec 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -10051,6 +10051,8 @@ qemuDomainSetPerfEvents(virDomainPtr dom,
if (virTypedParamsValidate(params, nparams, VIR_PERF_PARAM_CMT, VIR_TYPED_PARAM_BOOLEAN, + VIR_PERF_PARAM_MBMT, VIR_TYPED_PARAM_BOOLEAN, + VIR_PERF_PARAM_MBML, VIR_TYPED_PARAM_BOOLEAN, NULL) < 0) return -1;
@@ -19495,20 +19497,38 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver, #undef QEMU_ADD_COUNT_PARAM
static int -qemuDomainGetStatsPerfCmt(virPerfPtr perf, +qemuDomainGetStatsPerfRdt(virPerfPtr perf, + virPerfEventType type, virDomainStatsRecordPtr record, int *maxparams) { - uint64_t cache = 0; + uint64_t value = 0;
- if (virPerfReadEvent(perf, VIR_PERF_EVENT_CMT, &cache) < 0) + if (virPerfReadEvent(perf, type, &value) < 0) return -1;
- if (virTypedParamsAddULLong(&record->params, + if (type == VIR_PERF_EVENT_CMT && + virTypedParamsAddULLong(&record->params, &record->nparams, maxparams, "perf.cache", - cache) < 0) + value) < 0) + return -1; + + if (type == VIR_PERF_EVENT_MBMT && + virTypedParamsAddULLong(&record->params, + &record->nparams, + maxparams, + "perf.total_bytes",
If you pass this string into this function you won't need to special case all the different types. Also by naming the fields similarly to the name of the feature (virPerfEvent enum) you could entirely avoid the need to pass the name of the value as it could be generated by the virPerfEventTypeToString. (concatenate it with 'perf.' to get 'perf.mbmt', 'perf.cmt' and 'perf.mbml'). Unfortunately the implementation of the CMT event already used a different name for the stats entry. Since this actually was never documented I would be in favor of changing perf.cache to perf.cmt, so that the macros can be used. I cc'd danpb who reviewed and pushed the first impl for another opinion. [...]
diff --git a/src/util/virperf.c b/src/util/virperf.c index bd65587..826d36a 100644 --- a/src/util/virperf.c +++ b/src/util/virperf.c
[...]
@@ -127,32 +127,42 @@ virPerfCmtEnable(virPerfEventPtr event,
if (virStrToLong_ui(buf, NULL, 10, &event_type) < 0) { virReportSystemError(errno, "%s", - _("failed to get cmt event type")); + _("failed to get rdt event type"));
[...]
@@ -162,7 +172,7 @@ virPerfCmtEnable(virPerfEventPtr event,
if (ioctl(event->fd, PERF_EVENT_IOC_ENABLE) < 0) { virReportSystemError(errno, "%s", - _("Unable to enable perf event for CMT")); + _("Unable to enable perf event for RDT"));
Both error messages could keep the name of the event by using virPerfEventTypeToString. Peter

-----Original Message----- From: Peter Krempa [mailto:pkrempa@redhat.com] Sent: Thursday, May 12, 2016 4:31 PM To: Ren, Qiaowei <qiaowei.ren@intel.com> Cc: libvir-list@redhat.com; berrange@redhat.com Subject: Re: [libvirt] [PATCH 1/1] perf: add support to perf event for MBM
On Thu, May 12, 2016 at 14:55:16 +0800, Qiaowei Ren wrote:
Some Intel processor families (e.g. the Intel Xeon processor E5 v3 family) introduced some RDT (Resource Director Technology) features to monitor or control shared resource. Among these features, MBM (Memory Bandwidth Monitoring), which is build on the CMT (Cache Monitoring Technology) infrastructure, provides OS/VMM a way to monitor bandwidth from one level of cache to another.
With current perf framework, this patch adds support to perf event for MBM.
Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com> --- include/libvirt/libvirt-domain.h | 18 ++++++++++++ src/qemu/qemu_driver.c | 35 +++++++++++++++++++---- src/util/virperf.c | 60 ++++++++++++++++++++++++---------------- src/util/virperf.h | 2 ++ 4 files changed, 85 insertions(+), 30 deletions(-)
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index 160f20f..e4594f0 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -1904,6 +1904,24 @@ void virDomainStatsRecordListFree(virDomainStatsRecordPtr *stats); */ # define VIR_PERF_PARAM_CMT "cmt"
+/** + * VIR_PERF_PARAM_MBMT: + * + * Macro for typed parameter name that represents MBMT perf event + * which can be used to monitor total system bandwidth (bytes/s) + * from one level of cache to another. + */ +# define VIR_PERF_PARAM_MBMT "mbmt" + +/** + * VIR_PERF_PARAM_MBML: + * + * Macro for typed parameter name that represents MBML perf event + * which can be used to monitor the amount of data (bytes/s) sent + * through the memory controller on the socket. + */ +# define VIR_PERF_PARAM_MBML "mbml"
So I was a little bit mistaken what this is documenting. So these document the arguments for virDomainSetPerfEvents where the perf stuff is enabled.
I was actually thinking this is documenting the values that are returned via the virConnectGetAllDomainStats/virDomainListGetStats APIs. I've just noticed that the perf events are not documented at all there.
This should be fixed also for the CMT event. This part of the documentation should then state which fields in the *Stats APIs the perf events correspond to.
In the documentation for the fields returned bulk Stats API the individual fields should be marked by which event they are produced.
Sure. The documentation will be added.
+ int virDomainGetPerfEvents(virDomainPtr dom, virTypedParameterPtr *params, int *nparams, diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c4c4968..a3bd7ec 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -10051,6 +10051,8 @@ qemuDomainSetPerfEvents(virDomainPtr dom,
if (virTypedParamsValidate(params, nparams, VIR_PERF_PARAM_CMT, VIR_TYPED_PARAM_BOOLEAN, + VIR_PERF_PARAM_MBMT, VIR_TYPED_PARAM_BOOLEAN, + VIR_PERF_PARAM_MBML, + VIR_TYPED_PARAM_BOOLEAN, NULL) < 0) return -1;
@@ -19495,20 +19497,38 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver, #undef QEMU_ADD_COUNT_PARAM
static int -qemuDomainGetStatsPerfCmt(virPerfPtr perf, +qemuDomainGetStatsPerfRdt(virPerfPtr perf, + virPerfEventType type, virDomainStatsRecordPtr record, int *maxparams) { - uint64_t cache = 0; + uint64_t value = 0;
- if (virPerfReadEvent(perf, VIR_PERF_EVENT_CMT, &cache) < 0) + if (virPerfReadEvent(perf, type, &value) < 0) return -1;
- if (virTypedParamsAddULLong(&record->params, + if (type == VIR_PERF_EVENT_CMT && + virTypedParamsAddULLong(&record->params, &record->nparams, maxparams, "perf.cache", - cache) < 0) + value) < 0) + return -1; + + if (type == VIR_PERF_EVENT_MBMT && + virTypedParamsAddULLong(&record->params, + &record->nparams, + maxparams, + "perf.total_bytes",
If you pass this string into this function you won't need to special case all the different types.
Also by naming the fields similarly to the name of the feature (virPerfEvent enum) you could entirely avoid the need to pass the name of the value as it could be generated by the virPerfEventTypeToString. (concatenate it with 'perf.' to get 'perf.mbmt', 'perf.cmt' and 'perf.mbml').
Unfortunately the implementation of the CMT event already used a different name for the stats entry.
Since this actually was never documented I would be in favor of changing perf.cache to perf.cmt, so that the macros can be used. I cc'd danpb who reviewed and pushed the first impl for another opinion.
Yes. I guess also that the new perf feature should be not used by customers and maybe we can change perf.cache to perf.cmt. Otherwise we can only use the macros for mbmt & mbml Thanks, Qiaowei
participants (3)
-
Peter Krempa
-
Qiaowei Ren
-
Ren, Qiaowei