On Thu, Oct 29, 2015 at 14:02:29 +0800, Qiaowei Ren wrote:
One RFC in
https://www.redhat.com/archives/libvir-list/2015-June/msg01509.html
CMT (Cache Monitoring Technology) can be used to measure the
usage of cache by VM running on the host. This patch will
extend the bulk stats API (virDomainListGetStats) to add this
field. Applications based on libvirt can use this API to achieve
cache usage of VM. Because CMT implementation in Linux kernel
is based on perf mechanism, this patch will enable perf event
for CMT when VM is created and disable it when VM is destroyed.
Signed-off-by: Qiaowei Ren <qiaowei.ren(a)intel.com>
---
include/libvirt/libvirt-domain.h | 1 +
src/qemu/qemu_domain.h | 3 ++
src/qemu/qemu_driver.c | 48 ++++++++++++++++++++++
src/qemu/qemu_process.c | 86 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 138 insertions(+)
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index e8202cf..fb5e1f4 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -1764,6 +1764,7 @@ typedef enum {
VIR_DOMAIN_STATS_VCPU = (1 << 3), /* return domain virtual CPU info */
VIR_DOMAIN_STATS_INTERFACE = (1 << 4), /* return domain interfaces info */
VIR_DOMAIN_STATS_BLOCK = (1 << 5), /* return domain block info */
+ VIR_DOMAIN_STATS_CACHE = (1 << 6), /* return domain block info */
This flag is not documented anywhere and the comment is completely
wrong.
} virDomainStatsTypes;
typedef enum {
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 54e1e7b..31bce33 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -196,6 +196,9 @@ struct _qemuDomainObjPrivate {
bool hookRun; /* true if there was a hook run over this domain */
+ int cmt_fd; /* perf handler for CMT */
So you declare cmt_fd as int...
+
+
Why two empty lines?
/* Bitmaps below hold data from the auto NUMA feature */
virBitmapPtr autoNodeset;
virBitmapPtr autoCpuset;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 4cfae03..8c678c9 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19320,6 +19320,53 @@ qemuDomainGetStatsBlock(virQEMUDriverPtr driver,
#undef QEMU_ADD_COUNT_PARAM
+static int
+qemuDomainGetStatsCache(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
+ virDomainObjPtr dom,
+ virDomainStatsRecordPtr record,
+ int *maxparams,
+ unsigned int privflags ATTRIBUTE_UNUSED)
+{
+ qemuDomainObjPrivatePtr priv = dom->privateData;
+ FILE *fd;
+ unsigned long long cache = 0;
+ int scaling_factor = 0;
+
+ if (priv->cmt_fd <= 0)
Shouldn't this only check for cmt_fd < 0?
+ return -1;
+
+ if (read(priv->cmt_fd, &cache, sizeof(uint64_t)) < 0) {
Shouldn't cache be declared as uint64_t then?
+ virReportSystemError(errno, "%s",
+ _("Unable to read cache data"));
+ return -1;
+ }
+
+ fd = fopen("/sys/devices/intel_cqm/events/llc_occupancy.scale",
"r");
+ if (!fd) {
+ virReportSystemError(errno, "%s",
+ _("Unable to open CMT scale file"));
+ return -1;
+ }
+ if (fscanf(fd, "%d", &scaling_factor) != 1) {
+ virReportSystemError(errno, "%s",
+ _("Unable to read CMT scale file"));
+ VIR_FORCE_FCLOSE(fd);
+ return -1;
+ }
+ VIR_FORCE_FCLOSE(fd);
+
+ cache *= scaling_factor;
+
+ if (virTypedParamsAddULLong(&record->params,
+ &record->nparams,
+ maxparams,
+ "cache.current",
+ cache) < 0)
Any documentation of this new statistics is missing. The commit message
doesn't really help understanding it either.
+ return -1;
+
+ return 0;
+}
+
typedef int
(*qemuDomainGetStatsFunc)(virQEMUDriverPtr driver,
virDomainObjPtr dom,
@@ -19340,6 +19387,7 @@ static struct qemuDomainGetStatsWorker
qemuDomainGetStatsWorkers[] = {
{ qemuDomainGetStatsVcpu, VIR_DOMAIN_STATS_VCPU, false },
{ qemuDomainGetStatsInterface, VIR_DOMAIN_STATS_INTERFACE, false },
{ qemuDomainGetStatsBlock, VIR_DOMAIN_STATS_BLOCK, true },
+ { qemuDomainGetStatsCache, VIR_DOMAIN_STATS_CACHE, false },
{ NULL, 0, false }
};
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index ba84182..00b889d 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -25,8 +25,11 @@
#include <unistd.h>
#include <signal.h>
#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
#if defined(__linux__)
# include <linux/capability.h>
+# include <linux/perf_event.h>
What if there's no perf_event.h header file?
#elif defined(__FreeBSD__)
# include <sys/param.h>
# include <sys/cpuset.h>
@@ -4274,6 +4277,75 @@ qemuLogOperation(virDomainObjPtr vm,
goto cleanup;
}
+/*
+ * Enable CMT(Cache Monitoring Technology) to measure the usage of
+ * cache by VM running on the node.
+ *
+ * Because the hypervisor implement CMT support basedon perf mechanism,
+ * we should enable perf event for CMT. The function 'sys_erf_event_open'
+ * is perf syscall wrapper.
+ */
+#ifdef __linux__
+static long sys_perf_event_open(struct perf_event_attr *hw_event,
+ pid_t pid, int cpu, int group_fd,
One parameter per line is easier to read.
+ unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, hw_event, pid, cpu,
+ group_fd, flags);
Wrong indentation.
+}
+static int qemuCmtEnable(virDomainObjPtr vm)
+{
+ qemuDomainObjPrivatePtr priv = vm->privateData;
+ struct perf_event_attr cmt_attr;
+ int event_type;
+ FILE *fp;
+
+ fp = fopen("/sys/devices/intel_cqm/type", "r");
+ if (!fp) {
+ virReportSystemError(errno, "%s",
+ _("CMT is not available on this host"));
+ return -1;
+ }
+ if (fscanf(fp, "%d", &event_type) != 1) {
+ virReportSystemError(errno, "%s",
+ _("Unable to read event type file."));
+ VIR_FORCE_FCLOSE(fp);
+ return -1;
+ }
+ VIR_FORCE_FCLOSE(fp);
+
+ memset(&cmt_attr, 0, sizeof(struct perf_event_attr));
+ cmt_attr.size = sizeof(struct perf_event_attr);
+ cmt_attr.type = event_type;
+ cmt_attr.config = 1;
+ cmt_attr.inherit = 1;
+ cmt_attr.disabled = 1;
+ cmt_attr.enable_on_exec = 0;
+
+ priv->cmt_fd = sys_perf_event_open(&cmt_attr, vm->pid, -1, -1, 0);
...while here you assign long (from sys_perf_event_open) to cmt_fd.
+ if (priv->cmt_fd < 0) {
+ virReportSystemError(errno,
+ _("Unable to open perf type=%d for pid=%d"),
+ event_type, vm->pid);
+ return -1;
+ }
+
+ if (ioctl(priv->cmt_fd, PERF_EVENT_IOC_ENABLE) < 0) {
+ virReportSystemError(errno, "%s",
+ _("Unable to enable perf event for CMT"));
+ return -1;
+ }
+
+ return 0;
+}
+#else
+static int qemuCmtEnable(virDomainObjPtr vm)
+{
+ virReportUnsupportedError();
+ return -1;
+}
+#endif
+
int qemuProcessStart(virConnectPtr conn,
virQEMUDriverPtr driver,
virDomainObjPtr vm,
@@ -4954,6 +5026,11 @@ int qemuProcessStart(virConnectPtr conn,
if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0)
goto cleanup;
+ VIR_DEBUG("Setting CMT perf counter");
+ if (qemuCmtEnable(vm) < 0)
+ virReportSystemError(errno, "%s",
+ _("CMT is not available on this host"));
+
By setting en error here, you overwrite any useful error set inside
qemuCmtEnable. Anyway since you apparently want to make the error non
fatal, you should not even set it. The messages should be logged as
warnings or even lower.
If there is no performance or other impact of having cmt enabled, we
could enable it on hosts that support it, but we either need to log any
errors with info level or first check whether the host supports cmt and
only try to enable it if it does, in which case we could report the
errors as warnings.
However, if a negative impact of enabling cmt is possible, we need an
explicit configuration option (turned off by default) that would ask
libvirt to enable cmt. Any errors could be logged as warnings in this
case.
And what if libvirtd is restarted? Shouldn't we call sys_perf_event_open
again?
/* finally we can call the 'started' hook script if any
*/
if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
char *xml = qemuDomainDefFormatXML(driver, vm->def, 0);
@@ -5122,6 +5199,15 @@ void qemuProcessStop(virQEMUDriverPtr driver,
virPortAllocatorRelease(driver->migrationPorts, priv->nbdPort);
priv->nbdPort = 0;
+ /* Disable CMT */
+ if (priv->cmt_fd > 0) {
cmt_fd >= 0 ?
+ if (ioctl(priv->cmt_fd, PERF_EVENT_IOC_DISABLE) < 0)
{
+ virReportSystemError(errno, "%s",
+ _("Unable to disable perf event for CMT"));
A warning should be good enough.
+ }
+ VIR_FORCE_CLOSE(priv->cmt_fd);
+ }
+
This should go into a dedicated function.
if (priv->agent) {
qemuAgentClose(priv->agent);
priv->agent = NULL;
Jirka