[libvirt] [PATCH] perf: add one more perf event support

With current perf framework, this patch adds support and documentation for branch instructions perf event. Signed-off-by: Nitesh Konkar <nitkon12@linux.vnet.ibm.com> --- docs/formatdomain.html.in | 6 ++++ docs/schemas/domaincommon.rng | 1 + include/libvirt/libvirt-domain.h | 10 +++++++ src/libvirt-domain.c | 38 +++++++++++++------------ src/qemu/qemu_driver.c | 1 + src/util/virperf.c | 6 +++- src/util/virperf.h | 9 +++--- tests/genericxml2xmlindata/generic-perf.xml | 1 + tools/virsh.pod | 43 +++++++++++++++-------------- 9 files changed, 72 insertions(+), 43 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 6bd02cc..259b2c6 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1927,6 +1927,7 @@ <event name='instructions' enabled='yes'/> <event name='cache_references' enabled='no'/> <event name='cache_misses' enabled='no'/> + <event name='branch_instructions' enabled='no'/> </perf> ... </pre> @@ -1972,6 +1973,11 @@ <td>the count of cache misses by applications running on the platform</td> <td><code>perf.cache_misses</code></td> </tr> + <tr> + <td><code>hardware_instructions</code></td> + <td>the count of hardware instructions by applications running on the platform</td> + <td><code>perf.hardware_instructions</code></td> + </tr> </table> <h3><a name="elementsDevices">Devices</a></h3> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index bb903ef..5fdc036 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -427,6 +427,7 @@ <value>instructions</value> <value>cache_references</value> <value>cache_misses</value> + <value>branch_instructions</value> </choice> </attribute> <attribute name="enabled"> diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h index a8435ab..aad2541 100644 --- a/include/libvirt/libvirt-domain.h +++ b/include/libvirt/libvirt-domain.h @@ -2125,6 +2125,16 @@ void 
virDomainStatsRecordListFree(virDomainStatsRecordPtr *stats); */ # define VIR_PERF_PARAM_CPU_CYCLES "cpu_cycles" +/** + * VIR_PERF_PARAM_BRANCH_INSTRUCTIONS: + * + * Macro for typed parameter name that represents branch instructions + * perf event which can be used to measure the count of branch instructions + * by applications running on the platform. It corresponds to the + * "perf.branch_instructions" field in the *Stats APIs. + */ +# define VIR_PERF_PARAM_BRANCH_INSTRUCTIONS "branch_instructions" + int virDomainGetPerfEvents(virDomainPtr dom, virTypedParameterPtr *params, int *nparams, diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c index ce199f0..c12c87f 100644 --- a/src/libvirt-domain.c +++ b/src/libvirt-domain.c @@ -11195,24 +11195,26 @@ virConnectGetDomainCapabilities(virConnectPtr conn, * * VIR_DOMAIN_STATS_PERF: Return perf event statistics. * The typed parameter keys are in this format: - * "perf.cmt" - the usage of l3 cache (bytes) by applications running on the - * platform as unsigned long long. It is produced by cmt perf - * event. - * "perf.mbmt" - the total system bandwidth (bytes/s) from one level of cache - * to another as unsigned long long. It is produced by mbmt perf - * event. - * "perf.mbml" - the amount of data (bytes/s) sent through the memory controller - * on the socket as unsigned long long. It is produced by mbml - * perf event. - * "perf.cache_misses" - the count of cache misses as unsigned long long. - * It is produced by cache_misses perf event. - * "perf.cache_references" - the count of cache hits as unsigned long long. - * It is produced by cache_references perf event. - * "perf.instructions" - The count of instructions as unsigned long long. - * It is produced by instructions perf event. - * "perf.cpu_cycles" - The count of cpu cycles (total/elapsed) as an - * unsigned long long. It is produced by cpu_cycles - * perf event. 
+ * "perf.cmt" - the usage of l3 cache (bytes) by applications running on the + * platform as unsigned long long. It is produced by cmt perf + * event. + * "perf.mbmt" - the total system bandwidth (bytes/s) from one level of cache + * to another as unsigned long long. It is produced by mbmt perf + * event. + * "perf.mbml" - the amount of data (bytes/s) sent through the memory controller + * on the socket as unsigned long long. It is produced by mbml + * perf event. + * "perf.cache_misses" - the count of cache misses as unsigned long long. + * It is produced by cache_misses perf event. + * "perf.cache_references" - the count of cache hits as unsigned long long. + * It is produced by cache_references perf event. + * "perf.instructions" - The count of instructions as unsigned long long. + * It is produced by instructions perf event. + * "perf.cpu_cycles" - The count of cpu cycles (total/elapsed) as an + * unsigned long long. It is produced by cpu_cycles + * perf event. + * "perf.branch_instructions" - The count of branch instructions as unsigned long long. + * It is produced by branch_instructions perf event. 
* * Note that entire stats groups or individual stat fields may be missing from * the output in case they are not supported by the given hypervisor, are not diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 3517aa2..c7fad30 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -9852,6 +9852,7 @@ qemuDomainSetPerfEvents(virDomainPtr dom, VIR_PERF_PARAM_INSTRUCTIONS, VIR_TYPED_PARAM_BOOLEAN, VIR_PERF_PARAM_CACHE_REFERENCES, VIR_TYPED_PARAM_BOOLEAN, VIR_PERF_PARAM_CACHE_MISSES, VIR_TYPED_PARAM_BOOLEAN, + VIR_PERF_PARAM_BRANCH_INSTRUCTIONS, VIR_TYPED_PARAM_BOOLEAN, NULL) < 0) return -1; diff --git a/src/util/virperf.c b/src/util/virperf.c index 5d57962..635faf1 100644 --- a/src/util/virperf.c +++ b/src/util/virperf.c @@ -40,7 +40,8 @@ VIR_LOG_INIT("util.perf"); VIR_ENUM_IMPL(virPerfEvent, VIR_PERF_EVENT_LAST, "cmt", "mbmt", "mbml", "cpu_cycles", "instructions", - "cache_references", "cache_misses"); + "cache_references", "cache_misses", + "branch_instructions"); struct virPerfEvent { int type; @@ -85,6 +86,9 @@ static struct virPerfEventAttr attrs[] = { {.type = VIR_PERF_EVENT_CACHE_MISSES, .attrType = PERF_TYPE_HARDWARE, .attrConfig = PERF_COUNT_HW_CACHE_MISSES}, + {.type = VIR_PERF_EVENT_BRANCH_INSTRUCTIONS, + .attrType = PERF_TYPE_HARDWARE, + .attrConfig = PERF_COUNT_HW_BRANCH_INSTRUCTIONS}, }; typedef struct virPerfEventAttr *virPerfEventAttrPtr; diff --git a/src/util/virperf.h b/src/util/virperf.h index 3fca2d0..e43f332 100644 --- a/src/util/virperf.h +++ b/src/util/virperf.h @@ -32,10 +32,11 @@ typedef enum { VIR_PERF_EVENT_MBMT, /* Memory Bandwidth Monitoring Total */ VIR_PERF_EVENT_MBML, /* Memory Bandwidth Monitor Limit for controller */ - VIR_PERF_EVENT_CPU_CYCLES, /* Count of CPU Cycles (total/elapsed) */ - VIR_PERF_EVENT_INSTRUCTIONS, /* Count of instructions for application */ - VIR_PERF_EVENT_CACHE_REFERENCES, /* Cache hits by applications */ - VIR_PERF_EVENT_CACHE_MISSES, /* Cache misses by applications */ + 
VIR_PERF_EVENT_CPU_CYCLES, /* Count of CPU Cycles (total/elapsed) */ + VIR_PERF_EVENT_INSTRUCTIONS, /* Count of instructions for application */ + VIR_PERF_EVENT_CACHE_REFERENCES, /* Cache hits by applications */ + VIR_PERF_EVENT_CACHE_MISSES, /* Cache misses by applications */ + VIR_PERF_EVENT_BRANCH_INSTRUCTIONS, /* Count of branch instructions by applications*/ VIR_PERF_EVENT_LAST } virPerfEventType; diff --git a/tests/genericxml2xmlindata/generic-perf.xml b/tests/genericxml2xmlindata/generic-perf.xml index a914133..92e5847 100644 --- a/tests/genericxml2xmlindata/generic-perf.xml +++ b/tests/genericxml2xmlindata/generic-perf.xml @@ -20,6 +20,7 @@ <event name='instructions' enabled='yes'/> <event name='cache_references' enabled='no'/> <event name='cache_misses' enabled='no'/> + <event name='branch_instructions' enabled='yes'/> </perf> <devices> </devices> diff --git a/tools/virsh.pod b/tools/virsh.pod index 247d235..aa5b756 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -945,7 +945,8 @@ I<--perf> returns the statistics of all enabled perf events: "perf.cpu_cycles" - the count of cpu cycles (total/elapsed), "perf.instructions" - the count of instructions, "perf.cache_references" - the count of cache hits, -"perf.cache_misses" - the count of caches misses +"perf.cache_misses" - the count of caches misses, +"perf.branch_instructions" - the count of branch instructions See the B<perf> command for more details about each event. @@ -2270,25 +2271,27 @@ performance event. B<eventSpec> is a string list of one or more events separated by commas. Valid event names are as follows: B<Valid perf event names> - cmt - A PQos (Platform Qos) feature to monitor the - usage of cache by applications running on the - platform. - mbmt - Provides a way to monitor the total system - memory bandwidth between one level of cache - and another. - mbml - Provides a way to limit the amount of data - (bytes/s) send through the memory controller - on the socket. 
- cache_misses - Provides the count of cache misses by - applications running on the platform. - cache_references - Provides the count of cache hits by - applications running on th e platform. - instructions - Provides the count of instructions executed - by applications running on the platform. - cpu_cycles - Provides the count of cpu cycles - (total/elapsed). May be used with - instructions in order to get a cycles - per instruction. + cmt - A PQos (Platform Qos) feature to monitor the + usage of cache by applications running on the + platform. + mbmt - Provides a way to monitor the total system + memory bandwidth between one level of cache + and another. + mbml - Provides a way to limit the amount of data + (bytes/s) send through the memory controller + on the socket. + cache_misses - Provides the count of cache misses by + applications running on the platform. + cache_references - Provides the count of cache hits by + applications running on th e platform. + instructions - Provides the count of instructions executed + by applications running on the platform. + cpu_cycles - Provides the count of cpu cycles + (total/elapsed). May be used with + instructions in order to get a cycles + per instruction. + branch_instructions - Provides the count of branch instructions + executed by applications running on the platform. B<Note>: The statistics can be retrieved using the B<domstats> command using the I<--perf> flag. -- 1.9.3

On Tue, Dec 06, 2016 at 15:52:08 +0530, Nitesh Konkar wrote:
With current perf framework, this patch adds support and documentation for branch instructions perf event.
Signed-off-by: Nitesh Konkar <nitkon12@linux.vnet.ibm.com> --- docs/formatdomain.html.in | 6 ++++ docs/schemas/domaincommon.rng | 1 + include/libvirt/libvirt-domain.h | 10 +++++++ src/libvirt-domain.c | 38 +++++++++++++------------ src/qemu/qemu_driver.c | 1 + src/util/virperf.c | 6 +++- src/util/virperf.h | 9 +++--- tests/genericxml2xmlindata/generic-perf.xml | 1 + tools/virsh.pod | 43 +++++++++++++++-------------- 9 files changed, 72 insertions(+), 43 deletions(-)
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 6bd02cc..259b2c6 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1927,6 +1927,7 @@ <event name='instructions' enabled='yes'/> <event name='cache_references' enabled='no'/> <event name='cache_misses' enabled='no'/> + <event name='branch_instructions' enabled='no'/> </perf> ... </pre> @@ -1972,6 +1973,11 @@ <td>the count of cache misses by applications running on the platform</td> <td><code>perf.cache_misses</code></td> </tr> + <tr> + <td><code>hardware_instructions</code></td> + <td>the count of hardware instructions by applications running on the platform</td> + <td><code>perf.hardware_instructions</code></td> + </tr> </table>
The two sections above reference different names (branch_instructions in the example, hardware_instructions in the table). They should use the same name as the rest of the code.

[...]
--- a/src/libvirt-domain.c +++ b/src/libvirt-domain.c @@ -11195,24 +11195,26 @@ virConnectGetDomainCapabilities(virConnectPtr conn, * * VIR_DOMAIN_STATS_PERF: Return perf event statistics. * The typed parameter keys are in this format: - * "perf.cmt" - the usage of l3 cache (bytes) by applications running on the - * platform as unsigned long long. It is produced by cmt perf - * event. - * "perf.mbmt" - the total system bandwidth (bytes/s) from one level of cache - * to another as unsigned long long. It is produced by mbmt perf - * event. - * "perf.mbml" - the amount of data (bytes/s) sent through the memory controller - * on the socket as unsigned long long. It is produced by mbml - * perf event. - * "perf.cache_misses" - the count of cache misses as unsigned long long. - * It is produced by cache_misses perf event. - * "perf.cache_references" - the count of cache hits as unsigned long long. - * It is produced by cache_references perf event. - * "perf.instructions" - The count of instructions as unsigned long long. - * It is produced by instructions perf event. - * "perf.cpu_cycles" - The count of cpu cycles (total/elapsed) as an - * unsigned long long. It is produced by cpu_cycles - * perf event. + * "perf.cmt" - the usage of l3 cache (bytes) by applications running on the + * platform as unsigned long long. It is produced by cmt perf + * event. + * "perf.mbmt" - the total system bandwidth (bytes/s) from one level of cache + * to another as unsigned long long. It is produced by mbmt perf + * event. + * "perf.mbml" - the amount of data (bytes/s) sent through the memory controller + * on the socket as unsigned long long. It is produced by mbml + * perf event. + * "perf.cache_misses" - the count of cache misses as unsigned long long. + * It is produced by cache_misses perf event. + * "perf.cache_references" - the count of cache hits as unsigned long long. + * It is produced by cache_references perf event. 
+ * "perf.instructions" - The count of instructions as unsigned long long. + * It is produced by instructions perf event. + * "perf.cpu_cycles" - The count of cpu cycles (total/elapsed) as an + * unsigned long long. It is produced by cpu_cycles + * perf event. + * "perf.branch_instructions" - The count of branch instructions as unsigned long long. + * It is produced by branch_instructions perf event.
In addition to Peter's comments, the above hunk adds spaces that push the lines past 80 characters, making things mostly unreadable. In the long run the data gets used to generate the webpage docs, see: http://libvirt.org/html/libvirt-libvirt-domain.html#virConnectGetAllDomainSt... I think a better way to format this is seen in virDomainMemoryStats. It doesn't fit perfectly, but it sure looks better than the mass of text that results. In any case, when you do this kind of formatting change, you need to check what gets generated to see if it looks reasonable *and* it would need to be in a separate patch. [...]
--- a/src/util/virperf.h +++ b/src/util/virperf.h @@ -32,10 +32,11 @@ typedef enum { VIR_PERF_EVENT_MBMT, /* Memory Bandwidth Monitoring Total */ VIR_PERF_EVENT_MBML, /* Memory Bandwidth Monitor Limit for controller */
- VIR_PERF_EVENT_CPU_CYCLES, /* Count of CPU Cycles (total/elapsed) */ - VIR_PERF_EVENT_INSTRUCTIONS, /* Count of instructions for application */ - VIR_PERF_EVENT_CACHE_REFERENCES, /* Cache hits by applications */ - VIR_PERF_EVENT_CACHE_MISSES, /* Cache misses by applications */ + VIR_PERF_EVENT_CPU_CYCLES, /* Count of CPU Cycles (total/elapsed) */ + VIR_PERF_EVENT_INSTRUCTIONS, /* Count of instructions for application */ + VIR_PERF_EVENT_CACHE_REFERENCES, /* Cache hits by applications */ + VIR_PERF_EVENT_CACHE_MISSES, /* Cache misses by applications */ + VIR_PERF_EVENT_BRANCH_INSTRUCTIONS, /* Count of branch instructions by applications*/
Again - same here. There's no need for the extra spaces. [...]
--- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -945,7 +945,8 @@ I<--perf> returns the statistics of all enabled perf events: "perf.cpu_cycles" - the count of cpu cycles (total/elapsed), "perf.instructions" - the count of instructions, "perf.cache_references" - the count of cache hits, -"perf.cache_misses" - the count of caches misses +"perf.cache_misses" - the count of caches misses, +"perf.branch_instructions" - the count of branch instructions
See the B<perf> command for more details about each event.
@@ -2270,25 +2271,27 @@ performance event. B<eventSpec> is a string list of one or more events separated by commas. Valid event names are as follows:
B<Valid perf event names> - cmt - A PQos (Platform Qos) feature to monitor the - usage of cache by applications running on the - platform. - mbmt - Provides a way to monitor the total system - memory bandwidth between one level of cache - and another. - mbml - Provides a way to limit the amount of data - (bytes/s) send through the memory controller - on the socket. - cache_misses - Provides the count of cache misses by - applications running on the platform. - cache_references - Provides the count of cache hits by - applications running on th e platform. - instructions - Provides the count of instructions executed - by applications running on the platform. - cpu_cycles - Provides the count of cpu cycles - (total/elapsed). May be used with - instructions in order to get a cycles - per instruction. + cmt - A PQos (Platform Qos) feature to monitor the + usage of cache by applications running on the + platform. + mbmt - Provides a way to monitor the total system + memory bandwidth between one level of cache + and another. + mbml - Provides a way to limit the amount of data + (bytes/s) send through the memory controller + on the socket. + cache_misses - Provides the count of cache misses by + applications running on the platform. + cache_references - Provides the count of cache hits by + applications running on th e platform. + instructions - Provides the count of instructions executed + by applications running on the platform. + cpu_cycles - Provides the count of cpu cycles + (total/elapsed). May be used with + instructions in order to get a cycles + per instruction. + branch_instructions - Provides the count of branch instructions + executed by applications running on the platform.
B<Note>: The statistics can be retrieved using the B<domstats> command using the I<--perf> flag.
And this just makes the output unreadable on an 80-character-wide display/terminal. It's easy to check in your branch: just use 'man tools/virsh.1' and search on 'cmt' (with your 80-column-wide terminal/window). John
participants (3)
-
John Ferlan
-
Nitesh Konkar
-
Peter Krempa