tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265

[
    {
        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)",
        "MetricGroup": "TopdownL1",
        "MetricName": "Frontend_Bound",
        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
    },
    {
        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
        "MetricGroup": "TopdownL1_SMT",
        "MetricName": "Frontend_Bound_SMT",
        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
    },
    {
        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)",
        "MetricGroup": "TopdownL1",
        "MetricName": "Bad_Speculation",
        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example."
    },
    {
        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
        "MetricGroup": "TopdownL1_SMT",
        "MetricName": "Bad_Speculation_SMT",
        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
    },
    {
        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
        "MetricConstraint": "NO_NMI_WATCHDOG",
        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) )",
        "MetricGroup": "TopdownL1",
        "MetricName": "Backend_Bound",
        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound."
    },
    {
        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
        "MetricGroup": "TopdownL1_SMT",
        "MetricName": "Backend_Bound_SMT",
        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
    },
    {
        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)",
        "MetricGroup": "TopdownL1",
        "MetricName": "Retiring",
        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved.  Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. "
    },
    {
        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
        "MetricGroup": "TopdownL1_SMT",
        "MetricName": "Retiring_SMT",
        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved.  Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
    },
    {
        "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
        "MetricGroup": "Ret;Summary",
        "MetricName": "IPC"
    },
    {
        "BriefDescription": "Uops Per Instruction",
        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
        "MetricGroup": "Pipeline;Ret;Retire",
        "MetricName": "UPI"
    },
    {
        "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
        "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
        "MetricGroup": "Pipeline;Mem",
        "MetricName": "CPI"
    },
    {
        "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
        "MetricGroup": "Pipeline",
        "MetricName": "CLKS"
    },
    {
        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
        "MetricExpr": "4 * CPU_CLK_UNHALTED.THREAD",
        "MetricGroup": "TmaL1",
        "MetricName": "SLOTS"
    },
    {
        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
        "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
        "MetricGroup": "TmaL1_SMT",
        "MetricName": "SLOTS_SMT"
    },
    {
        "BriefDescription": "The ratio of Executed- by Issued-Uops",
        "MetricExpr": "UOPS_DISPATCHED.THREAD / UOPS_ISSUED.ANY",
        "MetricGroup": "Cor;Pipeline",
        "MetricName": "Execute_per_Issue",
        "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
    },
    {
        "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
        "MetricGroup": "Ret;SMT;TmaL1",
        "MetricName": "CoreIPC"
    },
    {
        "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
        "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
        "MetricGroup": "Ret;SMT;TmaL1_SMT",
        "MetricName": "CoreIPC_SMT"
    },
    {
        "BriefDescription": "Floating Point Operations Per Cycle",
        "MetricExpr": "( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE ) / CPU_CLK_UNHALTED.THREAD",
        "MetricGroup": "Ret;Flops",
        "MetricName": "FLOPc"
    },
    {
        "BriefDescription": "Floating Point Operations Per Cycle",
        "MetricExpr": "( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
        "MetricGroup": "Ret;Flops_SMT",
        "MetricName": "FLOPc_SMT"
    },
    {
        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
        "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
        "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
        "MetricName": "ILP"
    },
    {
        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
        "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
        "MetricGroup": "SMT",
        "MetricName": "CORE_CLKS"
    },
    {
        "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST",
        "MetricExpr": "INST_RETIRED.ANY",
        "MetricGroup": "Summary;TmaL1",
        "MetricName": "Instructions"
    },
    {
        "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
        "MetricGroup": "Pipeline;Ret",
        "MetricName": "Retire"
    },
    {
        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
        "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
        "MetricGroup": "DSB;Fed;FetchBW",
        "MetricName": "DSB_Coverage"
    },
    {
        "BriefDescription": "Average CPU Utilization",
        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
        "MetricGroup": "HPC;Summary",
        "MetricName": "CPU_Utilization"
    },
    {
        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
        "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time",
        "MetricGroup": "Summary;Power",
        "MetricName": "Average_Frequency"
    },
    {
        "BriefDescription": "Giga Floating Point Operations Per Second",
        "MetricExpr": "( ( 1 * ( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * ( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8 * SIMD_FP_256.PACKED_SINGLE ) / 1000000000 ) / duration_time",
        "MetricGroup": "Cor;Flops;HPC",
        "MetricName": "GFLOPs",
        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
    },
    {
        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
        "MetricGroup": "Power",
        "MetricName": "Turbo_Utilization"
    },
    {
        "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
        "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
        "MetricGroup": "SMT",
        "MetricName": "SMT_2T_Utilization"
    },
    {
        "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
        "MetricGroup": "OS",
        "MetricName": "Kernel_Utilization"
    },
    {
        "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
        "MetricGroup": "OS",
        "MetricName": "Kernel_CPI"
    },
    {
        "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
        "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
        "MetricGroup": "HPC;Mem;MemoryBW;SoC",
        "MetricName": "DRAM_BW_Use"
    },
    {
        "BriefDescription": "Socket actual clocks when any core is active on that socket",
        "MetricExpr": "cbox_0@event\\=0x0@",
        "MetricGroup": "SoC",
        "MetricName": "Socket_CLKS"
    },
    {
        "BriefDescription": "Uncore frequency per die [GHZ]",
        "MetricExpr": "cbox_0@event\\=0x0@ / #num_dies / duration_time / 1000000000",
        "MetricGroup": "SoC",
        "MetricName": "UNCORE_FREQ"
    },
    {
        "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
        "MetricGroup": "Branches;OS",
        "MetricName": "IpFarBranch"
    },
    {
        "BriefDescription": "C3 residency percent per core",
        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C3_Core_Residency"
    },
    {
        "BriefDescription": "C6 residency percent per core",
        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C6_Core_Residency"
    },
    {
        "BriefDescription": "C7 residency percent per core",
        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C7_Core_Residency"
    },
    {
        "BriefDescription": "C2 residency percent per package",
        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C2_Pkg_Residency"
    },
    {
        "BriefDescription": "C3 residency percent per package",
        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C3_Pkg_Residency"
    },
    {
        "BriefDescription": "C6 residency percent per package",
        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C6_Pkg_Residency"
    },
    {
        "BriefDescription": "C7 residency percent per package",
        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C7_Pkg_Residency"
    }
]