From: Greg Kroah-Hartman Date: Mon, 4 Apr 2022 11:06:01 +0000 (+0200) Subject: 5.17-stable patches X-Git-Tag: v5.17.2~54 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a4cf07a31012185e4f1f142a29841256bf8b19b9;p=thirdparty%2Fkernel%2Fstable-queue.git 5.17-stable patches added patches: arm-dts-spear1340-update-serial-node-properties.patch arm-dts-spear13xx-update-spi-dma-properties.patch arm64-dts-ls1043a-update-i2c-dma-properties.patch arm64-dts-ls1046a-update-i2c-node-dma-properties.patch asoc-sof-intel-fix-build-error-without-snd_soc_sof_pci_dev.patch asoc-topology-allow-tlv-control-to-be-either-read-or-write.patch docs-sysctl-kernel-add-missing-bit-to-panic_print.patch ice-xsk-fix-indexing-in-ice_tx_xsk_pool.patch media-ov6650-add-try-support-to-selection-api-operations.patch media-ov6650-fix-crop-rectangle-affected-by-set-format.patch n64cart-convert-bi_disk-to-bi_bdev-bd_disk-fix-build.patch openvswitch-fixed-nd-target-mask-field-in-the-flow-dump.patch perf-vendor-events-update-metrics-for-skylake-server.patch pinctrl-canonical-rsel-resistance-selection-property.patch spi-mediatek-support-tick_delay-without-enhance_timing.patch torture-make-torture.sh-help-message-match-reality.patch um-fix-uml_mconsole-stop-go.patch vdpa-mlx5-avoid-processing-works-if-workqueue-was-destroyed.patch xsk-do-not-write-null-in-sw-ring-at-allocation-failure.patch --- diff --git a/queue-5.17/arm-dts-spear1340-update-serial-node-properties.patch b/queue-5.17/arm-dts-spear1340-update-serial-node-properties.patch new file mode 100644 index 00000000000..d30355bc7ea --- /dev/null +++ b/queue-5.17/arm-dts-spear1340-update-serial-node-properties.patch @@ -0,0 +1,37 @@ +From 583d6b0062640def86f3265aa1042ecb6672516e Mon Sep 17 00:00:00 2001 +From: Kuldeep Singh +Date: Sat, 26 Mar 2022 09:53:10 +0530 +Subject: ARM: dts: spear1340: Update serial node properties + +From: Kuldeep Singh + +commit 583d6b0062640def86f3265aa1042ecb6672516e upstream. 
+ +Reorder dma and dma-names property for serial node to make it compliant +with bindings. + +Fixes: 6e8887f60f60 ("ARM: SPEAr13xx: Pass generic DW DMAC platform data from DT") +Signed-off-by: Kuldeep Singh +Acked-by: Viresh Kumar +Link: https://lore.kernel.org/r/20220326042313.97862-3-singh.kuldeep87k@gmail.com' +Signed-off-by: Arnd Bergmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/boot/dts/spear1340.dtsi | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/arm/boot/dts/spear1340.dtsi ++++ b/arch/arm/boot/dts/spear1340.dtsi +@@ -134,9 +134,9 @@ + reg = <0xb4100000 0x1000>; + interrupts = <0 105 0x4>; + status = "disabled"; +- dmas = <&dwdma0 12 0 1>, +- <&dwdma0 13 1 0>; +- dma-names = "tx", "rx"; ++ dmas = <&dwdma0 13 0 1>, ++ <&dwdma0 12 1 0>; ++ dma-names = "rx", "tx"; + }; + + thermal@e07008c4 { diff --git a/queue-5.17/arm-dts-spear13xx-update-spi-dma-properties.patch b/queue-5.17/arm-dts-spear13xx-update-spi-dma-properties.patch new file mode 100644 index 00000000000..e528d2e4d34 --- /dev/null +++ b/queue-5.17/arm-dts-spear13xx-update-spi-dma-properties.patch @@ -0,0 +1,37 @@ +From 31d3687d6017c7ce6061695361598d9cda70807a Mon Sep 17 00:00:00 2001 +From: Kuldeep Singh +Date: Sat, 26 Mar 2022 09:53:09 +0530 +Subject: ARM: dts: spear13xx: Update SPI dma properties + +From: Kuldeep Singh + +commit 31d3687d6017c7ce6061695361598d9cda70807a upstream. + +Reorder dmas and dma-names property for spi controller node to make it +compliant with bindings. 
+ +Fixes: 6e8887f60f60 ("ARM: SPEAr13xx: Pass generic DW DMAC platform data from DT") +Signed-off-by: Kuldeep Singh +Acked-by: Viresh Kumar +Link: https://lore.kernel.org/r/20220326042313.97862-2-singh.kuldeep87k@gmail.com' +Signed-off-by: Arnd Bergmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/boot/dts/spear13xx.dtsi | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/arm/boot/dts/spear13xx.dtsi ++++ b/arch/arm/boot/dts/spear13xx.dtsi +@@ -284,9 +284,9 @@ + #size-cells = <0>; + interrupts = <0 31 0x4>; + status = "disabled"; +- dmas = <&dwdma0 4 0 0>, +- <&dwdma0 5 0 0>; +- dma-names = "tx", "rx"; ++ dmas = <&dwdma0 5 0 0>, ++ <&dwdma0 4 0 0>; ++ dma-names = "rx", "tx"; + }; + + rtc@e0580000 { diff --git a/queue-5.17/arm64-dts-ls1043a-update-i2c-dma-properties.patch b/queue-5.17/arm64-dts-ls1043a-update-i2c-dma-properties.patch new file mode 100644 index 00000000000..92e2fac61bc --- /dev/null +++ b/queue-5.17/arm64-dts-ls1043a-update-i2c-dma-properties.patch @@ -0,0 +1,37 @@ +From aa4df840d1c5eab2bb33695efe4409b3e5526749 Mon Sep 17 00:00:00 2001 +From: Kuldeep Singh +Date: Sat, 26 Mar 2022 09:53:11 +0530 +Subject: arm64: dts: ls1043a: Update i2c dma properties + +From: Kuldeep Singh + +commit aa4df840d1c5eab2bb33695efe4409b3e5526749 upstream. + +Reorder dmas and dma-names properties for i2c controller node to make it +compliant with bindings. 
+ +Fixes: 6d453cd22357 ("arm64: dts: add Freescale LS1043a SoC support") +Signed-off-by: Kuldeep Singh +CC: soc@kernel.org +Link: https://lore.kernel.org/r/20220326042313.97862-4-singh.kuldeep87k@gmail.com' +Signed-off-by: Arnd Bergmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi ++++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +@@ -536,9 +536,9 @@ + clock-names = "i2c"; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(1)>; +- dmas = <&edma0 1 39>, +- <&edma0 1 38>; +- dma-names = "tx", "rx"; ++ dmas = <&edma0 1 38>, ++ <&edma0 1 39>; ++ dma-names = "rx", "tx"; + status = "disabled"; + }; + diff --git a/queue-5.17/arm64-dts-ls1046a-update-i2c-node-dma-properties.patch b/queue-5.17/arm64-dts-ls1046a-update-i2c-node-dma-properties.patch new file mode 100644 index 00000000000..2b090824859 --- /dev/null +++ b/queue-5.17/arm64-dts-ls1046a-update-i2c-node-dma-properties.patch @@ -0,0 +1,37 @@ +From eeeb4f1075d71d67083c75f69247206e9b3d9f4a Mon Sep 17 00:00:00 2001 +From: Kuldeep Singh +Date: Sat, 26 Mar 2022 09:53:12 +0530 +Subject: arm64: dts: ls1046a: Update i2c node dma properties + +From: Kuldeep Singh + +commit eeeb4f1075d71d67083c75f69247206e9b3d9f4a upstream. + +Reorder dmas and dma-names properties for i2c controller node to make it +compliant with bindings. 
+ +Fixes: 8126d88162a5 ("arm64: dts: add QorIQ LS1046A SoC support") +Signed-off-by: Kuldeep Singh +CC: soc@kernel.org +Link: https://lore.kernel.org/r/20220326042313.97862-5-singh.kuldeep87k@gmail.com' +Signed-off-by: Arnd Bergmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi ++++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +@@ -499,9 +499,9 @@ + interrupts = ; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(2)>; +- dmas = <&edma0 1 39>, +- <&edma0 1 38>; +- dma-names = "tx", "rx"; ++ dmas = <&edma0 1 38>, ++ <&edma0 1 39>; ++ dma-names = "rx", "tx"; + status = "disabled"; + }; + diff --git a/queue-5.17/asoc-sof-intel-fix-build-error-without-snd_soc_sof_pci_dev.patch b/queue-5.17/asoc-sof-intel-fix-build-error-without-snd_soc_sof_pci_dev.patch new file mode 100644 index 00000000000..a62363aa892 --- /dev/null +++ b/queue-5.17/asoc-sof-intel-fix-build-error-without-snd_soc_sof_pci_dev.patch @@ -0,0 +1,39 @@ +From 664d66dc0a64b32e60a5ad59a9aebb08676a612b Mon Sep 17 00:00:00 2001 +From: Zheng Bin +Date: Wed, 23 Mar 2022 17:25:01 +0800 +Subject: ASoC: SOF: Intel: Fix build error without SND_SOC_SOF_PCI_DEV + +From: Zheng Bin + +commit 664d66dc0a64b32e60a5ad59a9aebb08676a612b upstream. + +If SND_SOC_SOF_PCI_DEV is n, building fails: + +sound/soc/sof/intel/pci-tng.o:(.data+0x1c0): undefined reference to `sof_pci_probe' +sound/soc/sof/intel/pci-tng.o:(.data+0x1c8): undefined reference to `sof_pci_remove' +sound/soc/sof/intel/pci-tng.o:(.data+0x1e0): undefined reference to `sof_pci_shutdown' +sound/soc/sof/intel/pci-tng.o:(.data+0x290): undefined reference to `sof_pci_pm' + +Make SND_SOC_SOF_MERRIFIELD select SND_SOC_SOF_PCI_DEV to fix this. 
+ +Fixes: 8d4ba1be3d22 ("ASoC: SOF: pci: split PCI into different drivers") +Reported-by: Hulk Robot +Signed-off-by: Zheng Bin +Acked-by: Pierre-Louis Bossart +Link: https://lore.kernel.org/r/20220323092501.145879-1-zhengbin13@huawei.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/sof/intel/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +--- a/sound/soc/sof/intel/Kconfig ++++ b/sound/soc/sof/intel/Kconfig +@@ -84,6 +84,7 @@ if SND_SOC_SOF_PCI + config SND_SOC_SOF_MERRIFIELD + tristate "SOF support for Tangier/Merrifield" + default SND_SOC_SOF_PCI ++ select SND_SOC_SOF_PCI_DEV + select SND_SOC_SOF_INTEL_ATOM_HIFI_EP + help + This adds support for Sound Open Firmware for Intel(R) platforms diff --git a/queue-5.17/asoc-topology-allow-tlv-control-to-be-either-read-or-write.patch b/queue-5.17/asoc-topology-allow-tlv-control-to-be-either-read-or-write.patch new file mode 100644 index 00000000000..67bb0135cfd --- /dev/null +++ b/queue-5.17/asoc-topology-allow-tlv-control-to-be-either-read-or-write.patch @@ -0,0 +1,44 @@ +From feb00b736af64875560f371fe7f58b0b7f239046 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= + +Date: Wed, 12 Jan 2022 18:00:29 +0100 +Subject: ASoC: topology: Allow TLV control to be either read or write +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Amadeusz Sławiński + +commit feb00b736af64875560f371fe7f58b0b7f239046 upstream. + +There is no reason to force readwrite access on TLV controls. It can be +either read, write or both. 
This is further evidenced in code where it +performs following checks: + if ((k->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ) && !sbe->get) + return -EINVAL; + if ((k->access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) && !sbe->put) + return -EINVAL; + +Fixes: 1a3232d2f61d ("ASoC: topology: Add support for TLV bytes controls") +Signed-off-by: Amadeusz Sławiński +Reviewed-by: Cezary Rojewski +Reviewed-by: Pierre-Louis Bossart +Link: https://lore.kernel.org/r/20220112170030.569712-3-amadeuszx.slawinski@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/soc-topology.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/sound/soc/soc-topology.c ++++ b/sound/soc/soc-topology.c +@@ -512,7 +512,8 @@ static int soc_tplg_kcontrol_bind_io(str + + if (le32_to_cpu(hdr->ops.info) == SND_SOC_TPLG_CTL_BYTES + && k->iface & SNDRV_CTL_ELEM_IFACE_MIXER +- && k->access & SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE ++ && (k->access & SNDRV_CTL_ELEM_ACCESS_TLV_READ ++ || k->access & SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) + && k->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { + struct soc_bytes_ext *sbe; + struct snd_soc_tplg_bytes_control *be; diff --git a/queue-5.17/docs-sysctl-kernel-add-missing-bit-to-panic_print.patch b/queue-5.17/docs-sysctl-kernel-add-missing-bit-to-panic_print.patch new file mode 100644 index 00000000000..b80035c6e66 --- /dev/null +++ b/queue-5.17/docs-sysctl-kernel-add-missing-bit-to-panic_print.patch @@ -0,0 +1,55 @@ +From a1ff1de00db21ecb956213f046b79741b64c6b65 Mon Sep 17 00:00:00 2001 +From: "Guilherme G. Piccoli" +Date: Wed, 23 Mar 2022 16:07:03 -0700 +Subject: docs: sysctl/kernel: add missing bit to panic_print + +From: Guilherme G. Piccoli + +commit a1ff1de00db21ecb956213f046b79741b64c6b65 upstream. + +Patch series "Some improvements on panic_print". + +This is a mix of a documentation fix with some additions to the +"panic_print" syscall / parameter. 
The goal here is being able to collect +all CPUs backtraces during a panic event and also to enable "panic_print" +in a kdump event - details of the reasoning and design choices in the +patches. + +This patch (of 3): + +Commit de6da1e8bcf0 ("panic: add an option to replay all the printk +message in buffer") added a new bit to the sysctl/kernel parameter +"panic_print", but the documentation was added only in +kernel-parameters.txt, not in the sysctl guide. + +Fix it here by adding bit 5 to sysctl admin-guide documentation. + +[rdunlap@infradead.org: fix table format warning] + Link: https://lkml.kernel.org/r/20220109055635.6999-1-rdunlap@infradead.org + +Link: https://lkml.kernel.org/r/20211109202848.610874-1-gpiccoli@igalia.com +Link: https://lkml.kernel.org/r/20211109202848.610874-2-gpiccoli@igalia.com +Fixes: de6da1e8bcf0 ("panic: add an option to replay all the printk message in buffer") +Signed-off-by: Guilherme G. Piccoli +Reviewed-by: Feng Tang +Cc: Luis Chamberlain +Cc: Kees Cook +Cc: Iurii Zaikin +Cc: Samuel Iglesias Gonsalvez +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/sysctl/kernel.rst | 1 + + 1 file changed, 1 insertion(+) + +--- a/Documentation/admin-guide/sysctl/kernel.rst ++++ b/Documentation/admin-guide/sysctl/kernel.rst +@@ -795,6 +795,7 @@ bit 1 print system memory info + bit 2 print timer info + bit 3 print locks info if ``CONFIG_LOCKDEP`` is on + bit 4 print ftrace buffer ++bit 5 print all printk messages in buffer + ===== ============================================ + + So for example to print tasks and memory info on panic, user can:: diff --git a/queue-5.17/ice-xsk-fix-indexing-in-ice_tx_xsk_pool.patch b/queue-5.17/ice-xsk-fix-indexing-in-ice_tx_xsk_pool.patch new file mode 100644 index 00000000000..9351f2f43f0 --- /dev/null +++ b/queue-5.17/ice-xsk-fix-indexing-in-ice_tx_xsk_pool.patch @@ -0,0 +1,48 @@ +From 1ac2524de7b366633fc336db6c94062768d0ab03 Mon Sep 
17 00:00:00 2001 +From: Maciej Fijalkowski +Date: Mon, 28 Mar 2022 16:21:23 +0200 +Subject: ice: xsk: Fix indexing in ice_tx_xsk_pool() + +From: Maciej Fijalkowski + +commit 1ac2524de7b366633fc336db6c94062768d0ab03 upstream. + +Ice driver tries to always create XDP rings array to be +num_possible_cpus() sized, regardless of user's queue count setting that +can be changed via ethtool -L for example. + +Currently, ice_tx_xsk_pool() calculates the qid by decrementing the +ring->q_index by the count of XDP queues, but ring->q_index is set to 'i ++ vsi->alloc_txq'. + +When user did ethtool -L $IFACE combined 1, alloc_txq is 1, but +vsi->num_xdp_txq is still num_possible_cpus(). Then, ice_tx_xsk_pool() +will do OOB access and in the final result ring would not get xsk_pool +pointer assigned. Then, each ice_xsk_wakeup() call will fail with error +and it will not be possible to get into NAPI and do the processing from +driver side. + +Fix this by decrementing vsi->alloc_txq instead of vsi->num_xdp_txq from +ring->q_index in ice_tx_xsk_pool() so the calculation is reflected to the +setting of ring->q_index. 
+ +Fixes: 22bf877e528f ("ice: introduce XDP_TX fallback path") +Signed-off-by: Maciej Fijalkowski +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220328142123.170157-5-maciej.fijalkowski@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ice/ice.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/intel/ice/ice.h ++++ b/drivers/net/ethernet/intel/ice/ice.h +@@ -712,7 +712,7 @@ static inline struct xsk_buff_pool *ice_ + struct ice_vsi *vsi = ring->vsi; + u16 qid; + +- qid = ring->q_index - vsi->num_xdp_txq; ++ qid = ring->q_index - vsi->alloc_txq; + + if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) + return NULL; diff --git a/queue-5.17/media-ov6650-add-try-support-to-selection-api-operations.patch b/queue-5.17/media-ov6650-add-try-support-to-selection-api-operations.patch new file mode 100644 index 00000000000..20cb5869f9f --- /dev/null +++ b/queue-5.17/media-ov6650-add-try-support-to-selection-api-operations.patch @@ -0,0 +1,144 @@ +From c74052646496ffe0bc606152e6b9653137020cbf Mon Sep 17 00:00:00 2001 +From: Janusz Krzysztofik +Date: Mon, 4 May 2020 00:06:17 +0200 +Subject: media: ov6650: Add try support to selection API operations + +From: Janusz Krzysztofik + +commit c74052646496ffe0bc606152e6b9653137020cbf upstream. + +Try requests are now only supported by format processing pad operations +implemented by the driver. The driver selection API operations +currently respond to them with -EINVAL. While that is correct, it +constrains video device drivers to not use subdevice cropping at all +while processing user requested active frame size, otherwise their set +try format results might differ from active. As a consequence, we +can't fix set format pad operation as not to touch crop rectangle since +that would affect users not being able to set arbitrary frame sizes. 
+Moreover, without a working set try selection support we are not able +to use pad config crop rectangle as a reference while processing set +try format requests. + +Implement missing try selection support. Moreover, as it will be now +possible to maintain the pad config crop rectangle via selection API, +start using it instead of the active one as a reference while +processing set try format requests. + +is_unscaled_ok() helper, now also called from set selection operation, +has been just moved up in the source file to avoid a prototype, with no +functional changes. + +[Sakari Ailus: Rebase on subdev state patches] + +Fixes: 717fd5b4907a ("[media] v4l2: replace try_mbus_fmt by set_fmt") +Signed-off-by: Janusz Krzysztofik +Signed-off-by: Sakari Ailus +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/i2c/ov6650.c | 54 +++++++++++++++++++++++++++++++++++---------- + 1 file changed, 43 insertions(+), 11 deletions(-) + +--- a/drivers/media/i2c/ov6650.c ++++ b/drivers/media/i2c/ov6650.c +@@ -472,9 +472,16 @@ static int ov6650_get_selection(struct v + { + struct i2c_client *client = v4l2_get_subdevdata(sd); + struct ov6650 *priv = to_ov6650(client); ++ struct v4l2_rect *rect; + +- if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE) +- return -EINVAL; ++ if (sel->which == V4L2_SUBDEV_FORMAT_TRY) { ++ /* pre-select try crop rectangle */ ++ rect = &sd_state->pads->try_crop; ++ ++ } else { ++ /* pre-select active crop rectangle */ ++ rect = &priv->rect; ++ } + + switch (sel->target) { + case V4L2_SEL_TGT_CROP_BOUNDS: +@@ -483,14 +490,22 @@ static int ov6650_get_selection(struct v + sel->r.width = W_CIF; + sel->r.height = H_CIF; + return 0; ++ + case V4L2_SEL_TGT_CROP: +- sel->r = priv->rect; ++ /* use selected crop rectangle */ ++ sel->r = *rect; + return 0; ++ + default: + return -EINVAL; + } + } + ++static bool is_unscaled_ok(int width, int height, struct v4l2_rect *rect) ++{ ++ return width > rect->width >> 1 || height > rect->height 
>> 1; ++} ++ + static void ov6650_bind_align_crop_rectangle(struct v4l2_rect *rect) + { + v4l_bound_align_image(&rect->width, 2, W_CIF, 1, +@@ -510,12 +525,30 @@ static int ov6650_set_selection(struct v + struct ov6650 *priv = to_ov6650(client); + int ret; + +- if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE || +- sel->target != V4L2_SEL_TGT_CROP) ++ if (sel->target != V4L2_SEL_TGT_CROP) + return -EINVAL; + + ov6650_bind_align_crop_rectangle(&sel->r); + ++ if (sel->which == V4L2_SUBDEV_FORMAT_TRY) { ++ struct v4l2_rect *crop = &sd_state->pads->try_crop; ++ struct v4l2_mbus_framefmt *mf = &sd_state->pads->try_fmt; ++ /* detect current pad config scaling factor */ ++ bool half_scale = !is_unscaled_ok(mf->width, mf->height, crop); ++ ++ /* store new crop rectangle */ ++ *crop = sel->r; ++ ++ /* adjust frame size */ ++ mf->width = crop->width >> half_scale; ++ mf->height = crop->height >> half_scale; ++ ++ return 0; ++ } ++ ++ /* V4L2_SUBDEV_FORMAT_ACTIVE */ ++ ++ /* apply new crop rectangle */ + ret = ov6650_reg_write(client, REG_HSTRT, sel->r.left >> 1); + if (!ret) { + priv->rect.width += priv->rect.left - sel->r.left; +@@ -567,11 +600,6 @@ static int ov6650_get_fmt(struct v4l2_su + return 0; + } + +-static bool is_unscaled_ok(int width, int height, struct v4l2_rect *rect) +-{ +- return width > rect->width >> 1 || height > rect->height >> 1; +-} +- + #define to_clkrc(div) ((div) - 1) + + /* set the format we will capture in */ +@@ -692,7 +720,11 @@ static int ov6650_set_fmt(struct v4l2_su + break; + } + +- *crop = priv->rect; ++ if (format->which == V4L2_SUBDEV_FORMAT_TRY) ++ *crop = sd_state->pads->try_crop; ++ else ++ *crop = priv->rect; ++ + half_scale = !is_unscaled_ok(mf->width, mf->height, crop); + + /* adjust new crop rectangle position against its current center */ diff --git a/queue-5.17/media-ov6650-fix-crop-rectangle-affected-by-set-format.patch b/queue-5.17/media-ov6650-fix-crop-rectangle-affected-by-set-format.patch new file mode 100644 index 
00000000000..4a250262ab4 --- /dev/null +++ b/queue-5.17/media-ov6650-fix-crop-rectangle-affected-by-set-format.patch @@ -0,0 +1,91 @@ +From 985d2d7a482e9b64ef9643702b066da9cbd6ae8e Mon Sep 17 00:00:00 2001 +From: Janusz Krzysztofik +Date: Mon, 4 May 2020 00:06:18 +0200 +Subject: media: ov6650: Fix crop rectangle affected by set format + +From: Janusz Krzysztofik + +commit 985d2d7a482e9b64ef9643702b066da9cbd6ae8e upstream. + +According to subdevice interface specification found in V4L2 API +documentation, set format pad operations should not affect image +geometry set in preceding image processing steps. Unfortunately, that +requirement is not respected by the driver implementation of set format +as it was not the case when that code was still implementing a pair of +now obsolete .s_mbus_fmt() / .try_mbus_fmt() video operations before +they have been merged and reused as an implementation of .set_fmt() pad +operation by commit 717fd5b4907a ("[media] v4l2: replace try_mbus_fmt +by set_fmt"). + +Exclude non-compliant crop rectangle adjustments from set format try, +as well as a call to .set_selection() from set format active processing +path, so only frame scaling is applied as needed and crop rectangle is +no longer modified. 
+ +[Sakari Ailus: Rebase on subdev state patches] + +Fixes: 717fd5b4907a ("[media] v4l2: replace try_mbus_fmt by set_fmt") +Signed-off-by: Janusz Krzysztofik +Signed-off-by: Sakari Ailus +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/i2c/ov6650.c | 28 ++++------------------------ + 1 file changed, 4 insertions(+), 24 deletions(-) + +--- a/drivers/media/i2c/ov6650.c ++++ b/drivers/media/i2c/ov6650.c +@@ -693,11 +693,7 @@ static int ov6650_set_fmt(struct v4l2_su + struct v4l2_mbus_framefmt *mf = &format->format; + struct i2c_client *client = v4l2_get_subdevdata(sd); + struct ov6650 *priv = to_ov6650(client); +- struct v4l2_subdev_selection sel = { +- .which = V4L2_SUBDEV_FORMAT_ACTIVE, +- .target = V4L2_SEL_TGT_CROP, +- }; +- struct v4l2_rect *crop = &sel.r; ++ struct v4l2_rect *crop; + bool half_scale; + + if (format->pad) +@@ -721,24 +717,13 @@ static int ov6650_set_fmt(struct v4l2_su + } + + if (format->which == V4L2_SUBDEV_FORMAT_TRY) +- *crop = sd_state->pads->try_crop; ++ crop = &sd_state->pads->try_crop; + else +- *crop = priv->rect; ++ crop = &priv->rect; + + half_scale = !is_unscaled_ok(mf->width, mf->height, crop); + +- /* adjust new crop rectangle position against its current center */ +- crop->left += (crop->width - (mf->width << half_scale)) / 2; +- crop->top += (crop->height - (mf->height << half_scale)) / 2; +- /* adjust new crop rectangle size */ +- crop->width = mf->width << half_scale; +- crop->height = mf->height << half_scale; +- + if (format->which == V4L2_SUBDEV_FORMAT_TRY) { +- /* store new crop rectangle, hadware bound, in pad config */ +- ov6650_bind_align_crop_rectangle(crop); +- sd_state->pads->try_crop = *crop; +- + /* store new mbus frame format code and size in pad config */ + sd_state->pads->try_fmt.width = crop->width >> half_scale; + sd_state->pads->try_fmt.height = crop->height >> half_scale; +@@ -751,12 +736,7 @@ static int ov6650_set_fmt(struct v4l2_su + mf->code = 
sd_state->pads->try_fmt.code; + + } else { +- int ret; +- +- /* apply new crop rectangle */ +- ret = ov6650_set_selection(sd, NULL, &sel); +- if (ret) +- return ret; ++ int ret = 0; + + /* apply new media bus frame format and scaling if changed */ + if (mf->code != priv->code || half_scale != priv->half_scale) diff --git a/queue-5.17/n64cart-convert-bi_disk-to-bi_bdev-bd_disk-fix-build.patch b/queue-5.17/n64cart-convert-bi_disk-to-bi_bdev-bd_disk-fix-build.patch new file mode 100644 index 00000000000..7e261d1a8fe --- /dev/null +++ b/queue-5.17/n64cart-convert-bi_disk-to-bi_bdev-bd_disk-fix-build.patch @@ -0,0 +1,47 @@ +From b2479de38d8fc7ef13d5c78ff5ded6e5a1a4eac0 Mon Sep 17 00:00:00 2001 +From: Jackie Liu +Date: Mon, 21 Mar 2022 15:12:16 +0800 +Subject: n64cart: convert bi_disk to bi_bdev->bd_disk fix build +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jackie Liu + +commit b2479de38d8fc7ef13d5c78ff5ded6e5a1a4eac0 upstream. + +My kernel robot report below: + + drivers/block/n64cart.c: In function ‘n64cart_submit_bio’: + drivers/block/n64cart.c:91:26: error: ‘struct bio’ has no member named ‘bi_disk’ + 91 | struct device *dev = bio->bi_disk->private_data; + | ^~ + CC drivers/slimbus/qcom-ctrl.o + CC drivers/auxdisplay/hd44780.o + CC drivers/watchdog/watchdog_core.o + CC drivers/nvme/host/fault_inject.o + AR drivers/accessibility/braille/built-in.a + make[2]: *** [scripts/Makefile.build:288: drivers/block/n64cart.o] Error 1 + +Fixes: 309dca309fc3 ("block: store a block_device pointer in struct bio"); +Reported-by: k2ci +Signed-off-by: Jackie Liu +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20220321071216.1549596-1-liu.yun@linux.dev +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/block/n64cart.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/block/n64cart.c ++++ b/drivers/block/n64cart.c +@@ -88,7 +88,7 @@ static void 
n64cart_submit_bio(struct bi + { + struct bio_vec bvec; + struct bvec_iter iter; +- struct device *dev = bio->bi_disk->private_data; ++ struct device *dev = bio->bi_bdev->bd_disk->private_data; + u32 pos = bio->bi_iter.bi_sector << SECTOR_SHIFT; + + bio_for_each_segment(bvec, bio, iter) { diff --git a/queue-5.17/openvswitch-fixed-nd-target-mask-field-in-the-flow-dump.patch b/queue-5.17/openvswitch-fixed-nd-target-mask-field-in-the-flow-dump.patch new file mode 100644 index 00000000000..dfb95221ddf --- /dev/null +++ b/queue-5.17/openvswitch-fixed-nd-target-mask-field-in-the-flow-dump.patch @@ -0,0 +1,49 @@ +From f19c44452b58a84d95e209b847f5495d91c9983a Mon Sep 17 00:00:00 2001 +From: Martin Varghese +Date: Mon, 28 Mar 2022 11:11:48 +0530 +Subject: openvswitch: Fixed nd target mask field in the flow dump. + +From: Martin Varghese + +commit f19c44452b58a84d95e209b847f5495d91c9983a upstream. + +IPv6 nd target mask was not getting populated in flow dump. + +In the function __ovs_nla_put_key the icmp code mask field was checked +instead of icmp code key field to classify the flow as neighbour discovery. 
+ +ufid:bdfbe3e5-60c2-43b0-a5ff-dfcac1c37328, recirc_id(0),dp_hash(0/0), +skb_priority(0/0),in_port(ovs-nm1),skb_mark(0/0),ct_state(0/0), +ct_zone(0/0),ct_mark(0/0),ct_label(0/0), +eth(src=00:00:00:00:00:00/00:00:00:00:00:00, +dst=00:00:00:00:00:00/00:00:00:00:00:00), +eth_type(0x86dd), +ipv6(src=::/::,dst=::/::,label=0/0,proto=58,tclass=0/0,hlimit=0/0,frag=no), +icmpv6(type=135,code=0), +nd(target=2001::2/::, +sll=00:00:00:00:00:00/00:00:00:00:00:00, +tll=00:00:00:00:00:00/00:00:00:00:00:00), +packets:10, bytes:860, used:0.504s, dp:ovs, actions:ovs-nm2 + +Fixes: e64457191a25 (openvswitch: Restructure datapath.c and flow.c) +Signed-off-by: Martin Varghese +Link: https://lore.kernel.org/r/20220328054148.3057-1-martinvarghesenokia@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/flow_netlink.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/openvswitch/flow_netlink.c ++++ b/net/openvswitch/flow_netlink.c +@@ -2201,8 +2201,8 @@ static int __ovs_nla_put_key(const struc + icmpv6_key->icmpv6_type = ntohs(output->tp.src); + icmpv6_key->icmpv6_code = ntohs(output->tp.dst); + +- if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || +- icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { ++ if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || ++ swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { + struct ovs_key_nd *nd_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); diff --git a/queue-5.17/perf-vendor-events-update-metrics-for-skylake-server.patch b/queue-5.17/perf-vendor-events-update-metrics-for-skylake-server.patch new file mode 100644 index 00000000000..76cceaed41d --- /dev/null +++ b/queue-5.17/perf-vendor-events-update-metrics-for-skylake-server.patch @@ -0,0 +1,1818 @@ +From 3bad20d7d129c3b3063658a0f83974dfe6dac5c4 Mon Sep 17 00:00:00 2001 +From: Ian Rogers +Date: Mon, 31 Jan 2022 17:58:34 -0800 +Subject: perf vendor events: Update metrics for SkyLake 
Server + +From: Ian Rogers + +commit 3bad20d7d129c3b3063658a0f83974dfe6dac5c4 upstream. + +Based on TMA_metrics-full.csv version 4.3 at 01.org: + https://download.01.org/perfmon/ +Events are updated to version 1.26: + https://download.01.org/perfmon/SKX +Json files generated by: + https://github.com/intel/event-converter-for-linux-perf + +Fixes were made that allow the skx-metrics.json to successfully +generate, bringing back TopdownL1 metrics. + +Tested: + + $ perf test + ... + 6: Parse event definition strings : Ok + 7: Simple expression parser : Ok + ... + 9: Parse perf pmu format : Ok + 10: PMU events : + 10.1: PMU event table sanity : Ok + 10.2: PMU event map aliases : Ok + 10.3: Parsing of PMU event table metrics : Ok + 10.4: Parsing of PMU event table metrics with fake PMUs : Ok + ... + 68: Parse and process metrics : Ok + ... + 88: perf stat metrics (shadow stat) test : Ok + 89: perf all metricgroups test : Ok + 90: perf all metrics test : Skip + 91: perf all PMU test : Ok + ... + +90 skips due to a lack of floating point samples, which is +understandable. 
+ +Fixes: c4ad8fabd03f76ed ("perf vendor events: Update metrics for SkyLake Server") +Reviewed-by: Kan Liang +Signed-off-by: Ian Rogers +Cc: Alexander Shishkin +Cc: Alexandre Torgue +Cc: Andi Kleen +Cc: Ingo Molnar +Cc: James Clark +Cc: Jin Yao +Cc: Jiri Olsa +Cc: John Garry +Cc: Mark Rutland +Cc: Maxime Coquelin +Cc: Namhyung Kim +Cc: Peter Zijlstra +Cc: Stephane Eranian +Cc: Zhengjun Xing +Link: https://lore.kernel.org/r/20220201015858.1226914-3-irogers@google.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman +--- + tools/perf/pmu-events/arch/x86/skylakex/cache.json | 111 +- + tools/perf/pmu-events/arch/x86/skylakex/floating-point.json | 24 + tools/perf/pmu-events/arch/x86/skylakex/frontend.json | 18 + tools/perf/pmu-events/arch/x86/skylakex/memory.json | 96 +- + tools/perf/pmu-events/arch/x86/skylakex/pipeline.json | 11 + tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json | 461 ++++++++++-- + tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json | 23 + 7 files changed, 591 insertions(+), 153 deletions(-) + +--- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json ++++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json +@@ -315,6 +315,19 @@ + "UMask": "0x82" + }, + { ++ "BriefDescription": "All retired memory instructions.", ++ "Counter": "0,1,2,3", ++ "CounterHTOff": "0,1,2,3", ++ "Data_LA": "1", ++ "EventCode": "0xD0", ++ "EventName": "MEM_INST_RETIRED.ANY", ++ "L1_Hit_Indication": "1", ++ "PEBS": "1", ++ "PublicDescription": "Counts all retired memory instructions - loads and stores.", ++ "SampleAfterValue": "2000003", ++ "UMask": "0x83" ++ }, ++ { + "BriefDescription": "Retired load instructions with locked access.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", +@@ -358,6 +371,7 @@ + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", + "PEBS": "1", ++ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).", + "SampleAfterValue": "100003", 
+ "UMask": "0x11" + }, +@@ -370,6 +384,7 @@ + "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", ++ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).", + "SampleAfterValue": "100003", + "UMask": "0x12" + }, +@@ -733,7 +748,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010491", ++ "MSRValue": "0x10491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -772,7 +787,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0491", ++ "MSRValue": "0x4003C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -785,7 +800,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0491", ++ "MSRValue": "0x1003C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -798,7 +813,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": 
"0x08003C0491", ++ "MSRValue": "0x8003C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -811,7 +826,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010490", ++ "MSRValue": "0x10490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -850,7 +865,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0490", ++ "MSRValue": "0x4003C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -863,7 +878,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0490", ++ "MSRValue": "0x1003C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -876,7 +891,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + 
"MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x08003C0490", ++ "MSRValue": "0x8003C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -889,7 +904,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010120", ++ "MSRValue": "0x10120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -928,7 +943,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0120", ++ "MSRValue": "0x4003C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -941,7 +956,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0120", ++ "MSRValue": "0x1003C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -954,7 +969,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": 
"OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x08003C0120", ++ "MSRValue": "0x8003C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -967,7 +982,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010122", ++ "MSRValue": "0x10122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1006,7 +1021,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0122", ++ "MSRValue": "0x4003C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1019,7 +1034,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0122", ++ "MSRValue": "0x1003C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1032,7 +1047,7 @@ + "EventCode": "0xB7, 
0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x08003C0122", ++ "MSRValue": "0x8003C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1045,7 +1060,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010004", ++ "MSRValue": "0x10004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1084,7 +1099,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0004", ++ "MSRValue": "0x4003C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1097,7 +1112,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0004", ++ "MSRValue": "0x1003C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ 
-1110,7 +1125,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x08003C0004", ++ "MSRValue": "0x8003C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1123,7 +1138,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010001", ++ "MSRValue": "0x10001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1162,7 +1177,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0001", ++ "MSRValue": "0x4003C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1175,7 +1190,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0001", ++ "MSRValue": "0x1003C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore 
transaction.", + "SampleAfterValue": "100003", +@@ -1188,7 +1203,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x08003C0001", ++ "MSRValue": "0x8003C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1201,7 +1216,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010002", ++ "MSRValue": "0x10002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1240,7 +1255,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0002", ++ "MSRValue": "0x4003C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1253,7 +1268,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0002", ++ "MSRValue": "0x1003C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR 
to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1266,7 +1281,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x08003C0002", ++ "MSRValue": "0x8003C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1279,7 +1294,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010400", ++ "MSRValue": "0x10400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1318,7 +1333,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0400", ++ "MSRValue": "0x4003C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1331,7 +1346,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0400", ++ "MSRValue": "0x1003C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and 
predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1344,7 +1359,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x08003C0400", ++ "MSRValue": "0x8003C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1357,7 +1372,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010010", ++ "MSRValue": "0x10010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1396,7 +1411,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0010", ++ "MSRValue": "0x4003C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1409,7 +1424,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0010", ++ "MSRValue": "0x1003C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1422,7 +1437,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x08003C0010", ++ "MSRValue": "0x8003C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1435,7 +1450,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010020", ++ "MSRValue": "0x10020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1474,7 +1489,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0020", ++ "MSRValue": "0x4003C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1487,7 +1502,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0020", ++ "MSRValue": "0x1003C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a 
specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1500,7 +1515,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x08003C0020", ++ "MSRValue": "0x8003C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1513,7 +1528,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010080", ++ "MSRValue": "0x10080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1552,7 +1567,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0080", ++ "MSRValue": "0x4003C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1565,7 +1580,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0080", ++ "MSRValue": "0x1003C0080", + "Offcore": "1", + "PublicDescription": "Offcore 
response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1578,7 +1593,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x08003C0080", ++ "MSRValue": "0x8003C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1591,7 +1606,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0000010100", ++ "MSRValue": "0x10100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1630,7 +1645,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x04003C0100", ++ "MSRValue": "0x4003C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1643,7 +1658,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x01003C0100", ++ "MSRValue": "0x1003C0100", + "Offcore": "1", 
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1656,7 +1671,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x08003C0100", ++ "MSRValue": "0x8003C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +--- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json ++++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json +@@ -1,73 +1,81 @@ + [ + { +- "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", ++ "BriefDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", ++ "PublicDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired; some instructions will count twice as noted below. 
Each count represents 2 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { +- "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", ++ "BriefDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instruction retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", ++ "PublicDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. 
The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { +- "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", ++ "BriefDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", ++ "PublicDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { +- "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", ++ "BriefDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", ++ "PublicDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "2000003", + "UMask": "0x20" + }, + { +- "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.", ++ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. 
FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE", ++ "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "2000003", + "UMask": "0x40" + }, + { +- "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.", ++ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. 
FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE", ++ "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "2000003", + "UMask": "0x80" + }, + { +- "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", ++ "BriefDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", ++ "PublicDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. 
FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { +- "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", ++ "BriefDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", ++ "PublicDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. 
The DAZ and FTZ flags in the MXCSR register need to be set when using these events.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, +--- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json ++++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json +@@ -30,7 +30,21 @@ + "UMask": "0x2" + }, + { +- "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", ++ "BriefDescription": "Retired Instructions who experienced DSB miss.", ++ "Counter": "0,1,2,3", ++ "CounterHTOff": "0,1,2,3", ++ "EventCode": "0xC6", ++ "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS", ++ "MSRIndex": "0x3F7", ++ "MSRValue": "0x1", ++ "PEBS": "1", ++ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", ++ "SampleAfterValue": "100007", ++ "TakenAlone": "1", ++ "UMask": "0x1" ++ }, ++ { ++ "BriefDescription": "Retired Instructions who experienced a critical DSB miss.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", +@@ -38,7 +52,7 @@ + "MSRIndex": "0x3F7", + "MSRValue": "0x11", + "PEBS": "1", +- "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", ++ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. 
Critical means stalls were exposed to the back-end as a result of the DSB miss.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" +--- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json ++++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json +@@ -299,7 +299,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00491", ++ "MSRValue": "0x83FC00491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -312,7 +312,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00491", ++ "MSRValue": "0x63FC00491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -325,7 +325,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0604000491", ++ "MSRValue": "0x604000491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -338,7 +338,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- 
"MSRValue": "0x063B800491", ++ "MSRValue": "0x63B800491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -377,7 +377,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00490", ++ "MSRValue": "0x83FC00490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -390,7 +390,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00490", ++ "MSRValue": "0x63FC00490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -403,7 +403,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0604000490", ++ "MSRValue": "0x604000490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -416,7 +416,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": 
"OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063B800490", ++ "MSRValue": "0x63B800490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -455,7 +455,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00120", ++ "MSRValue": "0x83FC00120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -468,7 +468,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00120", ++ "MSRValue": "0x63FC00120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -481,7 +481,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0604000120", ++ "MSRValue": "0x604000120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + 
"SampleAfterValue": "100003", +@@ -494,7 +494,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063B800120", ++ "MSRValue": "0x63B800120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -533,7 +533,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00122", ++ "MSRValue": "0x83FC00122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -546,7 +546,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00122", ++ "MSRValue": "0x63FC00122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -559,7 +559,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0604000122", ++ "MSRValue": "0x604000122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a 
dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -572,7 +572,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063B800122", ++ "MSRValue": "0x63B800122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -611,7 +611,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00004", ++ "MSRValue": "0x83FC00004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -624,7 +624,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00004", ++ "MSRValue": "0x63FC00004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -637,7 +637,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0604000004", ++ "MSRValue": "0x604000004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event 
select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -650,7 +650,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063B800004", ++ "MSRValue": "0x63B800004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -689,7 +689,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00001", ++ "MSRValue": "0x83FC00001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -702,7 +702,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00001", ++ "MSRValue": "0x63FC00001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -715,7 +715,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0604000001", ++ "MSRValue": "0x604000001", + "Offcore": "1", + 
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -728,7 +728,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063B800001", ++ "MSRValue": "0x63B800001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -767,7 +767,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00002", ++ "MSRValue": "0x83FC00002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -780,7 +780,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00002", ++ "MSRValue": "0x63FC00002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -793,7 +793,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- 
"MSRValue": "0x0604000002", ++ "MSRValue": "0x604000002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -806,7 +806,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063B800002", ++ "MSRValue": "0x63B800002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -845,7 +845,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00400", ++ "MSRValue": "0x83FC00400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -858,7 +858,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00400", ++ "MSRValue": "0x63FC00400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -871,7 +871,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": 
"OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0604000400", ++ "MSRValue": "0x604000400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -884,7 +884,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063B800400", ++ "MSRValue": "0x63B800400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -923,7 +923,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00010", ++ "MSRValue": "0x83FC00010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -936,7 +936,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00010", ++ "MSRValue": "0x63FC00010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + 
"SampleAfterValue": "100003", +@@ -949,7 +949,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0604000010", ++ "MSRValue": "0x604000010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -962,7 +962,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063B800010", ++ "MSRValue": "0x63B800010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1001,7 +1001,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00020", ++ "MSRValue": "0x83FC00020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1014,7 +1014,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00020", ++ "MSRValue": "0x63FC00020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask 
bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1027,7 +1027,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0604000020", ++ "MSRValue": "0x604000020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1040,7 +1040,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063B800020", ++ "MSRValue": "0x63B800020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1079,7 +1079,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00080", ++ "MSRValue": "0x83FC00080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1092,7 +1092,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00080", ++ "MSRValue": "0x63FC00080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific 
pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1105,7 +1105,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0604000080", ++ "MSRValue": "0x604000080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1118,7 +1118,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063B800080", ++ "MSRValue": "0x63B800080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1157,7 +1157,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x083FC00100", ++ "MSRValue": "0x83FC00100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1170,7 +1170,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063FC00100", ++ "MSRValue": "0x63FC00100", + "Offcore": 
"1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1183,7 +1183,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x0604000100", ++ "MSRValue": "0x604000100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +@@ -1196,7 +1196,7 @@ + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", +- "MSRValue": "0x063B800100", ++ "MSRValue": "0x63B800100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", +--- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json ++++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +@@ -436,6 +436,17 @@ + "SampleAfterValue": "2000003" + }, + { ++ "BriefDescription": "Number of all retired NOP instructions.", ++ "Counter": "0,1,2,3", ++ "CounterHTOff": "0,1,2,3,4,5,6,7", ++ "Errata": "SKL091, SKL044", ++ "EventCode": "0xC0", ++ "EventName": "INST_RETIRED.NOP", ++ "PEBS": "1", ++ "SampleAfterValue": "2000003", ++ "UMask": "0x2" ++ }, ++ { + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "Counter": "1", + "CounterHTOff": "1", +--- 
a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json ++++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +@@ -1,26 +1,167 @@ + [ + { ++ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend", ++ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)", ++ "MetricGroup": "TopdownL1", ++ "MetricName": "Frontend_Bound", ++ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." ++ }, ++ { ++ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", ++ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", ++ "MetricGroup": "TopdownL1_SMT", ++ "MetricName": "Frontend_Bound_SMT", ++ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. 
Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." ++ }, ++ { ++ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", ++ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)", ++ "MetricGroup": "TopdownL1", ++ "MetricName": "Bad_Speculation", ++ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example." ++ }, ++ { ++ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", ++ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", ++ "MetricGroup": "TopdownL1_SMT", ++ "MetricName": "Bad_Speculation_SMT", ++ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. 
This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU." ++ }, ++ { ++ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", ++ "MetricConstraint": "NO_NMI_WATCHDOG", ++ "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)", ++ "MetricGroup": "TopdownL1", ++ "MetricName": "Backend_Bound", ++ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound." ++ }, ++ { ++ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. 
SMT version; use when SMT is enabled and measuring per logical CPU.", ++ "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", ++ "MetricGroup": "TopdownL1_SMT", ++ "MetricName": "Backend_Bound_SMT", ++ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU." ++ }, ++ { ++ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired", ++ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)", ++ "MetricGroup": "TopdownL1", ++ "MetricName": "Retiring", ++ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. 
For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. " ++ }, ++ { ++ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", ++ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", ++ "MetricGroup": "TopdownL1_SMT", ++ "MetricName": "Retiring_SMT", ++ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU." 
++ }, ++ { ++ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", ++ "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) )", ++ "MetricGroup": "Bad;BadSpec;BrMispredicts", ++ "MetricName": "Mispredictions" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", ++ "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )", ++ "MetricGroup": "Bad;BadSpec;BrMispredicts_SMT", ++ "MetricName": "Mispredictions_SMT" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", ++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + 
(EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / 
(CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (OFFCORE_REQUESTS_BUFFER.SQ_FULL / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ", ++ "MetricGroup": "Mem;MemoryBW;Offcore", ++ "MetricName": "Memory_Bandwidth" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", ++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( 
UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS 
- CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - 
CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ", ++ "MetricGroup": "Mem;MemoryBW;Offcore_SMT", ++ "MetricName": "Memory_Bandwidth_SMT" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", ++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / 
CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * 
INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) )", ++ "MetricGroup": "Mem;MemoryLat;Offcore", ++ "MetricName": "Memory_Latency" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", ++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( 
INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / 
#(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) )", ++ "MetricGroup": "Mem;MemoryLat;Offcore_SMT", ++ "MetricName": "Memory_Latency_SMT" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", ++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + 
(UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / CPU_CLK_UNHALTED.THREAD) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ", ++ "MetricGroup": "Mem;MemoryTLB", ++ "MetricName": "Memory_Data_TLBs" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", ++ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( 
INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( 9 * 
cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ", ++ "MetricGroup": "Mem;MemoryTLB;_SMT", ++ "MetricName": "Memory_Data_TLBs_SMT" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)", ++ "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * CPU_CLK_UNHALTED.THREAD))", ++ "MetricGroup": "Ret", ++ "MetricName": "Branching_Overhead" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)", ++ "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", ++ "MetricGroup": "Ret_SMT", ++ "MetricName": "Branching_Overhead_SMT" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", ++ "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))", ++ "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB", ++ 
"MetricName": "Big_Code" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", ++ "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", ++ "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB_SMT", ++ "MetricName": "Big_Code_SMT" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", ++ "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))", ++ "MetricGroup": "Fed;FetchBW;Frontend", ++ "MetricName": "Instruction_Fetch_BW" ++ }, ++ { ++ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", ++ 
"MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))", ++ "MetricGroup": "Fed;FetchBW;Frontend_SMT", ++ "MetricName": "Instruction_Fetch_BW_SMT" ++ }, ++ { + "BriefDescription": "Instructions Per Cycle (per Logical Processor)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", +- "MetricGroup": "Summary", ++ "MetricGroup": "Ret;Summary", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", +- "MetricGroup": "Pipeline;Retire", ++ "MetricGroup": "Pipeline;Ret;Retire", + "MetricName": "UPI" + }, + { + "BriefDescription": "Instruction per taken branch", +- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", +- "MetricGroup": "Branches;FetchBW;PGO", +- "MetricName": "IpTB" ++ "MetricExpr": 
"UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN", ++ "MetricGroup": "Branches;Fed;FetchBW", ++ "MetricName": "UpTB" + }, + { + "BriefDescription": "Cycles Per Instruction (per Logical Processor)", + "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)", +- "MetricGroup": "Pipeline", ++ "MetricGroup": "Pipeline;Mem", + "MetricName": "CPI" + }, + { +@@ -30,39 +171,84 @@ + "MetricName": "CLKS" + }, + { +- "BriefDescription": "Instructions Per Cycle (per physical core)", ++ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", ++ "MetricExpr": "4 * CPU_CLK_UNHALTED.THREAD", ++ "MetricGroup": "TmaL1", ++ "MetricName": "SLOTS" ++ }, ++ { ++ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", ++ "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", ++ "MetricGroup": "TmaL1_SMT", ++ "MetricName": "SLOTS_SMT" ++ }, ++ { ++ "BriefDescription": "The ratio of Executed- by Issued-Uops", ++ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY", ++ "MetricGroup": "Cor;Pipeline", ++ "MetricName": "Execute_per_Issue", ++ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage." 
++ }, ++ { ++ "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", +- "MetricGroup": "SMT;TmaL1", ++ "MetricGroup": "Ret;SMT;TmaL1", + "MetricName": "CoreIPC" + }, + { +- "BriefDescription": "Instructions Per Cycle (per physical core)", ++ "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", +- "MetricGroup": "SMT;TmaL1", ++ "MetricGroup": "Ret;SMT;TmaL1_SMT", + "MetricName": "CoreIPC_SMT" + }, + { + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.THREAD", +- "MetricGroup": "Flops", ++ "MetricGroup": "Ret;Flops", + "MetricName": "FLOPc" + }, + { + "BriefDescription": "Floating Point Operations Per Cycle", + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", +- "MetricGroup": "Flops_SMT", ++ "MetricGroup": "Ret;Flops_SMT", + "MetricName": "FLOPc_SMT" + }, + { ++ "BriefDescription": "Actual per-core usage of the Floating Point execution units 
(regardless of the vector width)", ++ "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * CPU_CLK_UNHALTED.THREAD )", ++ "MetricGroup": "Cor;Flops;HPC", ++ "MetricName": "FP_Arith_Utilization", ++ "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting." ++ }, ++ { ++ "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). SMT version; use when SMT is enabled and measuring per logical CPU.", ++ "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", ++ "MetricGroup": "Cor;Flops;HPC_SMT", ++ "MetricName": "FP_Arith_Utilization_SMT", ++ "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting. SMT version; use when SMT is enabled and measuring per logical CPU." 
++ }, ++ { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", +- "MetricGroup": "Pipeline;PortsUtil", ++ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil", + "MetricName": "ILP" + }, + { ++ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", ++ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) * (4 * CPU_CLK_UNHALTED.THREAD) / BR_MISP_RETIRED.ALL_BRANCHES", ++ "MetricGroup": "Bad;BrMispredicts", ++ "MetricName": "Branch_Misprediction_Cost" ++ }, ++ { ++ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", ++ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * 
INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES", ++ "MetricGroup": "Bad;BrMispredicts_SMT", ++ "MetricName": "Branch_Misprediction_Cost_SMT" ++ }, ++ { + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", +- "MetricGroup": "BrMispredicts", ++ "MetricGroup": "Bad;BadSpec;BrMispredicts", + "MetricName": "IpMispredict" + }, + { +@@ -86,122 +272,249 @@ + { + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", +- "MetricGroup": "Branches;InsType", ++ "MetricGroup": "Branches;Fed;InsType", + "MetricName": "IpBranch" + }, + { + "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", +- "MetricGroup": "Branches", ++ "MetricGroup": "Branches;Fed;PGO", + "MetricName": "IpCall" + }, + { ++ "BriefDescription": "Instruction per taken branch", ++ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", ++ "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO", ++ "MetricName": "IpTB" ++ }, ++ { + "BriefDescription": "Branch instructions per taken branch. 
", + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", +- "MetricGroup": "Branches;PGO", ++ "MetricGroup": "Branches;Fed;PGO", + "MetricName": "BpTkBranch" + }, + { + "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )", +- "MetricGroup": "Flops;FpArith;InsType", ++ "MetricGroup": "Flops;InsType", + "MetricName": "IpFLOP" + }, + { ++ "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)", ++ "MetricExpr": "INST_RETIRED.ANY / ( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) )", ++ "MetricGroup": "Flops;InsType", ++ "MetricName": "IpArith", ++ "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW." 
++ }, ++ { ++ "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)", ++ "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", ++ "MetricGroup": "Flops;FpScalar;InsType", ++ "MetricName": "IpArith_Scalar_SP", ++ "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." ++ }, ++ { ++ "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)", ++ "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", ++ "MetricGroup": "Flops;FpScalar;InsType", ++ "MetricName": "IpArith_Scalar_DP", ++ "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." ++ }, ++ { ++ "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)", ++ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE )", ++ "MetricGroup": "Flops;FpVector;InsType", ++ "MetricName": "IpArith_AVX128", ++ "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." ++ }, ++ { ++ "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)", ++ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )", ++ "MetricGroup": "Flops;FpVector;InsType", ++ "MetricName": "IpArith_AVX256", ++ "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). 
May undercount due to FMA double counting." ++ }, ++ { ++ "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)", ++ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )", ++ "MetricGroup": "Flops;FpVector;InsType", ++ "MetricName": "IpArith_AVX512", ++ "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting." ++ }, ++ { + "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary;TmaL1", + "MetricName": "Instructions" + }, + { ++ "BriefDescription": "Average number of Uops issued by front-end when it issued something", ++ "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@", ++ "MetricGroup": "Fed;FetchBW", ++ "MetricName": "Fetch_UpC" ++ }, ++ { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", +- "MetricGroup": "DSB;FetchBW", ++ "MetricGroup": "DSB;Fed;FetchBW", + "MetricName": "DSB_Coverage" + }, + { +- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)", ++ "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.", ++ "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - 
IDQ.ALL_MITE_CYCLES_4_UOPS ) / CPU_CLK_UNHALTED.THREAD / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))", ++ "MetricGroup": "DSBmiss;Fed", ++ "MetricName": "DSB_Misses_Cost" ++ }, ++ { ++ "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.", ++ "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))", ++ "MetricGroup": "DSBmiss;Fed_SMT", ++ "MetricName": "DSB_Misses_Cost_SMT" ++ }, ++ { ++ "BriefDescription": "Number of Instructions per non-speculative DSB miss", ++ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS", ++ "MetricGroup": "DSBmiss;Fed", ++ "MetricName": "IpDSB_Miss_Ret" ++ }, ++ { ++ 
"BriefDescription": "Fraction of branches that are non-taken conditionals", ++ "MetricExpr": "BR_INST_RETIRED.NOT_TAKEN / BR_INST_RETIRED.ALL_BRANCHES", ++ "MetricGroup": "Bad;Branches;CodeGen;PGO", ++ "MetricName": "Cond_NT" ++ }, ++ { ++ "BriefDescription": "Fraction of branches that are taken conditionals", ++ "MetricExpr": "( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) / BR_INST_RETIRED.ALL_BRANCHES", ++ "MetricGroup": "Bad;Branches;CodeGen;PGO", ++ "MetricName": "Cond_TK" ++ }, ++ { ++ "BriefDescription": "Fraction of branches that are CALL or RET", ++ "MetricExpr": "( BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN ) / BR_INST_RETIRED.ALL_BRANCHES", ++ "MetricGroup": "Bad;Branches", ++ "MetricName": "CallRet" ++ }, ++ { ++ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps", ++ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES", ++ "MetricGroup": "Bad;Branches", ++ "MetricName": "Jump" ++ }, ++ { ++ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", +- "MetricGroup": "MemoryBound;MemoryLat", +- "MetricName": "Load_Miss_Real_Latency" ++ "MetricGroup": "Mem;MemoryBound;MemoryLat", ++ "MetricName": "Load_Miss_Real_Latency", ++ "PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings." + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. 
Per-Logical Processor)", + "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", +- "MetricGroup": "MemoryBound;MemoryBW", ++ "MetricGroup": "Mem;MemoryBound;MemoryBW", + "MetricName": "MLP" + }, + { +- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", +- "MetricConstraint": "NO_NMI_WATCHDOG", +- "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CORE_CLKS )", +- "MetricGroup": "MemoryTLB", +- "MetricName": "Page_Walks_Utilization" +- }, +- { + "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]", + "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time", +- "MetricGroup": "MemoryBW", ++ "MetricGroup": "Mem;MemoryBW", + "MetricName": "L1D_Cache_Fill_BW" + }, + { + "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]", + "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time", +- "MetricGroup": "MemoryBW", ++ "MetricGroup": "Mem;MemoryBW", + "MetricName": "L2_Cache_Fill_BW" + }, + { + "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", + "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time", +- "MetricGroup": "MemoryBW", ++ "MetricGroup": "Mem;MemoryBW", + "MetricName": "L3_Cache_Fill_BW" + }, + { + "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]", + "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", +- "MetricGroup": "MemoryBW;Offcore", ++ "MetricGroup": "Mem;MemoryBW;Offcore", + "MetricName": "L3_Cache_Access_BW" + }, + { + "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads", + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY", +- "MetricGroup": "CacheMisses", ++ "MetricGroup": "Mem;CacheMisses", + "MetricName": "L1MPKI" + }, + { ++ 
"BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)", ++ "MetricExpr": "1000 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY", ++ "MetricGroup": "Mem;CacheMisses", ++ "MetricName": "L1MPKI_Load" ++ }, ++ { + "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads", + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY", +- "MetricGroup": "CacheMisses", ++ "MetricGroup": "Mem;Backend;CacheMisses", + "MetricName": "L2MPKI" + }, + { + "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", + "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", +- "MetricGroup": "CacheMisses;Offcore", ++ "MetricGroup": "Mem;CacheMisses;Offcore", + "MetricName": "L2MPKI_All" + }, + { ++ "BriefDescription": "L2 cache misses per kilo instruction for all demand loads (including speculative)", ++ "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY", ++ "MetricGroup": "Mem;CacheMisses", ++ "MetricName": "L2MPKI_Load" ++ }, ++ { + "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)", + "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY", +- "MetricGroup": "CacheMisses", ++ "MetricGroup": "Mem;CacheMisses", + "MetricName": "L2HPKI_All" + }, + { ++ "BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)", ++ "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY", ++ "MetricGroup": "Mem;CacheMisses", ++ "MetricName": "L2HPKI_Load" ++ }, ++ { + "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads", + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY", +- "MetricGroup": "CacheMisses", ++ "MetricGroup": "Mem;CacheMisses", + "MetricName": "L3MPKI" + }, + { ++ "BriefDescription": "Fill Buffer (FB) true hits per kilo instructions for retired 
demand loads", ++ "MetricExpr": "1000 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY", ++ "MetricGroup": "Mem;CacheMisses", ++ "MetricName": "FB_HPKI" ++ }, ++ { ++ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", ++ "MetricConstraint": "NO_NMI_WATCHDOG", ++ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CPU_CLK_UNHALTED.THREAD )", ++ "MetricGroup": "Mem;MemoryTLB", ++ "MetricName": "Page_Walks_Utilization" ++ }, ++ { ++ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", ++ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", ++ "MetricGroup": "Mem;MemoryTLB_SMT", ++ "MetricName": "Page_Walks_Utilization_SMT" ++ }, ++ { + "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)", + "MetricExpr": "1000 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY", +- "MetricGroup": "L2Evicts;Server", ++ "MetricGroup": "L2Evicts;Mem;Server", + "MetricName": "L2_Evictions_Silent_PKI" + }, + { + "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction", + "MetricExpr": "1000 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY", +- "MetricGroup": "L2Evicts;Server", ++ "MetricGroup": "L2Evicts;Mem;Server", + "MetricName": "L2_Evictions_NonSilent_PKI" + }, + { +@@ -219,7 +532,7 @@ + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( 
FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time", +- "MetricGroup": "Flops;HPC", ++ "MetricGroup": "Cor;Flops;HPC", + "MetricName": "GFLOPs" + }, + { +@@ -229,6 +542,48 @@ + "MetricName": "Turbo_Utilization" + }, + { ++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0", ++ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD", ++ "MetricGroup": "Power", ++ "MetricName": "Power_License0_Utilization", ++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes." ++ }, ++ { ++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. SMT version; use when SMT is enabled and measuring per logical CPU.", ++ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", ++ "MetricGroup": "Power_SMT", ++ "MetricName": "Power_License0_Utilization_SMT", ++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes. SMT version; use when SMT is enabled and measuring per logical CPU." 
++ }, ++ { ++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1", ++ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD", ++ "MetricGroup": "Power", ++ "MetricName": "Power_License1_Utilization", ++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions." ++ }, ++ { ++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. SMT version; use when SMT is enabled and measuring per logical CPU.", ++ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", ++ "MetricGroup": "Power_SMT", ++ "MetricName": "Power_License1_Utilization_SMT", ++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU." ++ }, ++ { ++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)", ++ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD", ++ "MetricGroup": "Power", ++ "MetricName": "Power_License2_Utilization", ++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions." ++ }, ++ { ++ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). 
SMT version; use when SMT is enabled and measuring per logical CPU.", ++ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", ++ "MetricGroup": "Power_SMT", ++ "MetricName": "Power_License2_Utilization_SMT", ++ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU." ++ }, ++ { + "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", + "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT", +@@ -241,33 +596,45 @@ + "MetricName": "Kernel_Utilization" + }, + { ++ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode", ++ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k", ++ "MetricGroup": "OS", ++ "MetricName": "Kernel_CPI" ++ }, ++ { + "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", + "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", +- "MetricGroup": "HPC;MemoryBW;SoC", ++ "MetricGroup": "HPC;Mem;MemoryBW;SoC", + "MetricName": "DRAM_BW_Use" + }, + { + "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", +- "MetricGroup": "MemoryLat;SoC", ++ "MetricGroup": "Mem;MemoryLat;SoC", + "MetricName": "MEM_Read_Latency" + }, + { + "BriefDescription": "Average number of parallel data read requests to external memory. 
Accounts for demand loads and L1/L2 prefetches", + "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@", +- "MetricGroup": "MemoryBW;SoC", ++ "MetricGroup": "Mem;MemoryBW;SoC", + "MetricName": "MEM_Parallel_Reads" + }, + { ++ "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches", ++ "MetricExpr": "1000000000 * ( UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS ) / imc_0@event\\=0x0@", ++ "MetricGroup": "Mem;MemoryLat;SoC;Server", ++ "MetricName": "MEM_DRAM_Read_Latency" ++ }, ++ { + "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]", + "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time", +- "MetricGroup": "IoBW;SoC;Server", ++ "MetricGroup": "IoBW;Mem;SoC;Server", + "MetricName": "IO_Write_BW" + }, + { + "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]", + "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time", +- "MetricGroup": "IoBW;SoC;Server", ++ "MetricGroup": "IoBW;Mem;SoC;Server", + "MetricName": "IO_Read_BW" + }, + { +--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json ++++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json +@@ -538,6 +538,18 @@ + "Unit": "IIO" + }, + { ++ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0-3", ++ "Counter": "0,1,2,3", ++ "EventCode": "0xC2", ++ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.ALL_PARTS", ++ "FCMask": "0x4", ++ "PerPkg": "1", ++ "PortMask": "0x0f", ++ "PublicDescription": "PCIe Completion 
Buffer Inserts of completions with data: Part 0-3", ++ "UMask": "0x03", ++ "Unit": "IIO" ++ }, ++ { + "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0", + "Counter": "0,1,2,3", + "EventCode": "0xC2", +@@ -586,6 +598,17 @@ + "Unit": "IIO" + }, + { ++ "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3", ++ "Counter": "2,3", ++ "EventCode": "0xD5", ++ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.ALL_PARTS", ++ "FCMask": "0x04", ++ "PerPkg": "1", ++ "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3", ++ "UMask": "0x0f", ++ "Unit": "IIO" ++ }, ++ { + "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0", + "Counter": "2,3", + "EventCode": "0xD5", diff --git a/queue-5.17/pinctrl-canonical-rsel-resistance-selection-property.patch b/queue-5.17/pinctrl-canonical-rsel-resistance-selection-property.patch new file mode 100644 index 00000000000..e3b4c996975 --- /dev/null +++ b/queue-5.17/pinctrl-canonical-rsel-resistance-selection-property.patch @@ -0,0 +1,31 @@ +From 7966c5051fc7d52425155ab30ad568d9d97f3b02 Mon Sep 17 00:00:00 2001 +From: Guodong Liu +Date: Wed, 16 Feb 2022 11:21:24 +0800 +Subject: pinctrl: canonical rsel resistance selection property + +From: Guodong Liu + +commit 7966c5051fc7d52425155ab30ad568d9d97f3b02 upstream. 
+ +Change "mediatek,rsel_resistance_in_si_unit" to "mediatek,rsel-resistance-in-si-unit" + +Fixes: fb34a9ae383a ("pinctrl: mediatek: support rsel feature") +Signed-off-by: Guodong Liu +Link: https://lore.kernel.org/r/20220216032124.28067-4-guodong.liu@mediatek.com +Signed-off-by: Linus Walleij +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pinctrl/mediatek/pinctrl-paris.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/pinctrl/mediatek/pinctrl-paris.c ++++ b/drivers/pinctrl/mediatek/pinctrl-paris.c +@@ -984,7 +984,7 @@ int mtk_paris_pinctrl_probe(struct platf + hw->nbase = hw->soc->nbase_names; + + if (of_find_property(hw->dev->of_node, +- "mediatek,rsel_resistance_in_si_unit", NULL)) ++ "mediatek,rsel-resistance-in-si-unit", NULL)) + hw->rsel_si_unit = true; + else + hw->rsel_si_unit = false; diff --git a/queue-5.17/series b/queue-5.17/series index e13363aee81..07c22e1991e 100644 --- a/queue-5.17/series +++ b/queue-5.17/series @@ -1098,3 +1098,22 @@ dt-bindings-pinctrl-mt8195-fix-bias-pull-up-down-checks.patch dt-bindings-pinctrl-pinctrl-microchip-sgpio-fix-example.patch ubi-fastmap-return-error-code-if-memory-allocation-fails-in-add_aeb.patch net-preserve-skb_end_offset-in-skb_unclone_keeptruesize.patch +asoc-sof-intel-fix-build-error-without-snd_soc_sof_pci_dev.patch +asoc-topology-allow-tlv-control-to-be-either-read-or-write.patch +perf-vendor-events-update-metrics-for-skylake-server.patch +media-ov6650-add-try-support-to-selection-api-operations.patch +media-ov6650-fix-crop-rectangle-affected-by-set-format.patch +pinctrl-canonical-rsel-resistance-selection-property.patch +spi-mediatek-support-tick_delay-without-enhance_timing.patch +arm-dts-spear1340-update-serial-node-properties.patch +arm-dts-spear13xx-update-spi-dma-properties.patch +arm64-dts-ls1043a-update-i2c-dma-properties.patch +arm64-dts-ls1046a-update-i2c-node-dma-properties.patch +um-fix-uml_mconsole-stop-go.patch +docs-sysctl-kernel-add-missing-bit-to-panic_print.patch 
+xsk-do-not-write-null-in-sw-ring-at-allocation-failure.patch +ice-xsk-fix-indexing-in-ice_tx_xsk_pool.patch +vdpa-mlx5-avoid-processing-works-if-workqueue-was-destroyed.patch +openvswitch-fixed-nd-target-mask-field-in-the-flow-dump.patch +torture-make-torture.sh-help-message-match-reality.patch +n64cart-convert-bi_disk-to-bi_bdev-bd_disk-fix-build.patch diff --git a/queue-5.17/spi-mediatek-support-tick_delay-without-enhance_timing.patch b/queue-5.17/spi-mediatek-support-tick_delay-without-enhance_timing.patch new file mode 100644 index 00000000000..7aa648b0116 --- /dev/null +++ b/queue-5.17/spi-mediatek-support-tick_delay-without-enhance_timing.patch @@ -0,0 +1,54 @@ +From 03b1be379dcee2e9c866c2a455a1a4a9581b3efd Mon Sep 17 00:00:00 2001 +From: Leilk Liu +Date: Tue, 15 Mar 2022 11:24:06 +0800 +Subject: spi: mediatek: support tick_delay without enhance_timing + +From: Leilk Liu + +commit 03b1be379dcee2e9c866c2a455a1a4a9581b3efd upstream. + +this patch support tick_delay bit[31:30] without enhance_timing feature. 
+ +Fixes: f84d866ab43f("spi: mediatek: add tick_delay support") +Signed-off-by: Leilk Liu +Reviewed-by: AngeloGioacchino Del Regno +Link: https://lore.kernel.org/r/20220315032411.2826-2-leilk.liu@mediatek.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + drivers/spi/spi-mt65xx.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +--- a/drivers/spi/spi-mt65xx.c ++++ b/drivers/spi/spi-mt65xx.c +@@ -43,8 +43,11 @@ + #define SPI_CFG1_PACKET_LOOP_OFFSET 8 + #define SPI_CFG1_PACKET_LENGTH_OFFSET 16 + #define SPI_CFG1_GET_TICK_DLY_OFFSET 29 ++#define SPI_CFG1_GET_TICK_DLY_OFFSET_V1 30 + + #define SPI_CFG1_GET_TICK_DLY_MASK 0xe0000000 ++#define SPI_CFG1_GET_TICK_DLY_MASK_V1 0xc0000000 ++ + #define SPI_CFG1_CS_IDLE_MASK 0xff + #define SPI_CFG1_PACKET_LOOP_MASK 0xff00 + #define SPI_CFG1_PACKET_LENGTH_MASK 0x3ff0000 +@@ -346,9 +349,15 @@ static int mtk_spi_prepare_message(struc + + /* tick delay */ + reg_val = readl(mdata->base + SPI_CFG1_REG); +- reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK; +- reg_val |= ((chip_config->tick_delay & 0x7) +- << SPI_CFG1_GET_TICK_DLY_OFFSET); ++ if (mdata->dev_comp->enhance_timing) { ++ reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK; ++ reg_val |= ((chip_config->tick_delay & 0x7) ++ << SPI_CFG1_GET_TICK_DLY_OFFSET); ++ } else { ++ reg_val &= ~SPI_CFG1_GET_TICK_DLY_MASK_V1; ++ reg_val |= ((chip_config->tick_delay & 0x3) ++ << SPI_CFG1_GET_TICK_DLY_OFFSET_V1); ++ } + writel(reg_val, mdata->base + SPI_CFG1_REG); + + /* set hw cs timing */ diff --git a/queue-5.17/torture-make-torture.sh-help-message-match-reality.patch b/queue-5.17/torture-make-torture.sh-help-message-match-reality.patch new file mode 100644 index 00000000000..fc60749e7a2 --- /dev/null +++ b/queue-5.17/torture-make-torture.sh-help-message-match-reality.patch @@ -0,0 +1,35 @@ +From f233673cd32a048f2eed69e56b61174c33fb740b Mon Sep 17 00:00:00 2001 +From: "Paul E. 
McKenney" +Date: Thu, 27 Jan 2022 09:39:15 -0800 +Subject: torture: Make torture.sh help message match reality + +From: Paul E. McKenney + +commit f233673cd32a048f2eed69e56b61174c33fb740b upstream. + +This commit fixes a couple of typos: s/--doall/--do-all/ and +s/--doallmodconfig/--do-allmodconfig/. + +[ paulmck: Add Fixes: supplied by Paul Menzel. ] + +Fixes: a115a775a8d5 ("torture: Add "make allmodconfig" to torture.sh") +Reported-by: Paul Menzel +Signed-off-by: Paul E. McKenney +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/rcutorture/bin/torture.sh | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/rcutorture/bin/torture.sh ++++ b/tools/testing/selftests/rcutorture/bin/torture.sh +@@ -71,8 +71,8 @@ usage () { + echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\"" + echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\"" + echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\"" +- echo " --doall" +- echo " --doallmodconfig / --do-no-allmodconfig" ++ echo " --do-all" ++ echo " --do-allmodconfig / --do-no-allmodconfig" + echo " --do-clocksourcewd / --do-no-clocksourcewd" + echo " --do-kasan / --do-no-kasan" + echo " --do-kcsan / --do-no-kcsan" diff --git a/queue-5.17/um-fix-uml_mconsole-stop-go.patch b/queue-5.17/um-fix-uml_mconsole-stop-go.patch new file mode 100644 index 00000000000..5343d6f399a --- /dev/null +++ b/queue-5.17/um-fix-uml_mconsole-stop-go.patch @@ -0,0 +1,39 @@ +From 1a3a6a2a035bb6c3a7ef4c788d8fd69a7b2d6284 Mon Sep 17 00:00:00 2001 +From: Anton Ivanov +Date: Tue, 22 Feb 2022 12:44:10 +0000 +Subject: um: Fix uml_mconsole stop/go + +From: Anton Ivanov + +commit 1a3a6a2a035bb6c3a7ef4c788d8fd69a7b2d6284 upstream. + +Moving to an EPOLL based IRQ controller broke uml_mconsole stop/go +commands. This fixes it and restores stop/go functionality. 
+ +Fixes: ff6a17989c08 ("Epoll based IRQ controller") +Signed-off-by: Anton Ivanov +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + arch/um/drivers/mconsole_kern.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/um/drivers/mconsole_kern.c ++++ b/arch/um/drivers/mconsole_kern.c +@@ -224,7 +224,7 @@ void mconsole_go(struct mc_request *req) + + void mconsole_stop(struct mc_request *req) + { +- deactivate_fd(req->originating_fd, MCONSOLE_IRQ); ++ block_signals(); + os_set_fd_block(req->originating_fd, 1); + mconsole_reply(req, "stopped", 0, 0); + for (;;) { +@@ -247,6 +247,7 @@ void mconsole_stop(struct mc_request *re + } + os_set_fd_block(req->originating_fd, 0); + mconsole_reply(req, "", 0, 0); ++ unblock_signals(); + } + + static DEFINE_SPINLOCK(mc_devices_lock); diff --git a/queue-5.17/vdpa-mlx5-avoid-processing-works-if-workqueue-was-destroyed.patch b/queue-5.17/vdpa-mlx5-avoid-processing-works-if-workqueue-was-destroyed.patch new file mode 100644 index 00000000000..03414892e53 --- /dev/null +++ b/queue-5.17/vdpa-mlx5-avoid-processing-works-if-workqueue-was-destroyed.patch @@ -0,0 +1,52 @@ +From ad6dc1daaf29f97f23cc810d60ee01c0e83f4c6b Mon Sep 17 00:00:00 2001 +From: Eli Cohen +Date: Mon, 21 Mar 2022 16:13:03 +0200 +Subject: vdpa/mlx5: Avoid processing works if workqueue was destroyed + +From: Eli Cohen + +commit ad6dc1daaf29f97f23cc810d60ee01c0e83f4c6b upstream. + +If mlx5_vdpa gets unloaded while a VM is running, the workqueue will be +destroyed. However, vhost might still have reference to the kick +function and might attempt to push new works. This could lead to null +pointer dereference. + +To fix this, set mvdev->wq to NULL just before destroying and verify +that the workqueue is not NULL in mlx5_vdpa_kick_vq before attempting to +push a new work. 
+ +Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting") +Signed-off-by: Eli Cohen +Link: https://lore.kernel.org/r/20220321141303.9586-1-elic@nvidia.com +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c ++++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c +@@ -1669,7 +1669,7 @@ static void mlx5_vdpa_kick_vq(struct vdp + return; + + if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { +- if (!mvdev->cvq.ready) ++ if (!mvdev->wq || !mvdev->cvq.ready) + return; + + wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); +@@ -2707,9 +2707,12 @@ static void mlx5_vdpa_dev_del(struct vdp + struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); + struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); ++ struct workqueue_struct *wq; + + mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); +- destroy_workqueue(mvdev->wq); ++ wq = mvdev->wq; ++ mvdev->wq = NULL; ++ destroy_workqueue(wq); + _vdpa_unregister_device(dev); + mgtdev->ndev = NULL; + } diff --git a/queue-5.17/xsk-do-not-write-null-in-sw-ring-at-allocation-failure.patch b/queue-5.17/xsk-do-not-write-null-in-sw-ring-at-allocation-failure.patch new file mode 100644 index 00000000000..52662dc9769 --- /dev/null +++ b/queue-5.17/xsk-do-not-write-null-in-sw-ring-at-allocation-failure.patch @@ -0,0 +1,50 @@ +From a95a4d9b39b0324402569ed7395aae59b8fd2b11 Mon Sep 17 00:00:00 2001 +From: Magnus Karlsson +Date: Mon, 28 Mar 2022 16:21:20 +0200 +Subject: xsk: Do not write NULL in SW ring at allocation failure + +From: Magnus Karlsson + +commit a95a4d9b39b0324402569ed7395aae59b8fd2b11 upstream. + +For the case when xp_alloc_batch() is used but the batched allocation +cannot be used, there is a slow path that uses the non-batched +xp_alloc(). When it fails to allocate an entry, it returns NULL. 
The +current code wrote this NULL into the entry of the provided results +array (pointer to the driver SW ring usually) and returned. This might +not be what the driver expects and to make things simpler, just write +successfully allocated xdp_buffs into the SW ring. The driver might +have information in there that is still important after an allocation +failure. + +Note that at this point in time, there are no drivers using +xp_alloc_batch() that could trigger this slow path. But one might get +added. + +Fixes: 47e4075df300 ("xsk: Batched buffer allocation for the pool") +Signed-off-by: Magnus Karlsson +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220328142123.170157-2-maciej.fijalkowski@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + net/xdp/xsk_buff_pool.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/xdp/xsk_buff_pool.c ++++ b/net/xdp/xsk_buff_pool.c +@@ -584,9 +584,13 @@ u32 xp_alloc_batch(struct xsk_buff_pool + u32 nb_entries1 = 0, nb_entries2; + + if (unlikely(pool->dma_need_sync)) { ++ struct xdp_buff *buff; ++ + /* Slow path */ +- *xdp = xp_alloc(pool); +- return !!*xdp; ++ buff = xp_alloc(pool); ++ if (buff) ++ *xdp = buff; ++ return !!buff; + } + + if (unlikely(pool->free_list_cnt)) {