git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.5
author Sasha Levin <sashal@kernel.org>
Thu, 14 Sep 2023 01:14:21 +0000 (21:14 -0400)
committer Sasha Levin <sashal@kernel.org>
Thu, 14 Sep 2023 01:14:21 +0000 (21:14 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
122 files changed:
queue-6.5/accel-ivpu-refactor-deprecated-strncpy.patch [new file with mode: 0644]
queue-6.5/af_unix-fix-data-race-around-sk-sk_err.patch [new file with mode: 0644]
queue-6.5/af_unix-fix-data-race-around-unix_tot_inflight.patch [new file with mode: 0644]
queue-6.5/af_unix-fix-data-races-around-sk-sk_shutdown.patch [new file with mode: 0644]
queue-6.5/af_unix-fix-data-races-around-user-unix_inflight.patch [new file with mode: 0644]
queue-6.5/af_unix-fix-msg_controllen-test-in-scm_pidfd_recv-fo.patch [new file with mode: 0644]
queue-6.5/backlight-gpio_backlight-drop-output-gpio-direction-.patch [new file with mode: 0644]
queue-6.5/backlight-lp855x-initialize-pwm-state-on-first-brigh.patch [new file with mode: 0644]
queue-6.5/blk-throttle-consider-carryover_ios-bytes-in-throtl_.patch [new file with mode: 0644]
queue-6.5/blk-throttle-use-calculate_io-bytes_allowed-for-thro.patch [new file with mode: 0644]
queue-6.5/bpf-assign-bpf_tramp_run_ctx-saved_run_ctx-before-re.patch [new file with mode: 0644]
queue-6.5/bpf-bpf_sk_storage-fix-invalid-wait-context-lockdep-.patch [new file with mode: 0644]
queue-6.5/bpf-bpf_sk_storage-fix-the-missing-uncharge-in-sk_om.patch [new file with mode: 0644]
queue-6.5/bpf-invoke-__bpf_prog_exit_sleepable_recur-on-recurs.patch [new file with mode: 0644]
queue-6.5/bpf-sockmap-fix-skb-refcnt-race-after-locking-change.patch [new file with mode: 0644]
queue-6.5/ceph-make-members-in-struct-ceph_mds_request_args_ex.patch [new file with mode: 0644]
queue-6.5/drm-amd-display-fix-mode-scaling-rmx_.patch [new file with mode: 0644]
queue-6.5/drm-i915-gvt-drop-unused-helper-intel_vgpu_reset_gtt.patch [new file with mode: 0644]
queue-6.5/drm-i915-gvt-put-the-page-reference-obtained-by-kvm-.patch [new file with mode: 0644]
queue-6.5/drm-i915-gvt-verify-pfn-is-valid-before-dereferencin.patch [new file with mode: 0644]
queue-6.5/drm-i915-mark-requests-for-guc-virtual-engines-to-av.patch [new file with mode: 0644]
queue-6.5/gfs2-low-memory-forced-flush-fixes.patch [new file with mode: 0644]
queue-6.5/gfs2-switch-to-wait_event-in-gfs2_logd.patch [new file with mode: 0644]
queue-6.5/gve-fix-frag_list-chaining.patch [new file with mode: 0644]
queue-6.5/i3c-master-svc-describe-member-saved_regs.patch [new file with mode: 0644]
queue-6.5/idr-fix-param-name-in-idr_alloc_cyclic-doc.patch [new file with mode: 0644]
queue-6.5/igb-change-igb_min-to-allow-set-rx-tx-value-between-.patch [new file with mode: 0644]
queue-6.5/igb-disable-virtualization-features-on-82580.patch [new file with mode: 0644]
queue-6.5/igbvf-change-igbvf_min-to-allow-set-rx-tx-value-betw.patch [new file with mode: 0644]
queue-6.5/igc-change-igc_min-to-allow-set-rx-tx-value-between-.patch [new file with mode: 0644]
queue-6.5/input-iqs7222-configure-power-mode-before-triggering.patch [new file with mode: 0644]
queue-6.5/input-tca6416-keypad-always-expect-proper-irq-number.patch [new file with mode: 0644]
queue-6.5/input-tca6416-keypad-fix-interrupt-enable-disbalance.patch [new file with mode: 0644]
queue-6.5/ip_tunnels-use-dev_stats_inc.patch [new file with mode: 0644]
queue-6.5/ipv4-annotate-data-races-around-fi-fib_dead.patch [new file with mode: 0644]
queue-6.5/ipv4-ignore-dst-hint-for-multipath-routes.patch [new file with mode: 0644]
queue-6.5/ipv6-ignore-dst-hint-for-multipath-routes.patch [new file with mode: 0644]
queue-6.5/kbuild-do-not-run-depmod-for-make-modules_sign.patch [new file with mode: 0644]
queue-6.5/kbuild-dummy-tools-make-mprofile_kernel-checks-work-.patch [new file with mode: 0644]
queue-6.5/kbuild-rpm-pkg-define-_arch-conditionally.patch [new file with mode: 0644]
queue-6.5/kcm-destroy-mutex-in-kcm_exit_net.patch [new file with mode: 0644]
queue-6.5/kconfig-fix-possible-buffer-overflow.patch [new file with mode: 0644]
queue-6.5/kvm-svm-correct-the-size-of-spec_ctrl-field-in-vmcb-.patch [new file with mode: 0644]
queue-6.5/kvm-svm-don-t-defer-nmi-unblocking-until-next-exit-f.patch [new file with mode: 0644]
queue-6.5/mailbox-qcom-ipcc-fix-incorrect-num_chans-counting.patch [new file with mode: 0644]
queue-6.5/mlx5-core-e-switch-create-acl-ft-for-eswitch-manager.patch [new file with mode: 0644]
queue-6.5/mptcp-annotate-data-races-around-msk-rmem_fwd_alloc.patch [new file with mode: 0644]
queue-6.5/net-annotate-data-races-around-sk-sk_bind_phc.patch [new file with mode: 0644]
queue-6.5/net-annotate-data-races-around-sk-sk_forward_alloc.patch [new file with mode: 0644]
queue-6.5/net-annotate-data-races-around-sk-sk_tsflags.patch [new file with mode: 0644]
queue-6.5/net-dsa-sja1105-complete-tc-cbs-offload-support-on-s.patch [new file with mode: 0644]
queue-6.5/net-dsa-sja1105-fix-bandwidth-discrepancy-between-tc.patch [new file with mode: 0644]
queue-6.5/net-dsa-sja1105-fix-enospc-when-replacing-the-same-t.patch [new file with mode: 0644]
queue-6.5/net-enetc-distinguish-error-from-valid-pointers-in-e.patch [new file with mode: 0644]
queue-6.5/net-fib-avoid-warn-splat-in-flow-dissector.patch [new file with mode: 0644]
queue-6.5/net-handshake-fix-null-ptr-deref-in-handshake_nl_don.patch [new file with mode: 0644]
queue-6.5/net-hns3-fix-byte-order-conversion-issue-in-hclge_db.patch [new file with mode: 0644]
queue-6.5/net-hns3-fix-debugfs-concurrency-issue-between-kfree.patch [new file with mode: 0644]
queue-6.5/net-hns3-fix-invalid-mutex-between-tc-qdisc-and-dcb-.patch [new file with mode: 0644]
queue-6.5/net-hns3-fix-the-port-information-display-when-sfp-i.patch [new file with mode: 0644]
queue-6.5/net-hns3-fix-tx-timeout-issue.patch [new file with mode: 0644]
queue-6.5/net-hns3-remove-gso-partial-feature-bit.patch [new file with mode: 0644]
queue-6.5/net-ipv6-addrconf-avoid-integer-underflow-in-ipv6_cr.patch [new file with mode: 0644]
queue-6.5/net-mlx5-give-esw_offloads_load-unload_rep-mlx5_-pre.patch [new file with mode: 0644]
queue-6.5/net-mlx5-push-devlink-port-pf-vf-init-cleanup-calls-.patch [new file with mode: 0644]
queue-6.5/net-mlx5-rework-devlink-port-alloc-free-into-init-cl.patch [new file with mode: 0644]
queue-6.5/net-mlx5e-clear-mirred-devices-array-if-the-rule-is-.patch [new file with mode: 0644]
queue-6.5/net-phy-micrel-correct-bit-assignments-for-phy_devic.patch [new file with mode: 0644]
queue-6.5/net-phy-provide-module-4-ksz9477-errata-ds80000754c.patch [new file with mode: 0644]
queue-6.5/net-phylink-fix-sphinx-complaint-about-invalid-liter.patch [new file with mode: 0644]
queue-6.5/net-read-sk-sk_family-once-in-sk_mc_loop.patch [new file with mode: 0644]
queue-6.5/net-sched-fq_pie-avoid-stalls-in-fq_pie_timer.patch [new file with mode: 0644]
queue-6.5/net-sched-sch_qfq-fix-uaf-in-qfq_dequeue.patch [new file with mode: 0644]
queue-6.5/net-use-sk_forward_alloc_get-in-sk_get_meminfo.patch [new file with mode: 0644]
queue-6.5/netfilter-nf_tables-audit-log-rule-reset.patch [new file with mode: 0644]
queue-6.5/netfilter-nf_tables-audit-log-setelem-reset.patch [new file with mode: 0644]
queue-6.5/netfilter-nf_tables-unbreak-audit-log-reset.patch [new file with mode: 0644]
queue-6.5/netfilter-nfnetlink_osf-avoid-oob-read.patch [new file with mode: 0644]
queue-6.5/netfilter-nft_set_rbtree-skip-sync-gc-for-new-elemen.patch [new file with mode: 0644]
queue-6.5/netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch [new file with mode: 0644]
queue-6.5/octeontx2-af-fix-truncation-of-smq-in-cn10k-nix-aq-e.patch [new file with mode: 0644]
queue-6.5/perf-annotate-bpf-don-t-enclose-non-debug-code-with-.patch [new file with mode: 0644]
queue-6.5/perf-bpf-filter-fix-sample-flag-check-with.patch [new file with mode: 0644]
queue-6.5/perf-dlfilter-add-al_cleanup.patch [new file with mode: 0644]
queue-6.5/perf-dlfilter-initialize-addr_location-before-passin.patch [new file with mode: 0644]
queue-6.5/perf-header-fix-missing-pmu-caps.patch [new file with mode: 0644]
queue-6.5/perf-lock-don-t-pass-an-err_ptr-directly-to-perf_ses.patch [new file with mode: 0644]
queue-6.5/perf-parse-events-additional-error-reporting.patch [new file with mode: 0644]
queue-6.5/perf-parse-events-move-instances-of-yyabort-to-yynom.patch [new file with mode: 0644]
queue-6.5/perf-parse-events-separate-enomem-memory-handling.patch [new file with mode: 0644]
queue-6.5/perf-parse-events-separate-yyabort-and-yynomem-cases.patch [new file with mode: 0644]
queue-6.5/perf-script-print-cgroup-field-on-the-same-line-as-c.patch [new file with mode: 0644]
queue-6.5/perf-test-stat_bpf_counters_cgrp-enhance-perf-stat-c.patch [new file with mode: 0644]
queue-6.5/perf-test-stat_bpf_counters_cgrp-fix-shellcheck-issu.patch [new file with mode: 0644]
queue-6.5/perf-top-don-t-pass-an-err_ptr-directly-to-perf_sess.patch [new file with mode: 0644]
queue-6.5/perf-trace-really-free-the-evsel-priv-area.patch [new file with mode: 0644]
queue-6.5/perf-vendor-events-arm64-remove-l1d_cache_lmiss-from.patch [new file with mode: 0644]
queue-6.5/perf-vendor-events-drop-some-of-the-json-events-for-.patch [new file with mode: 0644]
queue-6.5/perf-vendor-events-drop-stores_per_inst-metric-event.patch [new file with mode: 0644]
queue-6.5/perf-vendor-events-move-json-events-to-appropriate-f.patch [new file with mode: 0644]
queue-6.5/perf-vendor-events-update-metric-event-names-for-pow.patch [new file with mode: 0644]
queue-6.5/perf-vendor-events-update-the-json-events-descriptio.patch [new file with mode: 0644]
queue-6.5/pwm-atmel-tcb-fix-resource-freeing-in-error-path-and.patch [new file with mode: 0644]
queue-6.5/pwm-atmel-tcb-harmonize-resource-allocation-order.patch [new file with mode: 0644]
queue-6.5/pwm-lpc32xx-remove-handling-of-pwm-channels.patch [new file with mode: 0644]
queue-6.5/regulator-tps6287x-fix-n_voltages.patch [new file with mode: 0644]
queue-6.5/regulator-tps6594-regulator-fix-random-kernel-crash.patch [new file with mode: 0644]
queue-6.5/s390-bpf-pass-through-tail-call-counter-in-trampolin.patch [new file with mode: 0644]
queue-6.5/s390-zcrypt-don-t-leak-memory-if-dev_set_name-fails.patch [new file with mode: 0644]
queue-6.5/sctp-annotate-data-races-around-sk-sk_wmem_queued.patch [new file with mode: 0644]
queue-6.5/selftests-bpf-fix-a-ci-failure-caused-by-vsock-write.patch [new file with mode: 0644]
queue-6.5/selftests-bpf-fix-flaky-cgroup_iter_sleepable-subtes.patch [new file with mode: 0644]
queue-6.5/series
queue-6.5/sh-boards-fix-ceu-buffer-size-passed-to-dma_declare_.patch [new file with mode: 0644]
queue-6.5/sh-push-switch-reorder-cleanup-operations-to-avoid-u.patch [new file with mode: 0644]
queue-6.5/smb-propagate-error-code-of-extract_sharename.patch [new file with mode: 0644]
queue-6.5/tools-mm-fix-undefined-reference-to-pthread_once.patch [new file with mode: 0644]
queue-6.5/tpm_crb-fix-an-error-handling-path-in-crb_acpi_add.patch [new file with mode: 0644]
queue-6.5/veth-fixing-transmit-return-status-for-dropped-packe.patch [new file with mode: 0644]
queue-6.5/watchdog-intel-mid_wdt-add-module_alias-to-allow-aut.patch [new file with mode: 0644]
queue-6.5/x86-virt-drop-unnecessary-check-on-extended-cpuid-le.patch [new file with mode: 0644]
queue-6.5/xsk-fix-xsk_diag-use-after-free-error-during-socket-.patch [new file with mode: 0644]

diff --git a/queue-6.5/accel-ivpu-refactor-deprecated-strncpy.patch b/queue-6.5/accel-ivpu-refactor-deprecated-strncpy.patch
new file mode 100644 (file)
index 0000000..932546c
--- /dev/null
@@ -0,0 +1,59 @@
+From 6a989b30d8c170fc2dfbc4e03f883768d4c059ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Aug 2023 21:20:25 +0000
+Subject: accel/ivpu: refactor deprecated strncpy
+
+From: Justin Stitt <justinstitt@google.com>
+
+[ Upstream commit 4b2fd81f2af7147e844ecec0c5c07a16bca6b86e ]
+
+`strncpy` is deprecated for use on NUL-terminated destination strings [1].
+
+A suitable replacement is `strscpy` [2] due to the fact that it
+guarantees NUL-termination on its destination buffer argument which is
+_not_ the case for `strncpy`!
+
+Also remove extraneous if-statement as it can never be entered. The
+return value from `strncpy` is its first argument. In this case,
+`...dyndbg_cmd` is an array:
+|      char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN];
+             ^^^^^^^^^^
+This can never be NULL which means `strncpy`'s return value cannot be
+NULL here. Just use `strscpy` which is more robust and results in
+simpler and less ambiguous code.
+
+Moreover, remove needless `... - 1` as `strscpy`'s implementation
+ensures NUL-termination and we do not need to carefully dance around
+ending boundaries with a "- 1" anymore.
+
+Fixes: 5d7422cfb498 ("accel/ivpu: Add IPC driver and JSM messages")
+Link: www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1]
+Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html [2]
+Link: https://github.com/KSPP/linux/issues/90
+Cc: linux-hardening@vger.kernel.org
+Signed-off-by: Justin Stitt <justinstitt@google.com>
+Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
+Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230824-strncpy-drivers-accel-ivpu-ivpu_jsm_msg-c-v1-1-12d9b52d2dff@google.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/accel/ivpu/ivpu_jsm_msg.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
+index 831bfd2b2d39d..bdddef2c59eec 100644
+--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
++++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
+@@ -118,8 +118,7 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size
+       struct vpu_jsm_msg resp;
+       int ret;
+-      if (!strncpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN - 1))
+-              return -ENOMEM;
++      strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN);
+       ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp,
+                                   VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
+-- 
+2.40.1
+
diff --git a/queue-6.5/af_unix-fix-data-race-around-sk-sk_err.patch b/queue-6.5/af_unix-fix-data-race-around-sk-sk_err.patch
new file mode 100644 (file)
index 0000000..0a35a79
--- /dev/null
@@ -0,0 +1,42 @@
+From f91006860534b49358df7c57026f557a6b955e70 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 17:27:08 -0700
+Subject: af_unix: Fix data race around sk->sk_err.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit b192812905e4b134f7b7994b079eb647e9d2d37e ]
+
+As with sk->sk_shutdown shown in the previous patch, sk->sk_err can be
+read locklessly by unix_dgram_sendmsg().
+
+Let's use READ_ONCE() for sk_err as well.
+
+Note that the writer side is marked by commit cc04410af7de ("af_unix:
+annotate lockless accesses to sk->sk_err").
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 2a78e47f76dba..29c6cb030818b 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -2748,7 +2748,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
+                       break;
+               if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
+                       break;
+-              if (sk->sk_err)
++              if (READ_ONCE(sk->sk_err))
+                       break;
+               timeo = schedule_timeout(timeo);
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.5/af_unix-fix-data-race-around-unix_tot_inflight.patch b/queue-6.5/af_unix-fix-data-race-around-unix_tot_inflight.patch
new file mode 100644 (file)
index 0000000..b84145d
--- /dev/null
@@ -0,0 +1,84 @@
+From e25888611ca8ce544e268367cd8fdadc22d5bb57 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 17:27:06 -0700
+Subject: af_unix: Fix data-race around unix_tot_inflight.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit ade32bd8a738d7497ffe9743c46728db26740f78 ]
+
+unix_tot_inflight is changed under spin_lock(unix_gc_lock), but
+unix_release_sock() reads it locklessly.
+
+Let's use READ_ONCE() for unix_tot_inflight.
+
+Note that the writer side was marked by commit 9d6d7f1cb67c ("af_unix:
+annote lockless accesses to unix_tot_inflight & gc_in_progress")
+
+BUG: KCSAN: data-race in unix_inflight / unix_release_sock
+
+write (marked) to 0xffffffff871852b8 of 4 bytes by task 123 on cpu 1:
+ unix_inflight+0x130/0x180 net/unix/scm.c:64
+ unix_attach_fds+0x137/0x1b0 net/unix/scm.c:123
+ unix_scm_to_skb net/unix/af_unix.c:1832 [inline]
+ unix_dgram_sendmsg+0x46a/0x14f0 net/unix/af_unix.c:1955
+ sock_sendmsg_nosec net/socket.c:724 [inline]
+ sock_sendmsg+0x148/0x160 net/socket.c:747
+ ____sys_sendmsg+0x4e4/0x610 net/socket.c:2493
+ ___sys_sendmsg+0xc6/0x140 net/socket.c:2547
+ __sys_sendmsg+0x94/0x140 net/socket.c:2576
+ __do_sys_sendmsg net/socket.c:2585 [inline]
+ __se_sys_sendmsg net/socket.c:2583 [inline]
+ __x64_sys_sendmsg+0x45/0x50 net/socket.c:2583
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+
+read to 0xffffffff871852b8 of 4 bytes by task 4891 on cpu 0:
+ unix_release_sock+0x608/0x910 net/unix/af_unix.c:671
+ unix_release+0x59/0x80 net/unix/af_unix.c:1058
+ __sock_release+0x7d/0x170 net/socket.c:653
+ sock_close+0x19/0x30 net/socket.c:1385
+ __fput+0x179/0x5e0 fs/file_table.c:321
+ ____fput+0x15/0x20 fs/file_table.c:349
+ task_work_run+0x116/0x1a0 kernel/task_work.c:179
+ resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
+ exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
+ exit_to_user_mode_prepare+0x174/0x180 kernel/entry/common.c:204
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline]
+ syscall_exit_to_user_mode+0x1a/0x30 kernel/entry/common.c:297
+ do_syscall_64+0x4b/0x90 arch/x86/entry/common.c:86
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+
+value changed: 0x00000000 -> 0x00000001
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 4891 Comm: systemd-coredum Not tainted 6.4.0-rc5-01219-gfa0e21fa4443 #5
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+
+Fixes: 9305cfa4443d ("[AF_UNIX]: Make unix_tot_inflight counter non-atomic")
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 86930a8ed012b..3e8a04a136688 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -680,7 +680,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
+        *        What the above comment does talk about? --ANK(980817)
+        */
+-      if (unix_tot_inflight)
++      if (READ_ONCE(unix_tot_inflight))
+               unix_gc();              /* Garbage collect fds */
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/af_unix-fix-data-races-around-sk-sk_shutdown.patch b/queue-6.5/af_unix-fix-data-races-around-sk-sk_shutdown.patch
new file mode 100644 (file)
index 0000000..d9d135b
--- /dev/null
@@ -0,0 +1,96 @@
+From f318f99e3b20b888ef4f0e6fe3b95df5ee2dd899 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 17:27:07 -0700
+Subject: af_unix: Fix data-races around sk->sk_shutdown.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit afe8764f76346ba838d4f162883e23d2fcfaa90e ]
+
+sk->sk_shutdown is changed under unix_state_lock(sk), but
+unix_dgram_sendmsg() calls two functions to read sk_shutdown locklessly.
+
+  sock_alloc_send_pskb
+  `- sock_wait_for_wmem
+
+Let's use READ_ONCE() there.
+
+Note that the writer side was marked by commit e1d09c2c2f57 ("af_unix:
+Fix data races around sk->sk_shutdown.").
+
+BUG: KCSAN: data-race in sock_alloc_send_pskb / unix_release_sock
+
+write (marked) to 0xffff8880069af12c of 1 bytes by task 1 on cpu 1:
+ unix_release_sock+0x75c/0x910 net/unix/af_unix.c:631
+ unix_release+0x59/0x80 net/unix/af_unix.c:1053
+ __sock_release+0x7d/0x170 net/socket.c:654
+ sock_close+0x19/0x30 net/socket.c:1386
+ __fput+0x2a3/0x680 fs/file_table.c:384
+ ____fput+0x15/0x20 fs/file_table.c:412
+ task_work_run+0x116/0x1a0 kernel/task_work.c:179
+ resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
+ exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
+ exit_to_user_mode_prepare+0x174/0x180 kernel/entry/common.c:204
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline]
+ syscall_exit_to_user_mode+0x1a/0x30 kernel/entry/common.c:297
+ do_syscall_64+0x4b/0x90 arch/x86/entry/common.c:86
+ entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+
+read to 0xffff8880069af12c of 1 bytes by task 28650 on cpu 0:
+ sock_alloc_send_pskb+0xd2/0x620 net/core/sock.c:2767
+ unix_dgram_sendmsg+0x2f8/0x14f0 net/unix/af_unix.c:1944
+ unix_seqpacket_sendmsg net/unix/af_unix.c:2308 [inline]
+ unix_seqpacket_sendmsg+0xba/0x130 net/unix/af_unix.c:2292
+ sock_sendmsg_nosec net/socket.c:725 [inline]
+ sock_sendmsg+0x148/0x160 net/socket.c:748
+ ____sys_sendmsg+0x4e4/0x610 net/socket.c:2494
+ ___sys_sendmsg+0xc6/0x140 net/socket.c:2548
+ __sys_sendmsg+0x94/0x140 net/socket.c:2577
+ __do_sys_sendmsg net/socket.c:2586 [inline]
+ __se_sys_sendmsg net/socket.c:2584 [inline]
+ __x64_sys_sendmsg+0x45/0x50 net/socket.c:2584
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+
+value changed: 0x00 -> 0x03
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 28650 Comm: systemd-coredum Not tainted 6.4.0-11989-g6843306689af #6
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 0a687c8fbed7f..2a78e47f76dba 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -2746,7 +2746,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
+               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+               if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
+                       break;
+-              if (sk->sk_shutdown & SEND_SHUTDOWN)
++              if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
+                       break;
+               if (sk->sk_err)
+                       break;
+@@ -2776,7 +2776,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
+                       goto failure;
+               err = -EPIPE;
+-              if (sk->sk_shutdown & SEND_SHUTDOWN)
++              if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
+                       goto failure;
+               if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
+-- 
+2.40.1
+
diff --git a/queue-6.5/af_unix-fix-data-races-around-user-unix_inflight.patch b/queue-6.5/af_unix-fix-data-races-around-user-unix_inflight.patch
new file mode 100644 (file)
index 0000000..67947ab
--- /dev/null
@@ -0,0 +1,105 @@
+From 0b8102d441c4df4ad7a8c42524fc7454dff13522 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 17:27:05 -0700
+Subject: af_unix: Fix data-races around user->unix_inflight.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 0bc36c0650b21df36fbec8136add83936eaf0607 ]
+
+user->unix_inflight is changed under spin_lock(unix_gc_lock),
+but too_many_unix_fds() reads it locklessly.
+
+Let's annotate the write/read accesses to user->unix_inflight.
+
+BUG: KCSAN: data-race in unix_attach_fds / unix_inflight
+
+write to 0xffffffff8546f2d0 of 8 bytes by task 44798 on cpu 1:
+ unix_inflight+0x157/0x180 net/unix/scm.c:66
+ unix_attach_fds+0x147/0x1e0 net/unix/scm.c:123
+ unix_scm_to_skb net/unix/af_unix.c:1827 [inline]
+ unix_dgram_sendmsg+0x46a/0x14f0 net/unix/af_unix.c:1950
+ unix_seqpacket_sendmsg net/unix/af_unix.c:2308 [inline]
+ unix_seqpacket_sendmsg+0xba/0x130 net/unix/af_unix.c:2292
+ sock_sendmsg_nosec net/socket.c:725 [inline]
+ sock_sendmsg+0x148/0x160 net/socket.c:748
+ ____sys_sendmsg+0x4e4/0x610 net/socket.c:2494
+ ___sys_sendmsg+0xc6/0x140 net/socket.c:2548
+ __sys_sendmsg+0x94/0x140 net/socket.c:2577
+ __do_sys_sendmsg net/socket.c:2586 [inline]
+ __se_sys_sendmsg net/socket.c:2584 [inline]
+ __x64_sys_sendmsg+0x45/0x50 net/socket.c:2584
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+
+read to 0xffffffff8546f2d0 of 8 bytes by task 44814 on cpu 0:
+ too_many_unix_fds net/unix/scm.c:101 [inline]
+ unix_attach_fds+0x54/0x1e0 net/unix/scm.c:110
+ unix_scm_to_skb net/unix/af_unix.c:1827 [inline]
+ unix_dgram_sendmsg+0x46a/0x14f0 net/unix/af_unix.c:1950
+ unix_seqpacket_sendmsg net/unix/af_unix.c:2308 [inline]
+ unix_seqpacket_sendmsg+0xba/0x130 net/unix/af_unix.c:2292
+ sock_sendmsg_nosec net/socket.c:725 [inline]
+ sock_sendmsg+0x148/0x160 net/socket.c:748
+ ____sys_sendmsg+0x4e4/0x610 net/socket.c:2494
+ ___sys_sendmsg+0xc6/0x140 net/socket.c:2548
+ __sys_sendmsg+0x94/0x140 net/socket.c:2577
+ __do_sys_sendmsg net/socket.c:2586 [inline]
+ __se_sys_sendmsg net/socket.c:2584 [inline]
+ __x64_sys_sendmsg+0x45/0x50 net/socket.c:2584
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+
+value changed: 0x000000000000000c -> 0x000000000000000d
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 44814 Comm: systemd-coredum Not tainted 6.4.0-11989-g6843306689af #6
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+
+Fixes: 712f4aad406b ("unix: properly account for FDs passed over unix sockets")
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Acked-by: Willy Tarreau <w@1wt.eu>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/scm.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/unix/scm.c b/net/unix/scm.c
+index f9152881d77f6..033e21e5c4df6 100644
+--- a/net/unix/scm.c
++++ b/net/unix/scm.c
+@@ -63,7 +63,7 @@ void unix_inflight(struct user_struct *user, struct file *fp)
+               /* Paired with READ_ONCE() in wait_for_unix_gc() */
+               WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
+       }
+-      user->unix_inflight++;
++      WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
+       spin_unlock(&unix_gc_lock);
+ }
+@@ -84,7 +84,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
+               /* Paired with READ_ONCE() in wait_for_unix_gc() */
+               WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
+       }
+-      user->unix_inflight--;
++      WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
+       spin_unlock(&unix_gc_lock);
+ }
+@@ -98,7 +98,7 @@ static inline bool too_many_unix_fds(struct task_struct *p)
+ {
+       struct user_struct *user = current_user();
+-      if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
++      if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
+               return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+       return false;
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/af_unix-fix-msg_controllen-test-in-scm_pidfd_recv-fo.patch b/queue-6.5/af_unix-fix-msg_controllen-test-in-scm_pidfd_recv-fo.patch
new file mode 100644 (file)
index 0000000..e9a002c
--- /dev/null
@@ -0,0 +1,68 @@
+From 80c87260ca4afe5783fc347125857c90aa9012ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 16:46:04 -0700
+Subject: af_unix: Fix msg_controllen test in scm_pidfd_recv() for
+ MSG_CMSG_COMPAT.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 718e6b51298e0f254baca0d40ab52a00e004e014 ]
+
+Heiko Carstens reported that SCM_PIDFD does not work with MSG_CMSG_COMPAT
+because scm_pidfd_recv() always checks msg_controllen against sizeof(struct
+cmsghdr).
+
+We need to use sizeof(struct compat_cmsghdr) for the compat case.
+
+Fixes: 5e2ff6704a27 ("scm: add SO_PASSPIDFD and SCM_PIDFD")
+Reported-by: Heiko Carstens <hca@linux.ibm.com>
+Closes: https://lore.kernel.org/netdev/20230901200517.8742-A-hca@linux.ibm.com/
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Tested-by: Heiko Carstens <hca@linux.ibm.com>
+Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Acked-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/scm.h | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/include/net/scm.h b/include/net/scm.h
+index c5bcdf65f55c9..e8c76b4be2fe7 100644
+--- a/include/net/scm.h
++++ b/include/net/scm.h
+@@ -9,6 +9,7 @@
+ #include <linux/pid.h>
+ #include <linux/nsproxy.h>
+ #include <linux/sched/signal.h>
++#include <net/compat.h>
+ /* Well, we should have at least one descriptor open
+  * to accept passed FDs 8)
+@@ -123,14 +124,17 @@ static inline bool scm_has_secdata(struct socket *sock)
+ static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
+ {
+       struct file *pidfd_file = NULL;
+-      int pidfd;
++      int len, pidfd;
+-      /*
+-       * put_cmsg() doesn't return an error if CMSG is truncated,
++      /* put_cmsg() doesn't return an error if CMSG is truncated,
+        * that's why we need to opencode these checks here.
+        */
+-      if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
+-          (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
++      if (msg->msg_flags & MSG_CMSG_COMPAT)
++              len = sizeof(struct compat_cmsghdr) + sizeof(int);
++      else
++              len = sizeof(struct cmsghdr) + sizeof(int);
++
++      if (msg->msg_controllen < len) {
+               msg->msg_flags |= MSG_CTRUNC;
+               return;
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.5/backlight-gpio_backlight-drop-output-gpio-direction-.patch b/queue-6.5/backlight-gpio_backlight-drop-output-gpio-direction-.patch
new file mode 100644 (file)
index 0000000..7e8e27c
--- /dev/null
@@ -0,0 +1,42 @@
+From 95a91229fcff934c0bd3b32ee005b0115ec7741d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jul 2023 09:29:03 +0000
+Subject: backlight: gpio_backlight: Drop output GPIO direction check for
+ initial power state
+
+From: Ying Liu <victor.liu@nxp.com>
+
+[ Upstream commit fe1328b5b2a087221e31da77e617f4c2b70f3b7f ]
+
+So, let's drop output GPIO direction check and only check GPIO value to set
+the initial power state.
+
+Fixes: 706dc68102bc ("backlight: gpio: Explicitly set the direction of the GPIO")
+Signed-off-by: Liu Ying <victor.liu@nxp.com>
+Reviewed-by: Andy Shevchenko <andy@kernel.org>
+Acked-by: Linus Walleij <linus.walleij@linaro.org>
+Acked-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Link: https://lore.kernel.org/r/20230721093342.1532531-1-victor.liu@nxp.com
+Signed-off-by: Lee Jones <lee@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/backlight/gpio_backlight.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/video/backlight/gpio_backlight.c b/drivers/video/backlight/gpio_backlight.c
+index 5c5c99f7979e3..30ec5b6845335 100644
+--- a/drivers/video/backlight/gpio_backlight.c
++++ b/drivers/video/backlight/gpio_backlight.c
+@@ -87,8 +87,7 @@ static int gpio_backlight_probe(struct platform_device *pdev)
+               /* Not booted with device tree or no phandle link to the node */
+               bl->props.power = def_value ? FB_BLANK_UNBLANK
+                                           : FB_BLANK_POWERDOWN;
+-      else if (gpiod_get_direction(gbl->gpiod) == 0 &&
+-               gpiod_get_value_cansleep(gbl->gpiod) == 0)
++      else if (gpiod_get_value_cansleep(gbl->gpiod) == 0)
+               bl->props.power = FB_BLANK_POWERDOWN;
+       else
+               bl->props.power = FB_BLANK_UNBLANK;
+-- 
+2.40.1
+
diff --git a/queue-6.5/backlight-lp855x-initialize-pwm-state-on-first-brigh.patch b/queue-6.5/backlight-lp855x-initialize-pwm-state-on-first-brigh.patch
new file mode 100644 (file)
index 0000000..0057c9a
--- /dev/null
@@ -0,0 +1,94 @@
+From 7f96da571393b4fa88947642621cefb0e0d032df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jul 2023 14:14:39 +0200
+Subject: backlight: lp855x: Initialize PWM state on first brightness change
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Artur Weber <aweber.kernel@gmail.com>
+
+[ Upstream commit 4c09e20b3c85f60353ace21092e34f35f5e3ab00 ]
+
+As pointed out by Uwe Kleine-König[1], the changes introduced in
+commit c1ff7da03e16 ("video: backlight: lp855x: Get PWM for PWM mode
+during probe") caused the PWM state set up by the bootloader to be
+re-set when the driver is probed. This differs from the behavior from
+before that patch, where the PWM state would be initialized on the
+first brightness change.
+
+Fix this by moving the PWM state initialization into the PWM control
+function. Add a new variable, needs_pwm_init, to the device info struct
+to allow us to check whether we need the initialization, or whether it
+has already been done.
+
+[1] https://lore.kernel.org/lkml/20230614083953.e4kkweddjz7wztby@pengutronix.de/
+
+Fixes: c1ff7da03e16 ("video: backlight: lp855x: Get PWM for PWM mode during probe")
+Signed-off-by: Artur Weber <aweber.kernel@gmail.com>
+Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
+Link: https://lore.kernel.org/r/20230714121440.7717-2-aweber.kernel@gmail.com
+Signed-off-by: Lee Jones <lee@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/backlight/lp855x_bl.c | 20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c
+index 1c9e921bca14a..349ec324bc1ea 100644
+--- a/drivers/video/backlight/lp855x_bl.c
++++ b/drivers/video/backlight/lp855x_bl.c
+@@ -71,6 +71,7 @@ struct lp855x {
+       struct device *dev;
+       struct lp855x_platform_data *pdata;
+       struct pwm_device *pwm;
++      bool needs_pwm_init;
+       struct regulator *supply;       /* regulator for VDD input */
+       struct regulator *enable;       /* regulator for EN/VDDIO input */
+ };
+@@ -220,7 +221,15 @@ static void lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br)
+ {
+       struct pwm_state state;
+-      pwm_get_state(lp->pwm, &state);
++      if (lp->needs_pwm_init) {
++              pwm_init_state(lp->pwm, &state);
++              /* Legacy platform data compatibility */
++              if (lp->pdata->period_ns > 0)
++                      state.period = lp->pdata->period_ns;
++              lp->needs_pwm_init = false;
++      } else {
++              pwm_get_state(lp->pwm, &state);
++      }
+       state.duty_cycle = div_u64(br * state.period, max_br);
+       state.enabled = state.duty_cycle;
+@@ -387,7 +396,6 @@ static int lp855x_probe(struct i2c_client *cl)
+       const struct i2c_device_id *id = i2c_client_get_device_id(cl);
+       const struct acpi_device_id *acpi_id = NULL;
+       struct device *dev = &cl->dev;
+-      struct pwm_state pwmstate;
+       struct lp855x *lp;
+       int ret;
+@@ -470,15 +478,11 @@ static int lp855x_probe(struct i2c_client *cl)
+               else
+                       return dev_err_probe(dev, ret, "getting PWM\n");
++              lp->needs_pwm_init = false;
+               lp->mode = REGISTER_BASED;
+               dev_dbg(dev, "mode: register based\n");
+       } else {
+-              pwm_init_state(lp->pwm, &pwmstate);
+-              /* Legacy platform data compatibility */
+-              if (lp->pdata->period_ns > 0)
+-                      pwmstate.period = lp->pdata->period_ns;
+-              pwm_apply_state(lp->pwm, &pwmstate);
+-
++              lp->needs_pwm_init = true;
+               lp->mode = PWM_BASED;
+               dev_dbg(dev, "mode: PWM based\n");
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.5/blk-throttle-consider-carryover_ios-bytes-in-throtl_.patch b/queue-6.5/blk-throttle-consider-carryover_ios-bytes-in-throtl_.patch
new file mode 100644 (file)
index 0000000..dcef38c
--- /dev/null
@@ -0,0 +1,95 @@
+From cb7184a60f911e1a9d2e1c663c08e5be69140e4f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Aug 2023 09:27:08 +0800
+Subject: blk-throttle: consider 'carryover_ios/bytes' in throtl_trim_slice()
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit eead0056648cef49d7b15c07ae612fa217083165 ]
+
+Currently, 'carryover_ios/bytes' is not handled in throtl_trim_slice().
+As a consequence, 'carryover_ios/bytes' will be used to throttle bio
+multiple times, for example:
+
+1) set iops limit to 100, and slice start is 0, slice end is 100ms;
+2) current time is 0, and 10 ios are dispatched, those io won't be
+   throttled and io_disp is 10;
+3) still at current time 0, update iops limit to 1000, carryover_ios is
+   updated to (0 - 10) = -10;
+4) in this slice(0 - 100ms), io_allowed = 100 + (-10) = 90, which means
+   only 90 ios can be dispatched without waiting;
+5) assume that io is throttled in slice(0 - 100ms), and
+   throtl_trim_slice() updates slice to (100ms - 200ms). In this case,
+   'carryover_ios/bytes' is not cleared and still only 90 ios can be
+   dispatched between 100ms - 200ms.
+
+Fix this problem by updating 'carryover_ios/bytes' in
+throtl_trim_slice().
+
+Fixes: a880ae93e5b5 ("blk-throttle: fix io hung due to configuration updates")
+Reported-by: zhuxiaohui <zhuxiaohui.400@bytedance.com>
+Link: https://lore.kernel.org/all/20230812072116.42321-1-zhuxiaohui.400@bytedance.com/
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230816012708.1193747-5-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-throttle.c | 21 +++++++++++++--------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/block/blk-throttle.c b/block/blk-throttle.c
+index b0d9573f1911b..e78bc3b65ec80 100644
+--- a/block/blk-throttle.c
++++ b/block/blk-throttle.c
+@@ -729,8 +729,9 @@ static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
+ /* Trim the used slices and adjust slice start accordingly */
+ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
+ {
+-      unsigned long time_elapsed, io_trim;
+-      u64 bytes_trim;
++      unsigned long time_elapsed;
++      long long bytes_trim;
++      int io_trim;
+       BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));
+@@ -758,17 +759,21 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
+               return;
+       bytes_trim = calculate_bytes_allowed(tg_bps_limit(tg, rw),
+-                                           time_elapsed);
+-      io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed);
+-      if (!bytes_trim && !io_trim)
++                                           time_elapsed) +
++                   tg->carryover_bytes[rw];
++      io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed) +
++                tg->carryover_ios[rw];
++      if (bytes_trim <= 0 && io_trim <= 0)
+               return;
+-      if (tg->bytes_disp[rw] >= bytes_trim)
++      tg->carryover_bytes[rw] = 0;
++      if ((long long)tg->bytes_disp[rw] >= bytes_trim)
+               tg->bytes_disp[rw] -= bytes_trim;
+       else
+               tg->bytes_disp[rw] = 0;
+-      if (tg->io_disp[rw] >= io_trim)
++      tg->carryover_ios[rw] = 0;
++      if ((int)tg->io_disp[rw] >= io_trim)
+               tg->io_disp[rw] -= io_trim;
+       else
+               tg->io_disp[rw] = 0;
+@@ -776,7 +781,7 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
+       tg->slice_start[rw] += time_elapsed;
+       throtl_log(&tg->service_queue,
+-                 "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
++                 "[%c] trim slice nr=%lu bytes=%lld io=%d start=%lu end=%lu jiffies=%lu",
+                  rw == READ ? 'R' : 'W', time_elapsed / tg->td->throtl_slice,
+                  bytes_trim, io_trim, tg->slice_start[rw], tg->slice_end[rw],
+                  jiffies);
+-- 
+2.40.1
+
diff --git a/queue-6.5/blk-throttle-use-calculate_io-bytes_allowed-for-thro.patch b/queue-6.5/blk-throttle-use-calculate_io-bytes_allowed-for-thro.patch
new file mode 100644 (file)
index 0000000..626661c
--- /dev/null
@@ -0,0 +1,144 @@
+From 3268eb2f82983c956d051123601cd9fbb94d180c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Aug 2023 09:27:07 +0800
+Subject: blk-throttle: use calculate_io/bytes_allowed() for
+ throtl_trim_slice()
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit e8368b57c006dc0e02dcd8a9dc9f2060ff5476fe ]
+
+There are no functional changes, just make the code cleaner.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/r/20230816012708.1193747-4-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: eead0056648c ("blk-throttle: consider 'carryover_ios/bytes' in throtl_trim_slice()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-throttle.c | 86 +++++++++++++++++++++-----------------------
+ 1 file changed, 41 insertions(+), 45 deletions(-)
+
+diff --git a/block/blk-throttle.c b/block/blk-throttle.c
+index 7397ff199d669..b0d9573f1911b 100644
+--- a/block/blk-throttle.c
++++ b/block/blk-throttle.c
+@@ -697,11 +697,40 @@ static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
+       return true;
+ }
++static unsigned int calculate_io_allowed(u32 iops_limit,
++                                       unsigned long jiffy_elapsed)
++{
++      unsigned int io_allowed;
++      u64 tmp;
++
++      /*
++       * jiffy_elapsed should not be a big value as minimum iops can be
++       * 1 then at max jiffy elapsed should be equivalent of 1 second as we
++       * will allow dispatch after 1 second and after that slice should
++       * have been trimmed.
++       */
++
++      tmp = (u64)iops_limit * jiffy_elapsed;
++      do_div(tmp, HZ);
++
++      if (tmp > UINT_MAX)
++              io_allowed = UINT_MAX;
++      else
++              io_allowed = tmp;
++
++      return io_allowed;
++}
++
++static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
++{
++      return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
++}
++
+ /* Trim the used slices and adjust slice start accordingly */
+ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
+ {
+-      unsigned long nr_slices, time_elapsed, io_trim;
+-      u64 bytes_trim, tmp;
++      unsigned long time_elapsed, io_trim;
++      u64 bytes_trim;
+       BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));
+@@ -723,19 +752,14 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
+       throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);
+-      time_elapsed = jiffies - tg->slice_start[rw];
+-
+-      nr_slices = time_elapsed / tg->td->throtl_slice;
+-
+-      if (!nr_slices)
++      time_elapsed = rounddown(jiffies - tg->slice_start[rw],
++                               tg->td->throtl_slice);
++      if (!time_elapsed)
+               return;
+-      tmp = tg_bps_limit(tg, rw) * tg->td->throtl_slice * nr_slices;
+-      do_div(tmp, HZ);
+-      bytes_trim = tmp;
+-
+-      io_trim = (tg_iops_limit(tg, rw) * tg->td->throtl_slice * nr_slices) /
+-              HZ;
++      bytes_trim = calculate_bytes_allowed(tg_bps_limit(tg, rw),
++                                           time_elapsed);
++      io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed);
+       if (!bytes_trim && !io_trim)
+               return;
+@@ -749,41 +773,13 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
+       else
+               tg->io_disp[rw] = 0;
+-      tg->slice_start[rw] += nr_slices * tg->td->throtl_slice;
++      tg->slice_start[rw] += time_elapsed;
+       throtl_log(&tg->service_queue,
+                  "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
+-                 rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim,
+-                 tg->slice_start[rw], tg->slice_end[rw], jiffies);
+-}
+-
+-static unsigned int calculate_io_allowed(u32 iops_limit,
+-                                       unsigned long jiffy_elapsed)
+-{
+-      unsigned int io_allowed;
+-      u64 tmp;
+-
+-      /*
+-       * jiffy_elapsed should not be a big value as minimum iops can be
+-       * 1 then at max jiffy elapsed should be equivalent of 1 second as we
+-       * will allow dispatch after 1 second and after that slice should
+-       * have been trimmed.
+-       */
+-
+-      tmp = (u64)iops_limit * jiffy_elapsed;
+-      do_div(tmp, HZ);
+-
+-      if (tmp > UINT_MAX)
+-              io_allowed = UINT_MAX;
+-      else
+-              io_allowed = tmp;
+-
+-      return io_allowed;
+-}
+-
+-static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
+-{
+-      return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
++                 rw == READ ? 'R' : 'W', time_elapsed / tg->td->throtl_slice,
++                 bytes_trim, io_trim, tg->slice_start[rw], tg->slice_end[rw],
++                 jiffies);
+ }
+ static void __tg_update_carryover(struct throtl_grp *tg, bool rw)
+-- 
+2.40.1
+
diff --git a/queue-6.5/bpf-assign-bpf_tramp_run_ctx-saved_run_ctx-before-re.patch b/queue-6.5/bpf-assign-bpf_tramp_run_ctx-saved_run_ctx-before-re.patch
new file mode 100644 (file)
index 0000000..17debd0
--- /dev/null
@@ -0,0 +1,71 @@
+From 9dd34ca7e0320be533d9c4de646d327ed551d1d0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Aug 2023 10:04:05 +0200
+Subject: bpf: Assign bpf_tramp_run_ctx::saved_run_ctx before recursion check.
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+[ Upstream commit 6764e767f4af1e35f87f3497e1182d945de37f93 ]
+
+__bpf_prog_enter_recur() assigns bpf_tramp_run_ctx::saved_run_ctx before
+performing the recursion check which means in case of a recursion
+__bpf_prog_exit_recur() uses the previously set bpf_tramp_run_ctx::saved_run_ctx
+value.
+
+__bpf_prog_enter_sleepable_recur() assigns bpf_tramp_run_ctx::saved_run_ctx
+after the recursion check which means in case of a recursion
+__bpf_prog_exit_sleepable_recur() uses an uninitialized value. This does not
+look right. If I read the entry trampoline code right, then bpf_tramp_run_ctx
+isn't initialized upfront.
+
+Align __bpf_prog_enter_sleepable_recur() with __bpf_prog_enter_recur() and
+set bpf_tramp_run_ctx::saved_run_ctx before the recursion check is made.
+Remove the assignment of saved_run_ctx in kern_sys_bpf() since it happens
+a few cycles later.
+
+Fixes: e384c7b7b46d0 ("bpf, x86: Create bpf_tramp_run_ctx on the caller thread's stack")
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Jiri Olsa <jolsa@kernel.org>
+Link: https://lore.kernel.org/bpf/20230830080405.251926-3-bigeasy@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/syscall.c    | 1 -
+ kernel/bpf/trampoline.c | 5 ++---
+ 2 files changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index c925c270ed8b4..1480b6cf12f06 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -5304,7 +5304,6 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
+               }
+               run_ctx.bpf_cookie = 0;
+-              run_ctx.saved_run_ctx = NULL;
+               if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
+                       /* recursion detected */
+                       __bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
+diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
+index 78acf28d48732..53ff50cac61ea 100644
+--- a/kernel/bpf/trampoline.c
++++ b/kernel/bpf/trampoline.c
+@@ -926,13 +926,12 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
+       migrate_disable();
+       might_fault();
++      run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
++
+       if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
+               bpf_prog_inc_misses_counter(prog);
+               return 0;
+       }
+-
+-      run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+-
+       return bpf_prog_start_time();
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/bpf-bpf_sk_storage-fix-invalid-wait-context-lockdep-.patch b/queue-6.5/bpf-bpf_sk_storage-fix-invalid-wait-context-lockdep-.patch
new file mode 100644 (file)
index 0000000..227003a
--- /dev/null
@@ -0,0 +1,198 @@
+From 00ec14470c9b00b73a07fb912e523870bff4277d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 16:11:27 -0700
+Subject: bpf: bpf_sk_storage: Fix invalid wait context lockdep report
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+[ Upstream commit a96a44aba556c42b432929d37d60158aca21ad4c ]
+
+'./test_progs -t test_local_storage' reported a splat:
+
+[   27.137569] =============================
+[   27.138122] [ BUG: Invalid wait context ]
+[   27.138650] 6.5.0-03980-gd11ae1b16b0a #247 Tainted: G           O
+[   27.139542] -----------------------------
+[   27.140106] test_progs/1729 is trying to lock:
+[   27.140713] ffff8883ef047b88 (stock_lock){-.-.}-{3:3}, at: local_lock_acquire+0x9/0x130
+[   27.141834] other info that might help us debug this:
+[   27.142437] context-{5:5}
+[   27.142856] 2 locks held by test_progs/1729:
+[   27.143352]  #0: ffffffff84bcd9c0 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire+0x4/0x40
+[   27.144492]  #1: ffff888107deb2c0 (&storage->lock){..-.}-{2:2}, at: bpf_local_storage_update+0x39e/0x8e0
+[   27.145855] stack backtrace:
+[   27.146274] CPU: 0 PID: 1729 Comm: test_progs Tainted: G           O       6.5.0-03980-gd11ae1b16b0a #247
+[   27.147550] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
+[   27.149127] Call Trace:
+[   27.149490]  <TASK>
+[   27.149867]  dump_stack_lvl+0x130/0x1d0
+[   27.152609]  dump_stack+0x14/0x20
+[   27.153131]  __lock_acquire+0x1657/0x2220
+[   27.153677]  lock_acquire+0x1b8/0x510
+[   27.157908]  local_lock_acquire+0x29/0x130
+[   27.159048]  obj_cgroup_charge+0xf4/0x3c0
+[   27.160794]  slab_pre_alloc_hook+0x28e/0x2b0
+[   27.161931]  __kmem_cache_alloc_node+0x51/0x210
+[   27.163557]  __kmalloc+0xaa/0x210
+[   27.164593]  bpf_map_kzalloc+0xbc/0x170
+[   27.165147]  bpf_selem_alloc+0x130/0x510
+[   27.166295]  bpf_local_storage_update+0x5aa/0x8e0
+[   27.167042]  bpf_fd_sk_storage_update_elem+0xdb/0x1a0
+[   27.169199]  bpf_map_update_value+0x415/0x4f0
+[   27.169871]  map_update_elem+0x413/0x550
+[   27.170330]  __sys_bpf+0x5e9/0x640
+[   27.174065]  __x64_sys_bpf+0x80/0x90
+[   27.174568]  do_syscall_64+0x48/0xa0
+[   27.175201]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+[   27.175932] RIP: 0033:0x7effb40e41ad
+[   27.176357] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d8
+[   27.179028] RSP: 002b:00007ffe64c21fc8 EFLAGS: 00000202 ORIG_RAX: 0000000000000141
+[   27.180088] RAX: ffffffffffffffda RBX: 00007ffe64c22768 RCX: 00007effb40e41ad
+[   27.181082] RDX: 0000000000000020 RSI: 00007ffe64c22008 RDI: 0000000000000002
+[   27.182030] RBP: 00007ffe64c21ff0 R08: 0000000000000000 R09: 00007ffe64c22788
+[   27.183038] R10: 0000000000000064 R11: 0000000000000202 R12: 0000000000000000
+[   27.184006] R13: 00007ffe64c22788 R14: 00007effb42a1000 R15: 0000000000000000
+[   27.184958]  </TASK>
+
+It complains about acquiring a local_lock while holding a raw_spin_lock.
+It means it should not allocate memory while holding a raw_spin_lock
+since it is not safe for RT.
+
+raw_spin_lock is needed because bpf_local_storage supports tracing
+context. In particular for task local storage, it is easy to
+get a "current" task PTR_TO_BTF_ID in tracing bpf prog.
+However, task (and cgroup) local storage has already been moved to
+bpf mem allocator which can be used after raw_spin_lock.
+
+The splat is for the sk storage. For sk (and inode) storage,
+it has not been moved to bpf mem allocator. Using raw_spin_lock or not,
+kzalloc(GFP_ATOMIC) could theoretically be unsafe in tracing context.
+However, the local storage helper requires a verifier accepted
+sk pointer (PTR_TO_BTF_ID), so it is hypothetical whether that (meaning
+running a bpf prog in a kzalloc unsafe context while also being able to
+hold a verifier accepted sk pointer) could happen.
+
+This patch avoids kzalloc after raw_spin_lock to silence the splat.
+There is an existing kzalloc before the raw_spin_lock. At that point,
+a kzalloc is very likely required because a lookup has just been done
+before. Thus, this patch always does the kzalloc before acquiring
+the raw_spin_lock and remove the later kzalloc usage after the
+raw_spin_lock. After this change, it will have a charge and then
+uncharge during the syscall bpf_map_update_elem() code path.
+This patch opts for simplicity and not continue the old
+optimization to save one charge and uncharge.
+
+This issue dates back to the very first commit of bpf_sk_storage
+which had been refactored multiple times to create task, inode, and
+cgroup storage. This patch uses a Fixes tag with a more recent
+commit that should be easier to backport.
+
+Fixes: b00fa38a9c1c ("bpf: Enable non-atomic allocations in local storage")
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20230901231129.578493-2-martin.lau@linux.dev
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/bpf_local_storage.c | 47 ++++++++++------------------------
+ 1 file changed, 14 insertions(+), 33 deletions(-)
+
+diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
+index b5149cfce7d4d..37ad47d52dc55 100644
+--- a/kernel/bpf/bpf_local_storage.c
++++ b/kernel/bpf/bpf_local_storage.c
+@@ -553,7 +553,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+                        void *value, u64 map_flags, gfp_t gfp_flags)
+ {
+       struct bpf_local_storage_data *old_sdata = NULL;
+-      struct bpf_local_storage_elem *selem = NULL;
++      struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
+       struct bpf_local_storage *local_storage;
+       unsigned long flags;
+       int err;
+@@ -607,11 +607,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+               }
+       }
+-      if (gfp_flags == GFP_KERNEL) {
+-              selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+-              if (!selem)
+-                      return ERR_PTR(-ENOMEM);
+-      }
++      /* A lookup has just been done before and concluded a new selem is
++       * needed. The chance of an unnecessary alloc is unlikely.
++       */
++      alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
++      if (!alloc_selem)
++              return ERR_PTR(-ENOMEM);
+       raw_spin_lock_irqsave(&local_storage->lock, flags);
+@@ -623,13 +624,13 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+                * simple.
+                */
+               err = -EAGAIN;
+-              goto unlock_err;
++              goto unlock;
+       }
+       old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
+       err = check_flags(old_sdata, map_flags);
+       if (err)
+-              goto unlock_err;
++              goto unlock;
+       if (old_sdata && (map_flags & BPF_F_LOCK)) {
+               copy_map_value_locked(&smap->map, old_sdata->data, value,
+@@ -638,23 +639,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+               goto unlock;
+       }
+-      if (gfp_flags != GFP_KERNEL) {
+-              /* local_storage->lock is held.  Hence, we are sure
+-               * we can unlink and uncharge the old_sdata successfully
+-               * later.  Hence, instead of charging the new selem now
+-               * and then uncharge the old selem later (which may cause
+-               * a potential but unnecessary charge failure),  avoid taking
+-               * a charge at all here (the "!old_sdata" check) and the
+-               * old_sdata will not be uncharged later during
+-               * bpf_selem_unlink_storage_nolock().
+-               */
+-              selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
+-              if (!selem) {
+-                      err = -ENOMEM;
+-                      goto unlock_err;
+-              }
+-      }
+-
++      alloc_selem = NULL;
+       /* First, link the new selem to the map */
+       bpf_selem_link_map(smap, selem);
+@@ -665,20 +650,16 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
+       if (old_sdata) {
+               bpf_selem_unlink_map(SELEM(old_sdata));
+               bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
+-                                              false, false);
++                                              true, false);
+       }
+ unlock:
+       raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+-      return SDATA(selem);
+-
+-unlock_err:
+-      raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+-      if (selem) {
++      if (alloc_selem) {
+               mem_uncharge(smap, owner, smap->elem_size);
+-              bpf_selem_free(selem, smap, true);
++              bpf_selem_free(alloc_selem, smap, true);
+       }
+-      return ERR_PTR(err);
++      return err ? ERR_PTR(err) : SDATA(selem);
+ }
+ static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
+-- 
+2.40.1
+
diff --git a/queue-6.5/bpf-bpf_sk_storage-fix-the-missing-uncharge-in-sk_om.patch b/queue-6.5/bpf-bpf_sk_storage-fix-the-missing-uncharge-in-sk_om.patch
new file mode 100644 (file)
index 0000000..2a7970b
--- /dev/null
@@ -0,0 +1,51 @@
+From 479473ee1f2ad31c7f6b057de6664b75da16c456 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 16:11:28 -0700
+Subject: bpf: bpf_sk_storage: Fix the missing uncharge in sk_omem_alloc
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+[ Upstream commit 55d49f750b1cb1f177fb1b00ae02cba4613bcfb7 ]
+
+The commit c83597fa5dc6 ("bpf: Refactor some inode/task/sk storage functions
+for reuse"), refactored the bpf_{sk,task,inode}_storage_free() into
+bpf_local_storage_unlink_nolock() which was then later renamed to
+bpf_local_storage_destroy(). The commit accidentally passed the
+"bool uncharge_mem = false" argument to bpf_selem_unlink_storage_nolock()
+which then stopped the uncharge from happening to the sk->sk_omem_alloc.
+
+This missing uncharge only happens when the sk is going away (during
+__sk_destruct).
+
+This patch fixes it by always passing "uncharge_mem = true". It is a
+noop to the task/inode/cgroup storage because they do not have the
+map_local_storage_(un)charge enabled in the map_ops. A followup patch
+will be done in bpf-next to remove the uncharge_mem argument.
+
+A selftest is added in the next patch.
+
+Fixes: c83597fa5dc6 ("bpf: Refactor some inode/task/sk storage functions for reuse")
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20230901231129.578493-3-martin.lau@linux.dev
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/bpf_local_storage.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
+index 37ad47d52dc55..146824cc96893 100644
+--- a/kernel/bpf/bpf_local_storage.c
++++ b/kernel/bpf/bpf_local_storage.c
+@@ -760,7 +760,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
+                * of the loop will set the free_cgroup_storage to true.
+                */
+               free_storage = bpf_selem_unlink_storage_nolock(
+-                      local_storage, selem, false, true);
++                      local_storage, selem, true, true);
+       }
+       raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+-- 
+2.40.1
+
diff --git a/queue-6.5/bpf-invoke-__bpf_prog_exit_sleepable_recur-on-recurs.patch b/queue-6.5/bpf-invoke-__bpf_prog_exit_sleepable_recur-on-recurs.patch
new file mode 100644 (file)
index 0000000..f028762
--- /dev/null
@@ -0,0 +1,46 @@
+From 4a80c41c8eda655c514cac098f9b45f092d7738a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Aug 2023 10:04:04 +0200
+Subject: bpf: Invoke __bpf_prog_exit_sleepable_recur() on recursion in
+ kern_sys_bpf().
+
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+
+[ Upstream commit 7645629f7dc88cd777f98970134bf1a54c8d77e3 ]
+
+If __bpf_prog_enter_sleepable_recur() detects recursion then it returns
+0 without undoing rcu_read_lock_trace(), migrate_disable() or
+decrementing the recursion counter. This is fine in the JIT case because
+the JIT code will jump in the 0 case to the end and invoke the matching
+exit trampoline (__bpf_prog_exit_sleepable_recur()).
+
+This is not the case in kern_sys_bpf() which returns directly to the
+caller with an error code.
+
+Add __bpf_prog_exit_sleepable_recur() as clean up in the recursion case.
+
+Fixes: b1d18a7574d0d ("bpf: Extend sys_bpf commands for bpf_syscall programs.")
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Jiri Olsa <jolsa@kernel.org>
+Link: https://lore.kernel.org/bpf/20230830080405.251926-2-bigeasy@linutronix.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/syscall.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index a2aef900519c2..c925c270ed8b4 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -5307,6 +5307,7 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
+               run_ctx.saved_run_ctx = NULL;
+               if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
+                       /* recursion detected */
++                      __bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
+                       bpf_prog_put(prog);
+                       return -EBUSY;
+               }
+-- 
+2.40.1
+
diff --git a/queue-6.5/bpf-sockmap-fix-skb-refcnt-race-after-locking-change.patch b/queue-6.5/bpf-sockmap-fix-skb-refcnt-race-after-locking-change.patch
new file mode 100644 (file)
index 0000000..c232afd
--- /dev/null
@@ -0,0 +1,124 @@
+From 212ef7593410d09fb706a9cadb50dcfc37680b3e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 13:21:37 -0700
+Subject: bpf, sockmap: Fix skb refcnt race after locking changes
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+[ Upstream commit a454d84ee20baf7bd7be90721b9821f73c7d23d9 ]
+
+There is a race where skb's from the sk_psock_backlog can be referenced
+after userspace side has already skb_consumed() the sk_buff and its refcnt
+dropped to zero causing use after free.
+
+The flow is the following:
+
+  while ((skb = skb_peek(&psock->ingress_skb))
+    sk_psock_handle_skb(psock, skb, ..., ingress)
+    if (!ingress) ...
+    sk_psock_skb_ingress
+       sk_psock_skb_ingress_enqueue(skb)
+          msg->skb = skb
+          sk_psock_queue_msg(psock, msg)
+    skb_dequeue(&psock->ingress_skb)
+
+The sk_psock_queue_msg() puts the msg on the ingress_msg queue. This is
+what the application reads when recvmsg() is called. An application can
+read this anytime after the msg is placed on the queue. The recvmsg hook
+will also read msg->skb and then after user space reads the msg will call
+consume_skb(skb) on it effectively free'ing it.
+
+But, the race is in above where backlog queue still has a reference to
+the skb and calls skb_dequeue(). If the skb_dequeue happens after the
+user reads and free's the skb we have a use after free.
+
+The !ingress case does not suffer from this problem because it uses
+sendmsg_*(sk, msg) which does not pass the sk_buff further down the
+stack.
+
+The following splat was observed with 'test_progs -t sockmap_listen':
+
+  [ 1022.710250][ T2556] general protection fault, ...
+  [...]
+  [ 1022.712830][ T2556] Workqueue: events sk_psock_backlog
+  [ 1022.713262][ T2556] RIP: 0010:skb_dequeue+0x4c/0x80
+  [ 1022.713653][ T2556] Code: ...
+  [...]
+  [ 1022.720699][ T2556] Call Trace:
+  [ 1022.720984][ T2556]  <TASK>
+  [ 1022.721254][ T2556]  ? die_addr+0x32/0x80
+  [ 1022.721589][ T2556]  ? exc_general_protection+0x25a/0x4b0
+  [ 1022.722026][ T2556]  ? asm_exc_general_protection+0x22/0x30
+  [ 1022.722489][ T2556]  ? skb_dequeue+0x4c/0x80
+  [ 1022.722854][ T2556]  sk_psock_backlog+0x27a/0x300
+  [ 1022.723243][ T2556]  process_one_work+0x2a7/0x5b0
+  [ 1022.723633][ T2556]  worker_thread+0x4f/0x3a0
+  [ 1022.723998][ T2556]  ? __pfx_worker_thread+0x10/0x10
+  [ 1022.724386][ T2556]  kthread+0xfd/0x130
+  [ 1022.724709][ T2556]  ? __pfx_kthread+0x10/0x10
+  [ 1022.725066][ T2556]  ret_from_fork+0x2d/0x50
+  [ 1022.725409][ T2556]  ? __pfx_kthread+0x10/0x10
+  [ 1022.725799][ T2556]  ret_from_fork_asm+0x1b/0x30
+  [ 1022.726201][ T2556]  </TASK>
+
+To fix we add an skb_get() before passing the skb to be enqueued in the
+ingress queue. This bumps the skb->users refcnt so that consume_skb()
+and kfree_skb will not immediately free the sk_buff. With this we can
+be sure the skb is still around when we do the dequeue. Then we just
+need to decrement the refcnt or free the skb in the backlog case which
+we do by calling kfree_skb() on the ingress case as well as the sendmsg
+case.
+
+Before locking change from fixes tag we had the sock locked so we
+couldn't race with user and there was no issue here.
+
+Fixes: 799aa7f98d53e ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
+Reported-by: Jiri Olsa  <jolsa@kernel.org>
+Signed-off-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: Xu Kuohai <xukuohai@huawei.com>
+Tested-by: Jiri Olsa <jolsa@kernel.org>
+Link: https://lore.kernel.org/bpf/20230901202137.214666-1-john.fastabend@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/skmsg.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index ef1a2eb6520bf..a746dbc2f8877 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -612,12 +612,18 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
+ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
+                              u32 off, u32 len, bool ingress)
+ {
++      int err = 0;
++
+       if (!ingress) {
+               if (!sock_writeable(psock->sk))
+                       return -EAGAIN;
+               return skb_send_sock(psock->sk, skb, off, len);
+       }
+-      return sk_psock_skb_ingress(psock, skb, off, len);
++      skb_get(skb);
++      err = sk_psock_skb_ingress(psock, skb, off, len);
++      if (err < 0)
++              kfree_skb(skb);
++      return err;
+ }
+ static void sk_psock_skb_state(struct sk_psock *psock,
+@@ -685,9 +691,7 @@ static void sk_psock_backlog(struct work_struct *work)
+               } while (len);
+               skb = skb_dequeue(&psock->ingress_skb);
+-              if (!ingress) {
+-                      kfree_skb(skb);
+-              }
++              kfree_skb(skb);
+       }
+ end:
+       mutex_unlock(&psock->work_mutex);
+-- 
+2.40.1
+
diff --git a/queue-6.5/ceph-make-members-in-struct-ceph_mds_request_args_ex.patch b/queue-6.5/ceph-make-members-in-struct-ceph_mds_request_args_ex.patch
new file mode 100644 (file)
index 0000000..1711d48
--- /dev/null
@@ -0,0 +1,70 @@
+From 7a622f53197ef6379dc758dccf2bccd62557f1ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jul 2023 09:44:40 +0800
+Subject: ceph: make members in struct ceph_mds_request_args_ext a union
+
+From: Xiubo Li <xiubli@redhat.com>
+
+[ Upstream commit 3af5ae22030cb59fab4fba35f5a2b62f47e14df9 ]
+
+In ceph mainline it will allow to set the btime in the setattr request
+and just add a 'btime' member in the union 'ceph_mds_request_args' and
+then bump up the header version to 4. That means the total size of union
+'ceph_mds_request_args' will increase sizeof(struct ceph_timespec) bytes,
+but in kclient it will increase the sizeof(setattr_ext) bytes for each
+request.
+
+Since the MDS will always depend on the header's version and front_len
+members to decode the 'ceph_mds_request_head' struct, at the same time
+kclient hasn't supported the 'btime' feature yet in setattr request,
+so it's safe to do this change here.
+
+This will save 48 bytes of memory for each request.
+
+Fixes: 4f1ddb1ea874 ("ceph: implement updated ceph_mds_request_head structure")
+Signed-off-by: Xiubo Li <xiubli@redhat.com>
+Reviewed-by: Milind Changire <mchangir@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/ceph/ceph_fs.h | 24 +++++++++++++-----------
+ 1 file changed, 13 insertions(+), 11 deletions(-)
+
+diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
+index 49586ff261520..b4fa2a25b7d95 100644
+--- a/include/linux/ceph/ceph_fs.h
++++ b/include/linux/ceph/ceph_fs.h
+@@ -462,17 +462,19 @@ union ceph_mds_request_args {
+ } __attribute__ ((packed));
+ union ceph_mds_request_args_ext {
+-      union ceph_mds_request_args old;
+-      struct {
+-              __le32 mode;
+-              __le32 uid;
+-              __le32 gid;
+-              struct ceph_timespec mtime;
+-              struct ceph_timespec atime;
+-              __le64 size, old_size;       /* old_size needed by truncate */
+-              __le32 mask;                 /* CEPH_SETATTR_* */
+-              struct ceph_timespec btime;
+-      } __attribute__ ((packed)) setattr_ext;
++      union {
++              union ceph_mds_request_args old;
++              struct {
++                      __le32 mode;
++                      __le32 uid;
++                      __le32 gid;
++                      struct ceph_timespec mtime;
++                      struct ceph_timespec atime;
++                      __le64 size, old_size;       /* old_size needed by truncate */
++                      __le32 mask;                 /* CEPH_SETATTR_* */
++                      struct ceph_timespec btime;
++              } __attribute__ ((packed)) setattr_ext;
++      };
+ };
+ #define CEPH_MDS_FLAG_REPLAY          1 /* this is a replayed op */
+-- 
+2.40.1
+
diff --git a/queue-6.5/drm-amd-display-fix-mode-scaling-rmx_.patch b/queue-6.5/drm-amd-display-fix-mode-scaling-rmx_.patch
new file mode 100644 (file)
index 0000000..a04c63a
--- /dev/null
@@ -0,0 +1,44 @@
+From e55cf17ab2f05bb4d9518de2a041c3a704b4b52b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Aug 2023 09:11:11 -0400
+Subject: drm/amd/display: fix mode scaling (RMX_.*)
+
+From: Hamza Mahfooz <hamza.mahfooz@amd.com>
+
+[ Upstream commit ea7971af7a911a7a388b4c47db2a231a6b8dcc29 ]
+
+As made mention of in commit 4a2df0d1f28e ("drm/amd/display: Fixed
+non-native modes not lighting up"), we shouldn't call
+drm_mode_set_crtcinfo() once the crtc timings have been decided, since
+it can cause settings to be unintentionally overwritten. So, since
+dm_state is never NULL now, we can use old_stream to determine if we
+should call drm_mode_set_crtcinfo() because we only need to set the crtc
+timing parameters for entirely new streams.
+
+Cc: Harry Wentland <harry.wentland@amd.com>
+Cc: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
+Fixes: bd49f19039c1 ("drm/amd/display: Always set crtcinfo from create_stream_for_sink")
+Reviewed-by: Harry Wentland <harry.wentland@amd.com>
+Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+index 3a7e7d2ce847b..e8e238865b021 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -5990,7 +5990,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+       if (recalculate_timing)
+               drm_mode_set_crtcinfo(&saved_mode, 0);
+-      else
++      else if (!old_stream)
+               drm_mode_set_crtcinfo(&mode, 0);
+       /*
+-- 
+2.40.1
+
diff --git a/queue-6.5/drm-i915-gvt-drop-unused-helper-intel_vgpu_reset_gtt.patch b/queue-6.5/drm-i915-gvt-drop-unused-helper-intel_vgpu_reset_gtt.patch
new file mode 100644 (file)
index 0000000..4405959
--- /dev/null
@@ -0,0 +1,74 @@
+From 1aa0d1d1ad4592c1384e3f453a6a6326d235fc63 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 18:35:16 -0700
+Subject: drm/i915/gvt: Drop unused helper intel_vgpu_reset_gtt()
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit a90c367e5af63880008e21dd199dac839e0e9e0f ]
+
+Drop intel_vgpu_reset_gtt() as it no longer has any callers.  In addition
+to eliminating dead code, this eliminates the last possible scenario where
+__kvmgt_protect_table_find() can be reached without holding vgpu_lock.
+Requiring vgpu_lock to be held when calling __kvmgt_protect_table_find()
+will allow protecting the gfn hash with vgpu_lock without too much fuss.
+
+No functional change intended.
+
+Fixes: ba25d977571e ("drm/i915/gvt: Do not destroy ppgtt_mm during vGPU D3->D0.")
+Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
+Tested-by: Yongwei Ma <yongwei.ma@intel.com>
+Reviewed-by: Zhi Wang <zhi.a.wang@intel.com>
+Link: https://lore.kernel.org/r/20230729013535.1070024-11-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gvt/gtt.c | 18 ------------------
+ drivers/gpu/drm/i915/gvt/gtt.h |  1 -
+ 2 files changed, 19 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
+index 8ba6c8668f033..ef5517ecd9a0c 100644
+--- a/drivers/gpu/drm/i915/gvt/gtt.c
++++ b/drivers/gpu/drm/i915/gvt/gtt.c
+@@ -2882,24 +2882,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
+       ggtt_invalidate(gvt->gt);
+ }
+-/**
+- * intel_vgpu_reset_gtt - reset the all GTT related status
+- * @vgpu: a vGPU
+- *
+- * This function is called from vfio core to reset reset all
+- * GTT related status, including GGTT, PPGTT, scratch page.
+- *
+- */
+-void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
+-{
+-      /* Shadow pages are only created when there is no page
+-       * table tracking data, so remove page tracking data after
+-       * removing the shadow pages.
+-       */
+-      intel_vgpu_destroy_all_ppgtt_mm(vgpu);
+-      intel_vgpu_reset_ggtt(vgpu, true);
+-}
+-
+ /**
+  * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
+  * @gvt: intel gvt device
+diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
+index a3b0f59ec8bd9..4cb183e06e95a 100644
+--- a/drivers/gpu/drm/i915/gvt/gtt.h
++++ b/drivers/gpu/drm/i915/gvt/gtt.h
+@@ -224,7 +224,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old);
+ void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu);
+ int intel_gvt_init_gtt(struct intel_gvt *gvt);
+-void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu);
+ void intel_gvt_clean_gtt(struct intel_gvt *gvt);
+ struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu,
+-- 
+2.40.1
+
diff --git a/queue-6.5/drm-i915-gvt-put-the-page-reference-obtained-by-kvm-.patch b/queue-6.5/drm-i915-gvt-put-the-page-reference-obtained-by-kvm-.patch
new file mode 100644 (file)
index 0000000..6107cac
--- /dev/null
@@ -0,0 +1,53 @@
+From a41be3a843e7311daf8c153a908d49b9c2b80c26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 18:35:11 -0700
+Subject: drm/i915/gvt: Put the page reference obtained by KVM's gfn_to_pfn()
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 708e49583d7da863898b25dafe4bcd799c414278 ]
+
+Put the struct page reference acquired by gfn_to_pfn(), KVM's API is that
+the caller is ultimately responsible for dropping any reference.
+
+Note, kvm_release_pfn_clean() ensures the pfn is actually a refcounted
+struct page before trying to put any references.
+
+Fixes: b901b252b6cf ("drm/i915/gvt: Add 2M huge gtt support")
+Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
+Tested-by: Yongwei Ma <yongwei.ma@intel.com>
+Reviewed-by: Zhi Wang <zhi.a.wang@intel.com>
+Link: https://lore.kernel.org/r/20230729013535.1070024-6-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gvt/gtt.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
+index 58b9b316ae462..8ba6c8668f033 100644
+--- a/drivers/gpu/drm/i915/gvt/gtt.c
++++ b/drivers/gpu/drm/i915/gvt/gtt.c
+@@ -1174,6 +1174,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
+ {
+       const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+       kvm_pfn_t pfn;
++      int ret;
+       if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M))
+               return 0;
+@@ -1187,7 +1188,9 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
+       if (!pfn_valid(pfn))
+               return -EINVAL;
+-      return PageTransHuge(pfn_to_page(pfn));
++      ret = PageTransHuge(pfn_to_page(pfn));
++      kvm_release_pfn_clean(pfn);
++      return ret;
+ }
+ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
+-- 
+2.40.1
+
diff --git a/queue-6.5/drm-i915-gvt-verify-pfn-is-valid-before-dereferencin.patch b/queue-6.5/drm-i915-gvt-verify-pfn-is-valid-before-dereferencin.patch
new file mode 100644 (file)
index 0000000..6e0087e
--- /dev/null
@@ -0,0 +1,46 @@
+From c94c79d6525c4a745d0e7bdab533830e6f3d6d40 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 18:35:07 -0700
+Subject: drm/i915/gvt: Verify pfn is "valid" before dereferencing "struct
+ page"
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit f046923af79158361295ed4f0a588c80b9fdcc1d ]
+
+Check that the pfn found by gfn_to_pfn() is actually backed by "struct
+page" memory prior to retrieving and dereferencing the page.  KVM
+supports backing guest memory with VM_PFNMAP, VM_IO, etc., and so
+there is no guarantee the pfn returned by gfn_to_pfn() has an associated
+"struct page".
+
+Fixes: b901b252b6cf ("drm/i915/gvt: Add 2M huge gtt support")
+Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
+Tested-by: Yongwei Ma <yongwei.ma@intel.com>
+Reviewed-by: Zhi Wang <zhi.a.wang@intel.com>
+Link: https://lore.kernel.org/r/20230729013535.1070024-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gvt/gtt.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
+index 4ec85308379a4..58b9b316ae462 100644
+--- a/drivers/gpu/drm/i915/gvt/gtt.c
++++ b/drivers/gpu/drm/i915/gvt/gtt.c
+@@ -1183,6 +1183,10 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
+       pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
+       if (is_error_noslot_pfn(pfn))
+               return -EINVAL;
++
++      if (!pfn_valid(pfn))
++              return -EINVAL;
++
+       return PageTransHuge(pfn_to_page(pfn));
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/drm-i915-mark-requests-for-guc-virtual-engines-to-av.patch b/queue-6.5/drm-i915-mark-requests-for-guc-virtual-engines-to-av.patch
new file mode 100644 (file)
index 0000000..4de6783
--- /dev/null
@@ -0,0 +1,88 @@
+From 6c07d21840ca76d8b8964f2273411293abc0c7f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Aug 2023 17:30:35 +0200
+Subject: drm/i915: mark requests for GuC virtual engines to avoid
+ use-after-free
+
+From: Andrzej Hajda <andrzej.hajda@intel.com>
+
+[ Upstream commit 5eefc5307c983b59344a4cb89009819f580c84fa ]
+
+References to i915_requests may be trapped by userspace inside a
+sync_file or dmabuf (dma-resv) and held indefinitely across different
+processes. To counter-act the memory leaks, we try not to keep
+references from the request past their completion.
+On the other side on fence release we need to know if rq->engine
+is valid and points to hw engine (true for non-virtual requests).
+To make it possible extra bit has been added to rq->execution_mask,
+for marking virtual engines.
+
+Fixes: bcb9aa45d5a0 ("Revert "drm/i915: Hold reference to intel_context over life of i915_request"")
+Signed-off-by: Chris Wilson <chris.p.wilson@linux.intel.com>
+Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
+Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
+Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230821153035.3903006-1-andrzej.hajda@intel.com
+(cherry picked from commit 280410677af763f3871b93e794a199cfcf6fb580)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gt/intel_engine_types.h      | 1 +
+ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 3 +++
+ drivers/gpu/drm/i915/i915_request.c               | 7 ++-----
+ 3 files changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
+index e99a6fa03d453..a7e6775980043 100644
+--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
++++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
+@@ -58,6 +58,7 @@ struct i915_perf_group;
+ typedef u32 intel_engine_mask_t;
+ #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
++#define VIRTUAL_ENGINES BIT(BITS_PER_TYPE(intel_engine_mask_t) - 1)
+ struct intel_hw_status_page {
+       struct list_head timelines;
+diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+index a0e3ef1c65d24..b5b7f2fe8c78e 100644
+--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+@@ -5470,6 +5470,9 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
+       ve->base.flags = I915_ENGINE_IS_VIRTUAL;
++      BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
++      ve->base.mask = VIRTUAL_ENGINES;
++
+       intel_context_init(&ve->context, &ve->base);
+       for (n = 0; n < count; n++) {
+diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
+index 833b73edefdbb..eb2a3000ad66b 100644
+--- a/drivers/gpu/drm/i915/i915_request.c
++++ b/drivers/gpu/drm/i915/i915_request.c
+@@ -134,9 +134,7 @@ static void i915_fence_release(struct dma_fence *fence)
+       i915_sw_fence_fini(&rq->semaphore);
+       /*
+-       * Keep one request on each engine for reserved use under mempressure
+-       * do not use with virtual engines as this really is only needed for
+-       * kernel contexts.
++       * Keep one request on each engine for reserved use under mempressure.
+        *
+        * We do not hold a reference to the engine here and so have to be
+        * very careful in what rq->engine we poke. The virtual engine is
+@@ -166,8 +164,7 @@ static void i915_fence_release(struct dma_fence *fence)
+        * know that if the rq->execution_mask is a single bit, rq->engine
+        * can be a physical engine with the exact corresponding mask.
+        */
+-      if (!intel_engine_is_virtual(rq->engine) &&
+-          is_power_of_2(rq->execution_mask) &&
++      if (is_power_of_2(rq->execution_mask) &&
+           !cmpxchg(&rq->engine->request_pool, NULL, rq))
+               return;
+-- 
+2.40.1
+
diff --git a/queue-6.5/gfs2-low-memory-forced-flush-fixes.patch b/queue-6.5/gfs2-low-memory-forced-flush-fixes.patch
new file mode 100644 (file)
index 0000000..d8ff682
--- /dev/null
@@ -0,0 +1,89 @@
+From fa2f90e459db9fe587b7b11a6f114346676d9a17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Aug 2023 17:15:46 +0200
+Subject: gfs2: low-memory forced flush fixes
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+[ Upstream commit b74cd55aa9a9d0aca760028a51343ec79812e410 ]
+
+First, function gfs2_ail_flush_reqd checks the SDF_FORCE_AIL_FLUSH flag
+to determine if an AIL flush should be forced in low-memory situations.
+However, it also immediately clears the flag, and when called repeatedly
+as in function gfs2_logd, the flag will be lost.  Fix that by pulling
+the SDF_FORCE_AIL_FLUSH flag check out of gfs2_ail_flush_reqd.
+
+Second, function gfs2_writepages sets the SDF_FORCE_AIL_FLUSH flag
+whether or not enough pages were written.  If enough pages could be
+written, flushing the AIL is unnecessary, though.
+
+Third, gfs2_writepages doesn't wake up logd after setting the
+SDF_FORCE_AIL_FLUSH flag, so it can take a long time for logd to react.
+It would be preferable to wake up logd, but that hurts the performance
+of some workloads and we don't quite understand why so far, so don't
+wake up logd so far.
+
+Fixes: b066a4eebd4f ("gfs2: forcibly flush ail to relieve memory pressure")
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/gfs2/aops.c | 4 ++--
+ fs/gfs2/log.c  | 8 ++++----
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
+index ae49256b7c8c6..be2759a974f9e 100644
+--- a/fs/gfs2/aops.c
++++ b/fs/gfs2/aops.c
+@@ -183,13 +183,13 @@ static int gfs2_writepages(struct address_space *mapping,
+       int ret;
+       /*
+-       * Even if we didn't write any pages here, we might still be holding
++       * Even if we didn't write enough pages here, we might still be holding
+        * dirty pages in the ail. We forcibly flush the ail because we don't
+        * want balance_dirty_pages() to loop indefinitely trying to write out
+        * pages held in the ail that it can't find.
+        */
+       ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops);
+-      if (ret == 0)
++      if (ret == 0 && wbc->nr_to_write > 0)
+               set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
+       return ret;
+ }
+diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
+index d3da259820e30..aaca22f2aa2d1 100644
+--- a/fs/gfs2/log.c
++++ b/fs/gfs2/log.c
+@@ -1282,9 +1282,6 @@ static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
+ {
+       unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
+-      if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
+-              return 1;
+-
+       return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
+               atomic_read(&sdp->sd_log_thresh2);
+ }
+@@ -1325,7 +1322,9 @@ int gfs2_logd(void *data)
+                                                 GFS2_LFC_LOGD_JFLUSH_REQD);
+               }
+-              if (gfs2_ail_flush_reqd(sdp)) {
++              if (test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
++                  gfs2_ail_flush_reqd(sdp)) {
++                      clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
+                       gfs2_ail1_start(sdp);
+                       gfs2_ail1_wait(sdp);
+                       gfs2_ail1_empty(sdp, 0);
+@@ -1338,6 +1337,7 @@ int gfs2_logd(void *data)
+               try_to_freeze();
+               t = wait_event_interruptible_timeout(sdp->sd_logd_waitq,
++                              test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
+                               gfs2_ail_flush_reqd(sdp) ||
+                               gfs2_jrnl_flush_reqd(sdp) ||
+                               kthread_should_stop(),
+-- 
+2.40.1
+
diff --git a/queue-6.5/gfs2-switch-to-wait_event-in-gfs2_logd.patch b/queue-6.5/gfs2-switch-to-wait_event-in-gfs2_logd.patch
new file mode 100644 (file)
index 0000000..dea4fca
--- /dev/null
@@ -0,0 +1,57 @@
+From 8eb2794d7dc668c4569f10a1992e06bf9cfd973d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Aug 2023 15:46:16 +0200
+Subject: gfs2: Switch to wait_event in gfs2_logd
+
+From: Andreas Gruenbacher <agruenba@redhat.com>
+
+[ Upstream commit 6df373b09b1dcf2f7d579f515f653f89a896d417 ]
+
+In gfs2_logd(), switch from an open-coded wait loop to
+wait_event_interruptible_timeout().
+
+Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
+Stable-dep-of: b74cd55aa9a9 ("gfs2: low-memory forced flush fixes")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/gfs2/log.c | 17 +++++------------
+ 1 file changed, 5 insertions(+), 12 deletions(-)
+
+diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
+index aa568796207c0..d3da259820e30 100644
+--- a/fs/gfs2/log.c
++++ b/fs/gfs2/log.c
+@@ -1301,7 +1301,6 @@ int gfs2_logd(void *data)
+ {
+       struct gfs2_sbd *sdp = data;
+       unsigned long t = 1;
+-      DEFINE_WAIT(wait);
+       while (!kthread_should_stop()) {
+@@ -1338,17 +1337,11 @@ int gfs2_logd(void *data)
+               try_to_freeze();
+-              do {
+-                      prepare_to_wait(&sdp->sd_logd_waitq, &wait,
+-                                      TASK_INTERRUPTIBLE);
+-                      if (!gfs2_ail_flush_reqd(sdp) &&
+-                          !gfs2_jrnl_flush_reqd(sdp) &&
+-                          !kthread_should_stop())
+-                              t = schedule_timeout(t);
+-              } while(t && !gfs2_ail_flush_reqd(sdp) &&
+-                      !gfs2_jrnl_flush_reqd(sdp) &&
+-                      !kthread_should_stop());
+-              finish_wait(&sdp->sd_logd_waitq, &wait);
++              t = wait_event_interruptible_timeout(sdp->sd_logd_waitq,
++                              gfs2_ail_flush_reqd(sdp) ||
++                              gfs2_jrnl_flush_reqd(sdp) ||
++                              kthread_should_stop(),
++                              t);
+       }
+       return 0;
+-- 
+2.40.1
+
diff --git a/queue-6.5/gve-fix-frag_list-chaining.patch b/queue-6.5/gve-fix-frag_list-chaining.patch
new file mode 100644 (file)
index 0000000..3c82b8d
--- /dev/null
@@ -0,0 +1,58 @@
+From 028c75bb4c8aca93a1cd8fcfb39f328e46343acd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 21:38:12 +0000
+Subject: gve: fix frag_list chaining
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 817c7cd2043a83a3d8147f40eea1505ac7300b62 ]
+
+gve_rx_append_frags() is able to build skbs chained with frag_list,
+like GRO engine.
+
+Problem is that shinfo->frag_list should only be used
+for the head of the chain.
+
+All other links should use skb->next pointer.
+
+Otherwise, built skbs are not valid and can cause crashes.
+
+Equivalent code in GRO (skb_gro_receive()) is:
+
+    if (NAPI_GRO_CB(p)->last == p)
+        skb_shinfo(p)->frag_list = skb;
+    else
+        NAPI_GRO_CB(p)->last->next = skb;
+    NAPI_GRO_CB(p)->last = skb;
+
+Fixes: 9b8dd5e5ea48 ("gve: DQO: Add RX path")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Bailey Forrest <bcf@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Cc: Catherine Sullivan <csully@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/google/gve/gve_rx_dqo.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+index e57b73eb70f62..ac041cc5714c0 100644
+--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
++++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+@@ -492,7 +492,10 @@ static int gve_rx_append_frags(struct napi_struct *napi,
+               if (!skb)
+                       return -1;
+-              skb_shinfo(rx->ctx.skb_tail)->frag_list = skb;
++              if (rx->ctx.skb_tail == rx->ctx.skb_head)
++                      skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
++              else
++                      rx->ctx.skb_tail->next = skb;
+               rx->ctx.skb_tail = skb;
+               num_frags = 0;
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.5/i3c-master-svc-describe-member-saved_regs.patch b/queue-6.5/i3c-master-svc-describe-member-saved_regs.patch
new file mode 100644 (file)
index 0000000..0745d63
--- /dev/null
@@ -0,0 +1,43 @@
+From 0d7ca90f929ecb7134d13a61fa0238f2d1112c99 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Aug 2023 12:18:53 +0200
+Subject: i3c: master: svc: Describe member 'saved_regs'
+
+From: Miquel Raynal <miquel.raynal@bootlin.com>
+
+[ Upstream commit 5496eac6ad7428fa06811a8c34b3a15beb93b86d ]
+
+The 'saved_regs' member of the 'svc_i3c_master' structure is not
+described in the kernel doc, which produces the following warning:
+
+    Function parameter or member 'saved_regs' not described in 'svc_i3c_master'
+
+Add the missing line in the kernel documentation of the parent
+structure.
+
+Fixes: 1c5ee2a77b1b ("i3c: master: svc: fix i3c suspend/resume issue")
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202308171435.0xQ82lvu-lkp@intel.com/
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/r/20230817101853.16805-1-miquel.raynal@bootlin.com
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/i3c/master/svc-i3c-master.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c
+index 2fefbe55c1675..6c43992c8cf6b 100644
+--- a/drivers/i3c/master/svc-i3c-master.c
++++ b/drivers/i3c/master/svc-i3c-master.c
+@@ -156,6 +156,7 @@ struct svc_i3c_regs_save {
+  * @base: I3C master controller
+  * @dev: Corresponding device
+  * @regs: Memory mapping
++ * @saved_regs: Volatile values for PM operations
+  * @free_slots: Bit array of available slots
+  * @addrs: Array containing the dynamic addresses of each attached device
+  * @descs: Array of descriptors, one per attached device
+-- 
+2.40.1
+
diff --git a/queue-6.5/idr-fix-param-name-in-idr_alloc_cyclic-doc.patch b/queue-6.5/idr-fix-param-name-in-idr_alloc_cyclic-doc.patch
new file mode 100644 (file)
index 0000000..7b96260
--- /dev/null
@@ -0,0 +1,35 @@
+From b70cbac0eb550b0957bee8e6bb95c5e9cff2c775 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 26 Aug 2023 20:33:17 +0300
+Subject: idr: fix param name in idr_alloc_cyclic() doc
+
+From: Ariel Marcovitch <arielmarcovitch@gmail.com>
+
+[ Upstream commit 2a15de80dd0f7e04a823291aa9eb49c5294f56af ]
+
+The relevant parameter is 'start' and not 'nextid'
+
+Fixes: 460488c58ca8 ("idr: Remove idr_alloc_ext")
+Signed-off-by: Ariel Marcovitch <arielmarcovitch@gmail.com>
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/idr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/idr.c b/lib/idr.c
+index 7ecdfdb5309e7..13f2758c23773 100644
+--- a/lib/idr.c
++++ b/lib/idr.c
+@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(idr_alloc);
+  * @end: The maximum ID (exclusive).
+  * @gfp: Memory allocation flags.
+  *
+- * Allocates an unused ID in the range specified by @nextid and @end.  If
++ * Allocates an unused ID in the range specified by @start and @end.  If
+  * @end is <= 0, it is treated as one larger than %INT_MAX.  This allows
+  * callers to use @start + N as @end as long as N is within integer range.
+  * The search for an unused ID will start at the last ID allocated and will
+-- 
+2.40.1
+
diff --git a/queue-6.5/igb-change-igb_min-to-allow-set-rx-tx-value-between-.patch b/queue-6.5/igb-change-igb_min-to-allow-set-rx-tx-value-between-.patch
new file mode 100644 (file)
index 0000000..c6da202
--- /dev/null
@@ -0,0 +1,44 @@
+From c833338f4225c242c15548cc7ec40d01fad34b97 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jul 2023 10:10:58 +0200
+Subject: igb: Change IGB_MIN to allow set rx/tx value between 64 and 80
+
+From: Olga Zaborska <olga.zaborska@intel.com>
+
+[ Upstream commit 6319685bdc8ad5310890add907b7c42f89302886 ]
+
+Change the minimum value of RX/TX descriptors to 64 to enable setting the rx/tx
+value between 64 and 80. All igb devices can use as low as 64 descriptors.
+This change will unify igb with other drivers.
+Based on commit 7b1be1987c1e ("e1000e: lower ring minimum size to 64")
+
+Fixes: 9d5c824399de ("igb: PCI-Express 82575 Gigabit Ethernet driver")
+Signed-off-by: Olga Zaborska <olga.zaborska@intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igb/igb.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
+index 015b781441149..a2b759531cb7b 100644
+--- a/drivers/net/ethernet/intel/igb/igb.h
++++ b/drivers/net/ethernet/intel/igb/igb.h
+@@ -34,11 +34,11 @@ struct igb_adapter;
+ /* TX/RX descriptor defines */
+ #define IGB_DEFAULT_TXD               256
+ #define IGB_DEFAULT_TX_WORK   128
+-#define IGB_MIN_TXD           80
++#define IGB_MIN_TXD           64
+ #define IGB_MAX_TXD           4096
+ #define IGB_DEFAULT_RXD               256
+-#define IGB_MIN_RXD           80
++#define IGB_MIN_RXD           64
+ #define IGB_MAX_RXD           4096
+ #define IGB_DEFAULT_ITR               3 /* dynamic */
+-- 
+2.40.1
+
diff --git a/queue-6.5/igb-disable-virtualization-features-on-82580.patch b/queue-6.5/igb-disable-virtualization-features-on-82580.patch
new file mode 100644 (file)
index 0000000..e7cb578
--- /dev/null
@@ -0,0 +1,40 @@
+From aef0f1bd4a790725098ed5b52e9d31bd4d8a2382 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 14:19:13 +0200
+Subject: igb: disable virtualization features on 82580
+
+From: Corinna Vinschen <vinschen@redhat.com>
+
+[ Upstream commit fa09bc40b21a33937872c4c4cf0f266ec9fa4869 ]
+
+Disable virtualization features on 82580 just as on i210/i211.
+This prevents virt functions from being accidentally called on 82580.
+
+Fixes: 55cac248caa4 ("igb: Add full support for 82580 devices")
+Signed-off-by: Corinna Vinschen <vinschen@redhat.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igb/igb_main.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
+index 08e3df37089fe..ac19730e8db91 100644
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -3933,8 +3933,9 @@ static void igb_probe_vfs(struct igb_adapter *adapter)
+       struct pci_dev *pdev = adapter->pdev;
+       struct e1000_hw *hw = &adapter->hw;
+-      /* Virtualization features not supported on i210 family. */
+-      if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
++      /* Virtualization features not supported on i210 and 82580 family. */
++      if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) ||
++          (hw->mac.type == e1000_82580))
+               return;
+       /* Of the below we really only want the effect of getting
+-- 
+2.40.1
+
diff --git a/queue-6.5/igbvf-change-igbvf_min-to-allow-set-rx-tx-value-betw.patch b/queue-6.5/igbvf-change-igbvf_min-to-allow-set-rx-tx-value-betw.patch
new file mode 100644 (file)
index 0000000..86d2940
--- /dev/null
@@ -0,0 +1,44 @@
+From 5e27240cc45a96619d8387071127edee26d21952 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jul 2023 10:10:57 +0200
+Subject: igbvf: Change IGBVF_MIN to allow set rx/tx value between 64 and 80
+
+From: Olga Zaborska <olga.zaborska@intel.com>
+
+[ Upstream commit 8360717524a24a421c36ef8eb512406dbd42160a ]
+
+Change the minimum value of RX/TX descriptors to 64 to enable setting the rx/tx
+value between 64 and 80. All igbvf devices can use as low as 64 descriptors.
+This change will unify igbvf with other drivers.
+Based on commit 7b1be1987c1e ("e1000e: lower ring minimum size to 64")
+
+Fixes: d4e0fe01a38a ("igbvf: add new driver to support 82576 virtual functions")
+Signed-off-by: Olga Zaborska <olga.zaborska@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igbvf/igbvf.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igbvf/igbvf.h b/drivers/net/ethernet/intel/igbvf/igbvf.h
+index 57d39ee00b585..7b83678ba83a6 100644
+--- a/drivers/net/ethernet/intel/igbvf/igbvf.h
++++ b/drivers/net/ethernet/intel/igbvf/igbvf.h
+@@ -39,11 +39,11 @@ enum latency_range {
+ /* Tx/Rx descriptor defines */
+ #define IGBVF_DEFAULT_TXD     256
+ #define IGBVF_MAX_TXD         4096
+-#define IGBVF_MIN_TXD         80
++#define IGBVF_MIN_TXD         64
+ #define IGBVF_DEFAULT_RXD     256
+ #define IGBVF_MAX_RXD         4096
+-#define IGBVF_MIN_RXD         80
++#define IGBVF_MIN_RXD         64
+ #define IGBVF_MIN_ITR_USECS   10 /* 100000 irq/sec */
+ #define IGBVF_MAX_ITR_USECS   10000 /* 100    irq/sec */
+-- 
+2.40.1
+
diff --git a/queue-6.5/igc-change-igc_min-to-allow-set-rx-tx-value-between-.patch b/queue-6.5/igc-change-igc_min-to-allow-set-rx-tx-value-between-.patch
new file mode 100644 (file)
index 0000000..a3bd249
--- /dev/null
@@ -0,0 +1,44 @@
+From d60fd6ee78b2148b8fb9177aeacf229ab8e68218 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jul 2023 10:10:56 +0200
+Subject: igc: Change IGC_MIN to allow set rx/tx value between 64 and 80
+
+From: Olga Zaborska <olga.zaborska@intel.com>
+
+[ Upstream commit 5aa48279712e1f134aac908acde4df798955a955 ]
+
+Change the minimum value of RX/TX descriptors to 64 to enable setting the rx/tx
+value between 64 and 80. All igc devices can use as low as 64 descriptors.
+This change will unify igc with other drivers.
+Based on commit 7b1be1987c1e ("e1000e: lower ring minimum size to 64")
+
+Fixes: 0507ef8a0372 ("igc: Add transmit and receive fastpath and interrupt handlers")
+Signed-off-by: Olga Zaborska <olga.zaborska@intel.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
+index 38901d2a46807..b4077c3f62ed1 100644
+--- a/drivers/net/ethernet/intel/igc/igc.h
++++ b/drivers/net/ethernet/intel/igc/igc.h
+@@ -368,11 +368,11 @@ static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
+ /* TX/RX descriptor defines */
+ #define IGC_DEFAULT_TXD               256
+ #define IGC_DEFAULT_TX_WORK   128
+-#define IGC_MIN_TXD           80
++#define IGC_MIN_TXD           64
+ #define IGC_MAX_TXD           4096
+ #define IGC_DEFAULT_RXD               256
+-#define IGC_MIN_RXD           80
++#define IGC_MIN_RXD           64
+ #define IGC_MAX_RXD           4096
+ /* Supported Rx Buffer Sizes */
+-- 
+2.40.1
+
diff --git a/queue-6.5/input-iqs7222-configure-power-mode-before-triggering.patch b/queue-6.5/input-iqs7222-configure-power-mode-before-triggering.patch
new file mode 100644 (file)
index 0000000..fd0e14d
--- /dev/null
@@ -0,0 +1,61 @@
+From 2cf6d8c16a368d2bca1a110997acff80476dcb27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Jul 2023 12:06:37 -0500
+Subject: Input: iqs7222 - configure power mode before triggering ATI
+
+From: Jeff LaBundy <jeff@labundy.com>
+
+[ Upstream commit 2e00b8bf5624767f6be7427b6eb532524793463e ]
+
+If the device drops into ultra-low-power mode before being placed
+into normal-power mode as part of ATI being triggered, the device
+does not assert any interrupts until the ATI routine is restarted
+two seconds later.
+
+Solve this problem by adopting the vendor's recommendation, which
+calls for the device to be placed into normal-power mode prior to
+being configured and ATI being triggered.
+
+The original implementation followed this sequence, but the order
+was inadvertently changed as part of the resolution of a separate
+erratum.
+
+Fixes: 1e4189d8af27 ("Input: iqs7222 - protect volatile registers")
+Signed-off-by: Jeff LaBundy <jeff@labundy.com>
+Link: https://lore.kernel.org/r/ZKrpHc2Ji9qR25r2@nixie71
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/input/misc/iqs7222.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/input/misc/iqs7222.c b/drivers/input/misc/iqs7222.c
+index 096b0925f41ba..acb95048e8230 100644
+--- a/drivers/input/misc/iqs7222.c
++++ b/drivers/input/misc/iqs7222.c
+@@ -1381,9 +1381,6 @@ static int iqs7222_ati_trigger(struct iqs7222_private *iqs7222)
+       if (error)
+               return error;
+-      sys_setup &= ~IQS7222_SYS_SETUP_INTF_MODE_MASK;
+-      sys_setup &= ~IQS7222_SYS_SETUP_PWR_MODE_MASK;
+-
+       for (i = 0; i < IQS7222_NUM_RETRIES; i++) {
+               /*
+                * Trigger ATI from streaming and normal-power modes so that
+@@ -1561,8 +1558,11 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir)
+                       return error;
+       }
+-      if (dir == READ)
++      if (dir == READ) {
++              iqs7222->sys_setup[0] &= ~IQS7222_SYS_SETUP_INTF_MODE_MASK;
++              iqs7222->sys_setup[0] &= ~IQS7222_SYS_SETUP_PWR_MODE_MASK;
+               return 0;
++      }
+       return iqs7222_ati_trigger(iqs7222);
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/input-tca6416-keypad-always-expect-proper-irq-number.patch b/queue-6.5/input-tca6416-keypad-always-expect-proper-irq-number.patch
new file mode 100644 (file)
index 0000000..1dcd431
--- /dev/null
@@ -0,0 +1,128 @@
+From 641765431ed7b00af45c46cf15c0cea9d8a1bd3f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 23 Jul 2023 22:30:18 -0700
+Subject: Input: tca6416-keypad - always expect proper IRQ number in i2c client
+
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+[ Upstream commit 687fe7dfb736b03ab820d172ea5dbfc1ec447135 ]
+
+Remove option having i2c client contain raw gpio number instead of proper
+IRQ number. There are no users of this facility in mainline and it will
+allow cleaning up the driver code with regard to wakeup handling, etc.
+
+Link: https://lore.kernel.org/r/20230724053024.352054-1-dmitry.torokhov@gmail.com
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Stable-dep-of: cc141c35af87 ("Input: tca6416-keypad - fix interrupt enable disbalance")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/input/keyboard/tca6416-keypad.c | 27 +++++++++----------------
+ include/linux/tca6416_keypad.h          |  1 -
+ 2 files changed, 10 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/input/keyboard/tca6416-keypad.c b/drivers/input/keyboard/tca6416-keypad.c
+index 2f745cabf4f24..01bc0b8811882 100644
+--- a/drivers/input/keyboard/tca6416-keypad.c
++++ b/drivers/input/keyboard/tca6416-keypad.c
+@@ -148,7 +148,7 @@ static int tca6416_keys_open(struct input_dev *dev)
+       if (chip->use_polling)
+               schedule_delayed_work(&chip->dwork, msecs_to_jiffies(100));
+       else
+-              enable_irq(chip->irqnum);
++              enable_irq(chip->client->irq);
+       return 0;
+ }
+@@ -160,7 +160,7 @@ static void tca6416_keys_close(struct input_dev *dev)
+       if (chip->use_polling)
+               cancel_delayed_work_sync(&chip->dwork);
+       else
+-              disable_irq(chip->irqnum);
++              disable_irq(chip->client->irq);
+ }
+ static int tca6416_setup_registers(struct tca6416_keypad_chip *chip)
+@@ -266,12 +266,7 @@ static int tca6416_keypad_probe(struct i2c_client *client)
+               goto fail1;
+       if (!chip->use_polling) {
+-              if (pdata->irq_is_gpio)
+-                      chip->irqnum = gpio_to_irq(client->irq);
+-              else
+-                      chip->irqnum = client->irq;
+-
+-              error = request_threaded_irq(chip->irqnum, NULL,
++              error = request_threaded_irq(client->irq, NULL,
+                                            tca6416_keys_isr,
+                                            IRQF_TRIGGER_FALLING |
+                                            IRQF_ONESHOT | IRQF_NO_AUTOEN,
+@@ -279,7 +274,7 @@ static int tca6416_keypad_probe(struct i2c_client *client)
+               if (error) {
+                       dev_dbg(&client->dev,
+                               "Unable to claim irq %d; error %d\n",
+-                              chip->irqnum, error);
++                              client->irq, error);
+                       goto fail1;
+               }
+       }
+@@ -298,8 +293,8 @@ static int tca6416_keypad_probe(struct i2c_client *client)
+ fail2:
+       if (!chip->use_polling) {
+-              free_irq(chip->irqnum, chip);
+-              enable_irq(chip->irqnum);
++              free_irq(client->irq, chip);
++              enable_irq(client->irq);
+       }
+ fail1:
+       input_free_device(input);
+@@ -312,8 +307,8 @@ static void tca6416_keypad_remove(struct i2c_client *client)
+       struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
+       if (!chip->use_polling) {
+-              free_irq(chip->irqnum, chip);
+-              enable_irq(chip->irqnum);
++              free_irq(client->irq, chip);
++              enable_irq(client->irq);
+       }
+       input_unregister_device(chip->input);
+@@ -323,10 +318,9 @@ static void tca6416_keypad_remove(struct i2c_client *client)
+ static int tca6416_keypad_suspend(struct device *dev)
+ {
+       struct i2c_client *client = to_i2c_client(dev);
+-      struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
+       if (device_may_wakeup(dev))
+-              enable_irq_wake(chip->irqnum);
++              enable_irq_wake(client->irq);
+       return 0;
+ }
+@@ -334,10 +328,9 @@ static int tca6416_keypad_suspend(struct device *dev)
+ static int tca6416_keypad_resume(struct device *dev)
+ {
+       struct i2c_client *client = to_i2c_client(dev);
+-      struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
+       if (device_may_wakeup(dev))
+-              disable_irq_wake(chip->irqnum);
++              disable_irq_wake(client->irq);
+       return 0;
+ }
+diff --git a/include/linux/tca6416_keypad.h b/include/linux/tca6416_keypad.h
+index b0d36a9934ccd..5cf6f6f82aa70 100644
+--- a/include/linux/tca6416_keypad.h
++++ b/include/linux/tca6416_keypad.h
+@@ -25,7 +25,6 @@ struct tca6416_keys_platform_data {
+       unsigned int rep:1;     /* enable input subsystem auto repeat */
+       uint16_t pinmask;
+       uint16_t invert;
+-      int irq_is_gpio;
+       int use_polling;        /* use polling if Interrupt is not connected*/
+ };
+ #endif
+-- 
+2.40.1
+
diff --git a/queue-6.5/input-tca6416-keypad-fix-interrupt-enable-disbalance.patch b/queue-6.5/input-tca6416-keypad-fix-interrupt-enable-disbalance.patch
new file mode 100644 (file)
index 0000000..c8af7c1
--- /dev/null
@@ -0,0 +1,52 @@
+From b6f8e9fd82f8786a8fd7bfceb301dd725a9b146d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 23 Jul 2023 22:30:20 -0700
+Subject: Input: tca6416-keypad - fix interrupt enable disbalance
+
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+[ Upstream commit cc141c35af873c6796e043adcb820833bd8ef8c5 ]
+
+The driver has been switched to use IRQF_NO_AUTOEN, but in the error
+unwinding and remove paths calls to enable_irq() were left in place, which
+will lead to an incorrect enable counter value.
+
+Fixes: bcd9730a04a1 ("Input: move to use request_irq by IRQF_NO_AUTOEN flag")
+Link: https://lore.kernel.org/r/20230724053024.352054-3-dmitry.torokhov@gmail.com
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/input/keyboard/tca6416-keypad.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/input/keyboard/tca6416-keypad.c b/drivers/input/keyboard/tca6416-keypad.c
+index 01bc0b8811882..d20cbddfae68c 100644
+--- a/drivers/input/keyboard/tca6416-keypad.c
++++ b/drivers/input/keyboard/tca6416-keypad.c
+@@ -292,10 +292,8 @@ static int tca6416_keypad_probe(struct i2c_client *client)
+       return 0;
+ fail2:
+-      if (!chip->use_polling) {
++      if (!chip->use_polling)
+               free_irq(client->irq, chip);
+-              enable_irq(client->irq);
+-      }
+ fail1:
+       input_free_device(input);
+       kfree(chip);
+@@ -306,10 +304,8 @@ static void tca6416_keypad_remove(struct i2c_client *client)
+ {
+       struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
+-      if (!chip->use_polling) {
++      if (!chip->use_polling)
+               free_irq(client->irq, chip);
+-              enable_irq(client->irq);
+-      }
+       input_unregister_device(chip->input);
+       kfree(chip);
+-- 
+2.40.1
+
diff --git a/queue-6.5/ip_tunnels-use-dev_stats_inc.patch b/queue-6.5/ip_tunnels-use-dev_stats_inc.patch
new file mode 100644 (file)
index 0000000..3bca92f
--- /dev/null
@@ -0,0 +1,128 @@
+From aa1522a2b4e8a61eef61c7d5eb4affa0d0333bc0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Sep 2023 13:40:46 +0000
+Subject: ip_tunnels: use DEV_STATS_INC()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9b271ebaf9a2c5c566a54bc6cd915962e8241130 ]
+
+syzbot/KCSAN reported data-races in iptunnel_xmit_stats() [1]
+
+This can run from multiple cpus without mutual exclusion.
+
+Adopt SMP safe DEV_STATS_INC() to update dev->stats fields.
+
+[1]
+BUG: KCSAN: data-race in iptunnel_xmit / iptunnel_xmit
+
+read-write to 0xffff8881353df170 of 8 bytes by task 30263 on cpu 1:
+iptunnel_xmit_stats include/net/ip_tunnels.h:493 [inline]
+iptunnel_xmit+0x432/0x4a0 net/ipv4/ip_tunnel_core.c:87
+ip_tunnel_xmit+0x1477/0x1750 net/ipv4/ip_tunnel.c:831
+__gre_xmit net/ipv4/ip_gre.c:469 [inline]
+ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:662
+__netdev_start_xmit include/linux/netdevice.h:4889 [inline]
+netdev_start_xmit include/linux/netdevice.h:4903 [inline]
+xmit_one net/core/dev.c:3544 [inline]
+dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560
+__dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340
+dev_queue_xmit include/linux/netdevice.h:3082 [inline]
+__bpf_tx_skb net/core/filter.c:2129 [inline]
+__bpf_redirect_no_mac net/core/filter.c:2159 [inline]
+__bpf_redirect+0x723/0x9c0 net/core/filter.c:2182
+____bpf_clone_redirect net/core/filter.c:2453 [inline]
+bpf_clone_redirect+0x16c/0x1d0 net/core/filter.c:2425
+___bpf_prog_run+0xd7d/0x41e0 kernel/bpf/core.c:1954
+__bpf_prog_run512+0x74/0xa0 kernel/bpf/core.c:2195
+bpf_dispatcher_nop_func include/linux/bpf.h:1181 [inline]
+__bpf_prog_run include/linux/filter.h:609 [inline]
+bpf_prog_run include/linux/filter.h:616 [inline]
+bpf_test_run+0x15d/0x3d0 net/bpf/test_run.c:423
+bpf_prog_test_run_skb+0x77b/0xa00 net/bpf/test_run.c:1045
+bpf_prog_test_run+0x265/0x3d0 kernel/bpf/syscall.c:3996
+__sys_bpf+0x3af/0x780 kernel/bpf/syscall.c:5353
+__do_sys_bpf kernel/bpf/syscall.c:5439 [inline]
+__se_sys_bpf kernel/bpf/syscall.c:5437 [inline]
+__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5437
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read-write to 0xffff8881353df170 of 8 bytes by task 30249 on cpu 0:
+iptunnel_xmit_stats include/net/ip_tunnels.h:493 [inline]
+iptunnel_xmit+0x432/0x4a0 net/ipv4/ip_tunnel_core.c:87
+ip_tunnel_xmit+0x1477/0x1750 net/ipv4/ip_tunnel.c:831
+__gre_xmit net/ipv4/ip_gre.c:469 [inline]
+ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:662
+__netdev_start_xmit include/linux/netdevice.h:4889 [inline]
+netdev_start_xmit include/linux/netdevice.h:4903 [inline]
+xmit_one net/core/dev.c:3544 [inline]
+dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560
+__dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340
+dev_queue_xmit include/linux/netdevice.h:3082 [inline]
+__bpf_tx_skb net/core/filter.c:2129 [inline]
+__bpf_redirect_no_mac net/core/filter.c:2159 [inline]
+__bpf_redirect+0x723/0x9c0 net/core/filter.c:2182
+____bpf_clone_redirect net/core/filter.c:2453 [inline]
+bpf_clone_redirect+0x16c/0x1d0 net/core/filter.c:2425
+___bpf_prog_run+0xd7d/0x41e0 kernel/bpf/core.c:1954
+__bpf_prog_run512+0x74/0xa0 kernel/bpf/core.c:2195
+bpf_dispatcher_nop_func include/linux/bpf.h:1181 [inline]
+__bpf_prog_run include/linux/filter.h:609 [inline]
+bpf_prog_run include/linux/filter.h:616 [inline]
+bpf_test_run+0x15d/0x3d0 net/bpf/test_run.c:423
+bpf_prog_test_run_skb+0x77b/0xa00 net/bpf/test_run.c:1045
+bpf_prog_test_run+0x265/0x3d0 kernel/bpf/syscall.c:3996
+__sys_bpf+0x3af/0x780 kernel/bpf/syscall.c:5353
+__do_sys_bpf kernel/bpf/syscall.c:5439 [inline]
+__se_sys_bpf kernel/bpf/syscall.c:5437 [inline]
+__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5437
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x0000000000018830 -> 0x0000000000018831
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 30249 Comm: syz-executor.4 Not tainted 6.5.0-syzkaller-11704-g3f86ed6ec0b3 #0
+
+Fixes: 039f50629b7f ("ip_tunnel: Move stats update to iptunnel_xmit()")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip_tunnels.h | 15 +++++++--------
+ 1 file changed, 7 insertions(+), 8 deletions(-)
+
+diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
+index ed4b6ad3fcac8..cd0e2744f66f3 100644
+--- a/include/net/ip_tunnels.h
++++ b/include/net/ip_tunnels.h
+@@ -482,15 +482,14 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
+               u64_stats_inc(&tstats->tx_packets);
+               u64_stats_update_end(&tstats->syncp);
+               put_cpu_ptr(tstats);
++              return;
++      }
++
++      if (pkt_len < 0) {
++              DEV_STATS_INC(dev, tx_errors);
++              DEV_STATS_INC(dev, tx_aborted_errors);
+       } else {
+-              struct net_device_stats *err_stats = &dev->stats;
+-
+-              if (pkt_len < 0) {
+-                      err_stats->tx_errors++;
+-                      err_stats->tx_aborted_errors++;
+-              } else {
+-                      err_stats->tx_dropped++;
+-              }
++              DEV_STATS_INC(dev, tx_dropped);
+       }
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/ipv4-annotate-data-races-around-fi-fib_dead.patch b/queue-6.5/ipv4-annotate-data-races-around-fi-fib_dead.patch
new file mode 100644 (file)
index 0000000..bea5ccf
--- /dev/null
@@ -0,0 +1,136 @@
+From 0e15ad1d5e4b5a1641c7d46cd22fca90bae9f9bc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Aug 2023 09:55:20 +0000
+Subject: ipv4: annotate data-races around fi->fib_dead
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit fce92af1c29d90184dfec638b5738831097d66e9 ]
+
+syzbot complained about a data-race in fib_table_lookup() [1]
+
+Add appropriate annotations to document it.
+
+[1]
+BUG: KCSAN: data-race in fib_release_info / fib_table_lookup
+
+write to 0xffff888150f31744 of 1 bytes by task 1189 on cpu 0:
+fib_release_info+0x3a0/0x460 net/ipv4/fib_semantics.c:281
+fib_table_delete+0x8d2/0x900 net/ipv4/fib_trie.c:1777
+fib_magic+0x1c1/0x1f0 net/ipv4/fib_frontend.c:1106
+fib_del_ifaddr+0x8cf/0xa60 net/ipv4/fib_frontend.c:1317
+fib_inetaddr_event+0x77/0x200 net/ipv4/fib_frontend.c:1448
+notifier_call_chain kernel/notifier.c:93 [inline]
+blocking_notifier_call_chain+0x90/0x200 kernel/notifier.c:388
+__inet_del_ifa+0x4df/0x800 net/ipv4/devinet.c:432
+inet_del_ifa net/ipv4/devinet.c:469 [inline]
+inetdev_destroy net/ipv4/devinet.c:322 [inline]
+inetdev_event+0x553/0xaf0 net/ipv4/devinet.c:1606
+notifier_call_chain kernel/notifier.c:93 [inline]
+raw_notifier_call_chain+0x6b/0x1c0 kernel/notifier.c:461
+call_netdevice_notifiers_info net/core/dev.c:1962 [inline]
+call_netdevice_notifiers_mtu+0xd2/0x130 net/core/dev.c:2037
+dev_set_mtu_ext+0x30b/0x3e0 net/core/dev.c:8673
+do_setlink+0x5be/0x2430 net/core/rtnetlink.c:2837
+rtnl_setlink+0x255/0x300 net/core/rtnetlink.c:3177
+rtnetlink_rcv_msg+0x807/0x8c0 net/core/rtnetlink.c:6445
+netlink_rcv_skb+0x126/0x220 net/netlink/af_netlink.c:2549
+rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:6463
+netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
+netlink_unicast+0x56f/0x640 net/netlink/af_netlink.c:1365
+netlink_sendmsg+0x665/0x770 net/netlink/af_netlink.c:1914
+sock_sendmsg_nosec net/socket.c:725 [inline]
+sock_sendmsg net/socket.c:748 [inline]
+sock_write_iter+0x1aa/0x230 net/socket.c:1129
+do_iter_write+0x4b4/0x7b0 fs/read_write.c:860
+vfs_writev+0x1a8/0x320 fs/read_write.c:933
+do_writev+0xf8/0x220 fs/read_write.c:976
+__do_sys_writev fs/read_write.c:1049 [inline]
+__se_sys_writev fs/read_write.c:1046 [inline]
+__x64_sys_writev+0x45/0x50 fs/read_write.c:1046
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read to 0xffff888150f31744 of 1 bytes by task 21839 on cpu 1:
+fib_table_lookup+0x2bf/0xd50 net/ipv4/fib_trie.c:1585
+fib_lookup include/net/ip_fib.h:383 [inline]
+ip_route_output_key_hash_rcu+0x38c/0x12c0 net/ipv4/route.c:2751
+ip_route_output_key_hash net/ipv4/route.c:2641 [inline]
+__ip_route_output_key include/net/route.h:134 [inline]
+ip_route_output_flow+0xa6/0x150 net/ipv4/route.c:2869
+send4+0x1e7/0x500 drivers/net/wireguard/socket.c:61
+wg_socket_send_skb_to_peer+0x94/0x130 drivers/net/wireguard/socket.c:175
+wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200
+wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline]
+wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51
+process_one_work+0x434/0x860 kernel/workqueue.c:2600
+worker_thread+0x5f2/0xa10 kernel/workqueue.c:2751
+kthread+0x1d7/0x210 kernel/kthread.c:389
+ret_from_fork+0x2e/0x40 arch/x86/kernel/process.c:145
+ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
+
+value changed: 0x00 -> 0x01
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 21839 Comm: kworker/u4:18 Tainted: G W 6.5.0-syzkaller #0
+
+Fixes: dccd9ecc3744 ("ipv4: Do not use dead fib_info entries.")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230830095520.1046984-1-edumazet@google.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/fib_semantics.c | 5 ++++-
+ net/ipv4/fib_trie.c      | 3 ++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
+index 65ba18a91865a..eafa4a0335157 100644
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -278,7 +278,8 @@ void fib_release_info(struct fib_info *fi)
+                               hlist_del(&nexthop_nh->nh_hash);
+                       } endfor_nexthops(fi)
+               }
+-              fi->fib_dead = 1;
++              /* Paired with READ_ONCE() from fib_table_lookup() */
++              WRITE_ONCE(fi->fib_dead, 1);
+               fib_info_put(fi);
+       }
+       spin_unlock_bh(&fib_info_lock);
+@@ -1581,6 +1582,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
+ link_it:
+       ofi = fib_find_info(fi);
+       if (ofi) {
++              /* fib_table_lookup() should not see @fi yet. */
+               fi->fib_dead = 1;
+               free_fib_info(fi);
+               refcount_inc(&ofi->fib_treeref);
+@@ -1619,6 +1621,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
+ failure:
+       if (fi) {
++              /* fib_table_lookup() should not see @fi yet. */
+               fi->fib_dead = 1;
+               free_fib_info(fi);
+       }
+diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
+index 74d403dbd2b4e..d13fb9e76b971 100644
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -1582,7 +1582,8 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
+               if (fa->fa_dscp &&
+                   inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
+                       continue;
+-              if (fi->fib_dead)
++              /* Paired with WRITE_ONCE() in fib_release_info() */
++              if (READ_ONCE(fi->fib_dead))
+                       continue;
+               if (fa->fa_info->fib_scope < flp->flowi4_scope)
+                       continue;
+-- 
+2.40.1
+
diff --git a/queue-6.5/ipv4-ignore-dst-hint-for-multipath-routes.patch b/queue-6.5/ipv4-ignore-dst-hint-for-multipath-routes.patch
new file mode 100644 (file)
index 0000000..45ce312
--- /dev/null
@@ -0,0 +1,71 @@
+From 972a056acf572e614638bc0f6774fd8f7ffbbc2b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 10:03:30 +0200
+Subject: ipv4: ignore dst hint for multipath routes
+
+From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
+
+[ Upstream commit 6ac66cb03ae306c2e288a9be18226310529f5b25 ]
+
+Using a route hint when the nexthop is part of a multipath group causes
+packets in the same receive batch to be sent to the same nexthop,
+irrespective of the multipath hash of each packet. So, do not extract a
+route hint for packets whose destination is part of a multipath group.
+
+A new SKB flag, IPSKB_MULTIPATH, is introduced for this purpose: it is
+set in ip_mkroute_input() when a multipath nexthop is selected, and it
+is checked in ip_extract_route_hint() before a hint is extracted.
+
+Fixes: 02b24941619f ("ipv4: use dst hint for ipv4 list receive")
+Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip.h    | 1 +
+ net/ipv4/ip_input.c | 3 ++-
+ net/ipv4/route.c    | 1 +
+ 3 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 9276cea775cc2..3489a1cca5e7b 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -57,6 +57,7 @@ struct inet_skb_parm {
+ #define IPSKB_FRAG_PMTU               BIT(6)
+ #define IPSKB_L3SLAVE         BIT(7)
+ #define IPSKB_NOPOLICY                BIT(8)
++#define IPSKB_MULTIPATH               BIT(9)
+       u16                     frag_max_size;
+ };
+diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
+index fe9ead9ee863d..5e9c8156656a7 100644
+--- a/net/ipv4/ip_input.c
++++ b/net/ipv4/ip_input.c
+@@ -584,7 +584,8 @@ static void ip_sublist_rcv_finish(struct list_head *head)
+ static struct sk_buff *ip_extract_route_hint(const struct net *net,
+                                            struct sk_buff *skb, int rt_type)
+ {
+-      if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
++      if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
++          IPCB(skb)->flags & IPSKB_MULTIPATH)
+               return NULL;
+       return skb;
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 92fede388d520..33626619aee79 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -2144,6 +2144,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
+               int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
+               fib_select_multipath(res, h);
++              IPCB(skb)->flags |= IPSKB_MULTIPATH;
+       }
+ #endif
+-- 
+2.40.1
+
diff --git a/queue-6.5/ipv6-ignore-dst-hint-for-multipath-routes.patch b/queue-6.5/ipv6-ignore-dst-hint-for-multipath-routes.patch
new file mode 100644 (file)
index 0000000..97a53a8
--- /dev/null
@@ -0,0 +1,73 @@
+From 3642992a47f41a5b95b3e948f0d5847ee6d0bcc9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 10:03:31 +0200
+Subject: ipv6: ignore dst hint for multipath routes
+
+From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
+
+[ Upstream commit 8423be8926aa82cd2e28bba5cc96ccb72c7ce6be ]
+
+Using a route hint when the nexthop is part of a multipath group causes
+packets in the same receive batch to be sent to the same nexthop,
+irrespective of the multipath hash of each packet. So, do not extract a
+route hint for packets whose destination is part of a multipath group.
+
+A new SKB flag, IP6SKB_MULTIPATH, is introduced for this purpose: it is
+set during route lookup in fib6_select_path(), and it is checked in
+ip6_extract_route_hint() before a hint is extracted.
+
+Fixes: 197dbf24e360 ("ipv6: introduce and uses route look hints for list input.")
+Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/ipv6.h | 1 +
+ net/ipv6/ip6_input.c | 3 ++-
+ net/ipv6/route.c     | 3 +++
+ 3 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
+index 660012997f54c..644e69354cba6 100644
+--- a/include/linux/ipv6.h
++++ b/include/linux/ipv6.h
+@@ -146,6 +146,7 @@ struct inet6_skb_parm {
+ #define IP6SKB_JUMBOGRAM      128
+ #define IP6SKB_SEG6         256
+ #define IP6SKB_FAKEJUMBO      512
++#define IP6SKB_MULTIPATH      1024
+ };
+ #if defined(CONFIG_NET_L3_MASTER_DEV)
+diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
+index d94041bb42872..b8378814532ce 100644
+--- a/net/ipv6/ip6_input.c
++++ b/net/ipv6/ip6_input.c
+@@ -99,7 +99,8 @@ static bool ip6_can_use_hint(const struct sk_buff *skb,
+ static struct sk_buff *ip6_extract_route_hint(const struct net *net,
+                                             struct sk_buff *skb)
+ {
+-      if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
++      if (fib6_routes_require_src(net) || fib6_has_custom_rules(net) ||
++          IP6CB(skb)->flags & IP6SKB_MULTIPATH)
+               return NULL;
+       return skb;
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 56a55585eb798..a02328c93a537 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -424,6 +424,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
+       if (match->nh && have_oif_match && res->nh)
+               return;
++      if (skb)
++              IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
++
+       /* We might have already computed the hash for ICMPv6 errors. In such
+        * case it will always be non-zero. Otherwise now is the time to do it.
+        */
+-- 
+2.40.1
+
diff --git a/queue-6.5/kbuild-do-not-run-depmod-for-make-modules_sign.patch b/queue-6.5/kbuild-do-not-run-depmod-for-make-modules_sign.patch
new file mode 100644 (file)
index 0000000..769bc40
--- /dev/null
@@ -0,0 +1,41 @@
+From 1d74d20478ab7ade500f572431ac510c8ae90f57 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Aug 2023 20:50:41 +0900
+Subject: kbuild: do not run depmod for 'make modules_sign'
+
+From: Masahiro Yamada <masahiroy@kernel.org>
+
+[ Upstream commit 2429742e506a2b5939a62c629c4a46d91df0ada8 ]
+
+Commit 961ab4a3cd66 ("kbuild: merge scripts/Makefile.modsign to
+scripts/Makefile.modinst") started to run depmod at the end of
+'make modules_sign'.
+
+Move the depmod rule to scripts/Makefile.modinst and run it only when
+$(modules_sign_only) is empty.
+
+Fixes: 961ab4a3cd66 ("kbuild: merge scripts/Makefile.modsign to scripts/Makefile.modinst")
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Makefile | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/Makefile b/Makefile
+index 901cdfa5e7d3b..a5178b9863fb2 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1962,7 +1962,9 @@ quiet_cmd_depmod = DEPMOD  $(MODLIB)
+ modules_install:
+       $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
++ifndef modules_sign_only
+       $(call cmd,depmod)
++endif
+ else # CONFIG_MODULES
+-- 
+2.40.1
+
diff --git a/queue-6.5/kbuild-dummy-tools-make-mprofile_kernel-checks-work-.patch b/queue-6.5/kbuild-dummy-tools-make-mprofile_kernel-checks-work-.patch
new file mode 100644 (file)
index 0000000..93ac164
--- /dev/null
@@ -0,0 +1,44 @@
+From 3b3ef972b5bdb1cbb316047de1a3acb42a6432c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Aug 2023 12:51:06 +0200
+Subject: kbuild: dummy-tools: make MPROFILE_KERNEL checks work on BE
+
+From: Jiri Slaby <jslaby@suse.cz>
+
+[ Upstream commit bfb41e46d0b040ae83c1c4a50292298208b10f73 ]
+
+Commit 2eab791f940b ("kbuild: dummy-tools: support MPROFILE_KERNEL
+checks for ppc") added support for ppc64le's checks for
+-mprofile-kernel.
+
+Now, commit aec0ba7472a7 ("powerpc/64: Use -mprofile-kernel for big
+endian ELFv2 kernels") added support for -mprofile-kernel even on
+big-endian ppc.
+
+So lift the check in gcc-check-mprofile-kernel.sh to support big-endian too.
+
+Fixes: aec0ba7472a7 ("powerpc/64: Use -mprofile-kernel for big endian ELFv2 kernels")
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ scripts/dummy-tools/gcc | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc
+index 1db1889f6d81e..07f6dc4c5cf69 100755
+--- a/scripts/dummy-tools/gcc
++++ b/scripts/dummy-tools/gcc
+@@ -85,8 +85,7 @@ if arg_contain -S "$@"; then
+       fi
+       # For arch/powerpc/tools/gcc-check-mprofile-kernel.sh
+-      if arg_contain -m64 "$@" && arg_contain -mlittle-endian "$@" &&
+-              arg_contain -mprofile-kernel "$@"; then
++      if arg_contain -m64 "$@" && arg_contain -mprofile-kernel "$@"; then
+               if ! test -t 0 && ! grep -q notrace; then
+                       echo "_mcount"
+               fi
+-- 
+2.40.1
+
diff --git a/queue-6.5/kbuild-rpm-pkg-define-_arch-conditionally.patch b/queue-6.5/kbuild-rpm-pkg-define-_arch-conditionally.patch
new file mode 100644 (file)
index 0000000..aaa59c1
--- /dev/null
@@ -0,0 +1,39 @@
+From 5ee3773cfe9f44a684670a26169868fc4e43bac2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 22 Jul 2023 13:47:48 +0900
+Subject: kbuild: rpm-pkg: define _arch conditionally
+
+From: Masahiro Yamada <masahiroy@kernel.org>
+
+[ Upstream commit 233046a2afd12a4f699305b92ee634eebf1e4f31 ]
+
+Commit 3089b2be0cce ("kbuild: rpm-pkg: fix build error when _arch is
+undefined") does not work as intended; _arch is always defined as
+$UTS_MACHINE.
+
+The intention was to define _arch to $UTS_MACHINE only when it is not
+defined.
+
+Fixes: 3089b2be0cce ("kbuild: rpm-pkg: fix build error when _arch is undefined")
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ scripts/package/mkspec | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/scripts/package/mkspec b/scripts/package/mkspec
+index 8049f0e2c110f..c9299f9c1f3e4 100755
+--- a/scripts/package/mkspec
++++ b/scripts/package/mkspec
+@@ -57,7 +57,7 @@ $S   BuildRequires: gcc make openssl openssl-devel perl python3 rsync
+       # $UTS_MACHINE as a fallback of _arch in case
+       # /usr/lib/rpm/platform/*/macros was not included.
+-      %define _arch %{?_arch:$UTS_MACHINE}
++      %{!?_arch: %define _arch $UTS_MACHINE}
+       %define __spec_install_post /usr/lib/rpm/brp-compress || :
+       %define debug_package %{nil}
+-- 
+2.40.1
+
diff --git a/queue-6.5/kcm-destroy-mutex-in-kcm_exit_net.patch b/queue-6.5/kcm-destroy-mutex-in-kcm_exit_net.patch
new file mode 100644 (file)
index 0000000..0818ff0
--- /dev/null
@@ -0,0 +1,37 @@
+From de3d0b42499a42d86d813796afc4edc3aa6096af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 3 Sep 2023 02:07:08 +0900
+Subject: kcm: Destroy mutex in kcm_exit_net()
+
+From: Shigeru Yoshida <syoshida@redhat.com>
+
+[ Upstream commit 6ad40b36cd3b04209e2d6c89d252c873d8082a59 ]
+
+kcm_exit_net() should call mutex_destroy() on knet->mutex. This is especially
+needed if CONFIG_DEBUG_MUTEXES is enabled.
+
+Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module")
+Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
+Link: https://lore.kernel.org/r/20230902170708.1727999-1-syoshida@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/kcm/kcmsock.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
+index 393f01b2a7e6d..4580f61426bb8 100644
+--- a/net/kcm/kcmsock.c
++++ b/net/kcm/kcmsock.c
+@@ -1859,6 +1859,8 @@ static __net_exit void kcm_exit_net(struct net *net)
+        * that all multiplexors and psocks have been destroyed.
+        */
+       WARN_ON(!list_empty(&knet->mux_list));
++
++      mutex_destroy(&knet->mutex);
+ }
+ static struct pernet_operations kcm_net_ops = {
+-- 
+2.40.1
+
diff --git a/queue-6.5/kconfig-fix-possible-buffer-overflow.patch b/queue-6.5/kconfig-fix-possible-buffer-overflow.patch
new file mode 100644 (file)
index 0000000..4c0ca8b
--- /dev/null
@@ -0,0 +1,38 @@
+From 635bcaf5e476853131c2bfa09c18245252eb63ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Sep 2023 17:59:14 +0800
+Subject: kconfig: fix possible buffer overflow
+
+From: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>
+
+[ Upstream commit a3b7039bb2b22fcd2ad20d59c00ed4e606ce3754 ]
+
+Buffer 'new_argv' is indexed by 'new_argc' with a bound check at first,
+but is then accessed once more via the same index without a bound check.
+
+Fixes: e298f3b49def ("kconfig: add built-in function support")
+Co-developed-by: Ivanov Mikhail <ivanov.mikhail1@huawei-partners.com>
+Signed-off-by: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ scripts/kconfig/preprocess.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c
+index 748da578b418c..d1f5bcff4b62d 100644
+--- a/scripts/kconfig/preprocess.c
++++ b/scripts/kconfig/preprocess.c
+@@ -396,6 +396,9 @@ static char *eval_clause(const char *str, size_t len, int argc, char *argv[])
+               p++;
+       }
++
++      if (new_argc >= FUNCTION_MAX_ARGS)
++              pperror("too many function arguments");
+       new_argv[new_argc++] = prev;
+       /*
+-- 
+2.40.1
+
diff --git a/queue-6.5/kvm-svm-correct-the-size-of-spec_ctrl-field-in-vmcb-.patch b/queue-6.5/kvm-svm-correct-the-size-of-spec_ctrl-field-in-vmcb-.patch
new file mode 100644 (file)
index 0000000..32bb257
--- /dev/null
@@ -0,0 +1,60 @@
+From 578a5cd9dc2ccde736f91d2def6e3d808e414838 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 04:19:03 +0000
+Subject: KVM: SVM: correct the size of spec_ctrl field in VMCB save area
+
+From: Manali Shukla <manali.shukla@amd.com>
+
+[ Upstream commit f67063414c0e83bb4a9e12358cc179af53c2a8bb ]
+
+Correct the spec_ctrl field in the VMCB save area based on the AMD
+Programmer's manual.
+
+Originally, the spec_ctrl was listed as u32 with 4 bytes of reserved
+area.  The AMD Programmer's Manual now lists the spec_ctrl as 8 bytes
+in VMCB save area.
+
+The Public Processor Programming Reference for Genoa shows SPEC_CTRL
+as a 64b register, but the AMD Programmer's Manual lists SPEC_CTRL as
+a 32b register. This discrepancy will be cleaned up in the next revision
+of the AMD Programmer's Manual.
+
+Since the remaining bits above bit 7 of the SPEC_CTRL MSR are reserved
+and thus not used, the spec_ctrl field being declared as u32 in the VMCB
+save area is currently not an issue.
+
+Fixes: 3dd2775b74c9 ("KVM: SVM: Create a separate mapping for the SEV-ES save area")
+Suggested-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Manali Shukla <manali.shukla@amd.com>
+Link: https://lore.kernel.org/r/20230717041903.85480-1-manali.shukla@amd.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/svm.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
+index e7c7379d6ac7b..dee9fa91120bc 100644
+--- a/arch/x86/include/asm/svm.h
++++ b/arch/x86/include/asm/svm.h
+@@ -345,7 +345,7 @@ struct vmcb_save_area {
+       u64 last_excp_from;
+       u64 last_excp_to;
+       u8 reserved_0x298[72];
+-      u32 spec_ctrl;          /* Guest version of SPEC_CTRL at 0x2E0 */
++      u64 spec_ctrl;          /* Guest version of SPEC_CTRL at 0x2E0 */
+ } __packed;
+ /* Save area definition for SEV-ES and SEV-SNP guests */
+@@ -512,7 +512,7 @@ struct ghcb {
+ } __packed;
+-#define EXPECTED_VMCB_SAVE_AREA_SIZE          740
++#define EXPECTED_VMCB_SAVE_AREA_SIZE          744
+ #define EXPECTED_GHCB_SAVE_AREA_SIZE          1032
+ #define EXPECTED_SEV_ES_SAVE_AREA_SIZE                1648
+ #define EXPECTED_VMCB_CONTROL_AREA_SIZE               1024
+-- 
+2.40.1
+
diff --git a/queue-6.5/kvm-svm-don-t-defer-nmi-unblocking-until-next-exit-f.patch b/queue-6.5/kvm-svm-don-t-defer-nmi-unblocking-until-next-exit-f.patch
new file mode 100644 (file)
index 0000000..b6f41f7
--- /dev/null
@@ -0,0 +1,86 @@
+From 2e811765550d5ff39fa876ca7ad0309fd92e9d41 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 16:37:56 +1000
+Subject: KVM: SVM: Don't defer NMI unblocking until next exit for SEV-ES
+ guests
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 389fbbec261b2842fd0e34b26a2b288b122cc406 ]
+
+Immediately mark NMIs as unmasked in response to #VMGEXIT(NMI complete)
+instead of setting awaiting_iret_completion and waiting until the *next*
+VM-Exit to unmask NMIs.  The whole point of "NMI complete" is that the
+guest is responsible for telling the hypervisor when it's safe to inject
+an NMI, i.e. there's no need to wait.  And because there's no IRET to
+single-step, the next VM-Exit could be a long time coming, i.e. KVM could
+incorrectly hold an NMI pending for far longer than what is required and
+expected.
+
+Opportunistically fix a stale reference to HF_IRET_MASK.
+
+Fixes: 916b54a7688b ("KVM: x86: Move HF_NMI_MASK and HF_IRET_MASK into "struct vcpu_svm"")
+Fixes: 4444dfe4050b ("KVM: SVM: Add NMI support for an SEV-ES guest")
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lore.kernel.org/r/20230615063757.3039121-9-aik@amd.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/svm/sev.c |  5 ++++-
+ arch/x86/kvm/svm/svm.c | 10 +++++-----
+ 2 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index d3aec1f2cad20..42630f5b11875 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -2881,7 +2881,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
+                                           svm->sev_es.ghcb_sa);
+               break;
+       case SVM_VMGEXIT_NMI_COMPLETE:
+-              ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
++              ++vcpu->stat.nmi_window_exits;
++              svm->nmi_masked = false;
++              kvm_make_request(KVM_REQ_EVENT, vcpu);
++              ret = 1;
+               break;
+       case SVM_VMGEXIT_AP_HLT_LOOP:
+               ret = kvm_emulate_ap_reset_hold(vcpu);
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index d4bfdc607fe7f..dfb8a3f504322 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2510,12 +2510,13 @@ static int iret_interception(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
++      WARN_ON_ONCE(sev_es_guest(vcpu->kvm));
++
+       ++vcpu->stat.nmi_window_exits;
+       svm->awaiting_iret_completion = true;
+       svm_clr_iret_intercept(svm);
+-      if (!sev_es_guest(vcpu->kvm))
+-              svm->nmi_iret_rip = kvm_rip_read(vcpu);
++      svm->nmi_iret_rip = kvm_rip_read(vcpu);
+       kvm_make_request(KVM_REQ_EVENT, vcpu);
+       return 1;
+@@ -3918,12 +3919,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
+       svm->soft_int_injected = false;
+       /*
+-       * If we've made progress since setting HF_IRET_MASK, we've
++       * If we've made progress since setting awaiting_iret_completion, we've
+        * executed an IRET and can allow NMI injection.
+        */
+       if (svm->awaiting_iret_completion &&
+-          (sev_es_guest(vcpu->kvm) ||
+-           kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
++          kvm_rip_read(vcpu) != svm->nmi_iret_rip) {
+               svm->awaiting_iret_completion = false;
+               svm->nmi_masked = false;
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
+-- 
+2.40.1
+
diff --git a/queue-6.5/mailbox-qcom-ipcc-fix-incorrect-num_chans-counting.patch b/queue-6.5/mailbox-qcom-ipcc-fix-incorrect-num_chans-counting.patch
new file mode 100644 (file)
index 0000000..78cee5a
--- /dev/null
@@ -0,0 +1,39 @@
+From 9cd12decf69e83d0ca7bc4197c7994874677efd5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 09:52:22 -0400
+Subject: mailbox: qcom-ipcc: fix incorrect num_chans counting
+
+From: Jonathan Marek <jonathan@marek.ca>
+
+[ Upstream commit a493208079e299aefdc15169dc80e3da3ebb718a ]
+
+Breaking out early when a match is found leads to an incorrect num_chans
+value when more than one ipcc mailbox channel is used by the same device.
+
+Fixes: e9d50e4b4d04 ("mailbox: qcom-ipcc: Dynamic alloc for channel arrangement")
+Signed-off-by: Jonathan Marek <jonathan@marek.ca>
+Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mailbox/qcom-ipcc.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/drivers/mailbox/qcom-ipcc.c b/drivers/mailbox/qcom-ipcc.c
+index 7e27acf6c0cca..f597a1bd56847 100644
+--- a/drivers/mailbox/qcom-ipcc.c
++++ b/drivers/mailbox/qcom-ipcc.c
+@@ -227,10 +227,8 @@ static int qcom_ipcc_setup_mbox(struct qcom_ipcc *ipcc,
+                       ret = of_parse_phandle_with_args(client_dn, "mboxes",
+                                               "#mbox-cells", j, &curr_ph);
+                       of_node_put(curr_ph.np);
+-                      if (!ret && curr_ph.np == controller_dn) {
++                      if (!ret && curr_ph.np == controller_dn)
+                               ipcc->num_chans++;
+-                              break;
+-                      }
+               }
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.5/mlx5-core-e-switch-create-acl-ft-for-eswitch-manager.patch b/queue-6.5/mlx5-core-e-switch-create-acl-ft-for-eswitch-manager.patch
new file mode 100644 (file)
index 0000000..2491c2d
--- /dev/null
@@ -0,0 +1,178 @@
+From e3fbc0e0967d35fcdcb52e1367d95b97dcc117aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Sep 2023 10:48:46 -0700
+Subject: mlx5/core: E-Switch, Create ACL FT for eswitch manager in switchdev
+ mode
+
+From: Bodong Wang <bodong@nvidia.com>
+
+[ Upstream commit 344134609a564f28b3cc81ca6650319ccd5d8961 ]
+
+The ACL flow table is required in switchdev mode when metadata is enabled,
+and the driver creates such a table when loading each vport. However, not
+every vport is loaded in switchdev mode, e.g. the ECPF if it's the eswitch
+manager. In this case, the ACL flow table is still needed.
+
+To make it modularized, create ACL flow table for eswitch manager as
+default and skip such operations when loading manager vport.
+
+Also, there is no need to load the eswitch manager vport in switchdev mode.
+This means there is no need to load it on regular connect-x HCAs where
+the PF is the eswitch manager. This will avoid creating duplicate ACL
+flow table for host PF vport.
+
+Fixes: 29bcb6e4fe70 ("net/mlx5e: E-Switch, Use metadata for vport matching in send-to-vport rules")
+Fixes: eb8e9fae0a22 ("mlx5/core: E-Switch, Allocate ECPF vport if it's an eswitch manager")
+Fixes: 5019833d661f ("net/mlx5: E-switch, Introduce helper function to enable/disable vports")
+Signed-off-by: Bodong Wang <bodong@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/eswitch.c | 21 ++++++--
+ .../mellanox/mlx5/core/eswitch_offloads.c     | 49 +++++++++++++------
+ 2 files changed, 51 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+index 591184d892af6..6e9b1b183190d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1212,12 +1212,19 @@ int
+ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
+                                enum mlx5_eswitch_vport_event enabled_events)
+ {
++      bool pf_needed;
+       int ret;
++      pf_needed = mlx5_core_is_ecpf_esw_manager(esw->dev) ||
++                  esw->mode == MLX5_ESWITCH_LEGACY;
++
+       /* Enable PF vport */
+-      ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events);
+-      if (ret)
+-              return ret;
++      if (pf_needed) {
++              ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF,
++                                                  enabled_events);
++              if (ret)
++                      return ret;
++      }
+       /* Enable external host PF HCA */
+       ret = host_pf_enable_hca(esw->dev);
+@@ -1253,7 +1260,8 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
+ ecpf_err:
+       host_pf_disable_hca(esw->dev);
+ pf_hca_err:
+-      mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
++      if (pf_needed)
++              mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
+       return ret;
+ }
+@@ -1271,7 +1279,10 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
+       }
+       host_pf_disable_hca(esw->dev);
+-      mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
++
++      if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
++          esw->mode == MLX5_ESWITCH_LEGACY)
++              mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
+ }
+ static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index 2f8bab73643e2..1ad5a72dcc3fd 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -3092,26 +3092,47 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
+       esw_acl_ingress_ofld_cleanup(esw, vport);
+ }
+-static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
++static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
+ {
+-      struct mlx5_vport *vport;
++      struct mlx5_vport *uplink, *manager;
++      int ret;
+-      vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+-      if (IS_ERR(vport))
+-              return PTR_ERR(vport);
++      uplink = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
++      if (IS_ERR(uplink))
++              return PTR_ERR(uplink);
++
++      ret = esw_vport_create_offloads_acl_tables(esw, uplink);
++      if (ret)
++              return ret;
++
++      manager = mlx5_eswitch_get_vport(esw, esw->manager_vport);
++      if (IS_ERR(manager)) {
++              ret = PTR_ERR(manager);
++              goto err_manager;
++      }
+-      return esw_vport_create_offloads_acl_tables(esw, vport);
++      ret = esw_vport_create_offloads_acl_tables(esw, manager);
++      if (ret)
++              goto err_manager;
++
++      return 0;
++
++err_manager:
++      esw_vport_destroy_offloads_acl_tables(esw, uplink);
++      return ret;
+ }
+-static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
++static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
+ {
+       struct mlx5_vport *vport;
+-      vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+-      if (IS_ERR(vport))
+-              return;
++      vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
++      if (!IS_ERR(vport))
++              esw_vport_destroy_offloads_acl_tables(esw, vport);
+-      esw_vport_destroy_offloads_acl_tables(esw, vport);
++      vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
++      if (!IS_ERR(vport))
++              esw_vport_destroy_offloads_acl_tables(esw, vport);
+ }
+ int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+@@ -3156,7 +3177,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
+       }
+       esw->fdb_table.offloads.indir = indir;
+-      err = esw_create_uplink_offloads_acl_tables(esw);
++      err = esw_create_offloads_acl_tables(esw);
+       if (err)
+               goto create_acl_err;
+@@ -3197,7 +3218,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
+ create_restore_err:
+       esw_destroy_offloads_table(esw);
+ create_offloads_err:
+-      esw_destroy_uplink_offloads_acl_tables(esw);
++      esw_destroy_offloads_acl_tables(esw);
+ create_acl_err:
+       mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
+ create_indir_err:
+@@ -3213,7 +3234,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
+       esw_destroy_offloads_fdb_tables(esw);
+       esw_destroy_restore_table(esw);
+       esw_destroy_offloads_table(esw);
+-      esw_destroy_uplink_offloads_acl_tables(esw);
++      esw_destroy_offloads_acl_tables(esw);
+       mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
+       mutex_destroy(&esw->fdb_table.offloads.vports.lock);
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/mptcp-annotate-data-races-around-msk-rmem_fwd_alloc.patch b/queue-6.5/mptcp-annotate-data-races-around-msk-rmem_fwd_alloc.patch
new file mode 100644 (file)
index 0000000..954646f
--- /dev/null
@@ -0,0 +1,93 @@
+From 4b4296591bbb909409efad05306c1e7e54d606ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 13:52:10 +0000
+Subject: mptcp: annotate data-races around msk->rmem_fwd_alloc
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9531e4a83febc3fb47ac77e24cfb5ea97e50034d ]
+
+msk->rmem_fwd_alloc can be read locklessly.
+
+Add mptcp_rmem_fwd_alloc_add(), similar to sk_forward_alloc_add(),
+and appropriate READ_ONCE()/WRITE_ONCE() annotations.
+
+Fixes: 6511882cdd82 ("mptcp: allocate fwd memory separately on the rx and tx path")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 996e031dff78a..40258d9f8c799 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -136,9 +136,15 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
+       __kfree_skb(skb);
+ }
++static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size)
++{
++      WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc,
++                 mptcp_sk(sk)->rmem_fwd_alloc + size);
++}
++
+ static void mptcp_rmem_charge(struct sock *sk, int size)
+ {
+-      mptcp_sk(sk)->rmem_fwd_alloc -= size;
++      mptcp_rmem_fwd_alloc_add(sk, -size);
+ }
+ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
+@@ -179,7 +185,7 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
+ static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
+ {
+       amount >>= PAGE_SHIFT;
+-      mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT;
++      mptcp_rmem_charge(sk, amount << PAGE_SHIFT);
+       __sk_mem_reduce_allocated(sk, amount);
+ }
+@@ -188,7 +194,7 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size)
+       struct mptcp_sock *msk = mptcp_sk(sk);
+       int reclaimable;
+-      msk->rmem_fwd_alloc += size;
++      mptcp_rmem_fwd_alloc_add(sk, size);
+       reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
+       /* see sk_mem_uncharge() for the rationale behind the following schema */
+@@ -343,7 +349,7 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
+       if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
+               return false;
+-      msk->rmem_fwd_alloc += amount;
++      mptcp_rmem_fwd_alloc_add(sk, amount);
+       return true;
+ }
+@@ -3243,7 +3249,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
+        * inet_sock_destruct() will dispose it
+        */
+       sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
+-      msk->rmem_fwd_alloc = 0;
++      WRITE_ONCE(msk->rmem_fwd_alloc, 0);
+       mptcp_token_destroy(msk);
+       mptcp_pm_free_anno_list(msk);
+       mptcp_free_local_addr_list(msk);
+@@ -3513,7 +3519,8 @@ static void mptcp_shutdown(struct sock *sk, int how)
+ static int mptcp_forward_alloc_get(const struct sock *sk)
+ {
+-      return READ_ONCE(sk->sk_forward_alloc) + mptcp_sk(sk)->rmem_fwd_alloc;
++      return READ_ONCE(sk->sk_forward_alloc) +
++             READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc);
+ }
+ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-annotate-data-races-around-sk-sk_bind_phc.patch b/queue-6.5/net-annotate-data-races-around-sk-sk_bind_phc.patch
new file mode 100644 (file)
index 0000000..46ef936
--- /dev/null
@@ -0,0 +1,59 @@
+From 18c6066052dbb014a36fa555edc629f3ae73b727 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 13:52:12 +0000
+Subject: net: annotate data-races around sk->sk_bind_phc
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 251cd405a9e6e70b92fe5afbdd17fd5caf9d3266 ]
+
+sk->sk_bind_phc is read locklessly. Add corresponding annotations.
+
+Fixes: d463126e23f1 ("net: sock: extend SO_TIMESTAMPING for PHC binding")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yangbo Lu <yangbo.lu@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 4 ++--
+ net/socket.c    | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index fea5961c51fd1..0a687c8fbed7f 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -894,7 +894,7 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
+       if (!match)
+               return -EINVAL;
+-      sk->sk_bind_phc = phc_index;
++      WRITE_ONCE(sk->sk_bind_phc, phc_index);
+       return 0;
+ }
+@@ -1719,7 +1719,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+       case SO_TIMESTAMPING_OLD:
+               lv = sizeof(v.timestamping);
+               v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+-              v.timestamping.bind_phc = sk->sk_bind_phc;
++              v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
+               break;
+       case SO_RCVTIMEO_OLD:
+diff --git a/net/socket.c b/net/socket.c
+index 6bba7818b593d..b5639a6500158 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -935,7 +935,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+               if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
+                       hwtstamp = ptp_convert_timestamp(&hwtstamp,
+-                                                       sk->sk_bind_phc);
++                                                       READ_ONCE(sk->sk_bind_phc));
+               if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
+                       empty = 0;
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-annotate-data-races-around-sk-sk_forward_alloc.patch b/queue-6.5/net-annotate-data-races-around-sk-sk_forward_alloc.patch
new file mode 100644 (file)
index 0000000..f450a06
--- /dev/null
@@ -0,0 +1,177 @@
+From 9106291f26e496c7f72c43137b0da9d46010334e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 13:52:09 +0000
+Subject: net: annotate data-races around sk->sk_forward_alloc
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 5e6300e7b3a4ab5b72a82079753868e91fbf9efc ]
+
+Every time sk->sk_forward_alloc is read locklessly,
+add a READ_ONCE().
+
+Add sk_forward_alloc_add() helper to centralize updates,
+to reduce number of WRITE_ONCE().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock.h    | 12 +++++++++---
+ net/core/sock.c       |  8 ++++----
+ net/ipv4/tcp_output.c |  2 +-
+ net/ipv4/udp.c        |  6 +++---
+ net/mptcp/protocol.c  |  6 +++---
+ 5 files changed, 20 insertions(+), 14 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index e8927f2d47a3c..85f9dffde05d0 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1053,6 +1053,12 @@ static inline void sk_wmem_queued_add(struct sock *sk, int val)
+       WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
+ }
++static inline void sk_forward_alloc_add(struct sock *sk, int val)
++{
++      /* Paired with lockless reads of sk->sk_forward_alloc */
++      WRITE_ONCE(sk->sk_forward_alloc, sk->sk_forward_alloc + val);
++}
++
+ void sk_stream_write_space(struct sock *sk);
+ /* OOB backlog add */
+@@ -1377,7 +1383,7 @@ static inline int sk_forward_alloc_get(const struct sock *sk)
+       if (sk->sk_prot->forward_alloc_get)
+               return sk->sk_prot->forward_alloc_get(sk);
+ #endif
+-      return sk->sk_forward_alloc;
++      return READ_ONCE(sk->sk_forward_alloc);
+ }
+ static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
+@@ -1673,14 +1679,14 @@ static inline void sk_mem_charge(struct sock *sk, int size)
+ {
+       if (!sk_has_account(sk))
+               return;
+-      sk->sk_forward_alloc -= size;
++      sk_forward_alloc_add(sk, -size);
+ }
+ static inline void sk_mem_uncharge(struct sock *sk, int size)
+ {
+       if (!sk_has_account(sk))
+               return;
+-      sk->sk_forward_alloc += size;
++      sk_forward_alloc_add(sk, size);
+       sk_mem_reclaim(sk);
+ }
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 3109eb0cd512e..5edb17de6d1df 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1045,7 +1045,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
+               mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
+               return -ENOMEM;
+       }
+-      sk->sk_forward_alloc += pages << PAGE_SHIFT;
++      sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
+       WRITE_ONCE(sk->sk_reserved_mem,
+                  sk->sk_reserved_mem + (pages << PAGE_SHIFT));
+@@ -3138,10 +3138,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
+ {
+       int ret, amt = sk_mem_pages(size);
+-      sk->sk_forward_alloc += amt << PAGE_SHIFT;
++      sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
+       ret = __sk_mem_raise_allocated(sk, size, amt, kind);
+       if (!ret)
+-              sk->sk_forward_alloc -= amt << PAGE_SHIFT;
++              sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
+       return ret;
+ }
+ EXPORT_SYMBOL(__sk_mem_schedule);
+@@ -3173,7 +3173,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
+ void __sk_mem_reclaim(struct sock *sk, int amount)
+ {
+       amount >>= PAGE_SHIFT;
+-      sk->sk_forward_alloc -= amount << PAGE_SHIFT;
++      sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
+       __sk_mem_reduce_allocated(sk, amount);
+ }
+ EXPORT_SYMBOL(__sk_mem_reclaim);
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 51d8638d4b4c6..9f9ca68c47026 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -3459,7 +3459,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
+       if (delta <= 0)
+               return;
+       amt = sk_mem_pages(delta);
+-      sk->sk_forward_alloc += amt << PAGE_SHIFT;
++      sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
+       sk_memory_allocated_add(sk, amt);
+       if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index b3aa68ea29de2..4c847baf52d1c 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1443,9 +1443,9 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
+               spin_lock(&sk_queue->lock);
+-      sk->sk_forward_alloc += size;
++      sk_forward_alloc_add(sk, size);
+       amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
+-      sk->sk_forward_alloc -= amt;
++      sk_forward_alloc_add(sk, -amt);
+       if (amt)
+               __sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
+@@ -1556,7 +1556,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
+               goto uncharge_drop;
+       }
+-      sk->sk_forward_alloc -= size;
++      sk_forward_alloc_add(sk, -size);
+       /* no need to setup a destructor, we will explicitly release the
+        * forward allocated memory on dequeue
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 0efc52c640b59..996e031dff78a 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1773,7 +1773,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+               }
+               /* data successfully copied into the write queue */
+-              sk->sk_forward_alloc -= total_ts;
++              sk_forward_alloc_add(sk, -total_ts);
+               copied += psize;
+               dfrag->data_len += psize;
+               frag_truesize += psize;
+@@ -3242,7 +3242,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
+       /* move all the rx fwd alloc into the sk_mem_reclaim_final in
+        * inet_sock_destruct() will dispose it
+        */
+-      sk->sk_forward_alloc += msk->rmem_fwd_alloc;
++      sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
+       msk->rmem_fwd_alloc = 0;
+       mptcp_token_destroy(msk);
+       mptcp_pm_free_anno_list(msk);
+@@ -3513,7 +3513,7 @@ static void mptcp_shutdown(struct sock *sk, int how)
+ static int mptcp_forward_alloc_get(const struct sock *sk)
+ {
+-      return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
++      return READ_ONCE(sk->sk_forward_alloc) + mptcp_sk(sk)->rmem_fwd_alloc;
+ }
+ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-annotate-data-races-around-sk-sk_tsflags.patch b/queue-6.5/net-annotate-data-races-around-sk-sk_tsflags.patch
new file mode 100644 (file)
index 0000000..056b28d
--- /dev/null
@@ -0,0 +1,366 @@
+From 69c575d1b47df6ac0f83567058ce34b2c9c1515e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 13:52:11 +0000
+Subject: net: annotate data-races around sk->sk_tsflags
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e3390b30a5dfb112e8e802a59c0f68f947b638b2 ]
+
+sk->sk_tsflags can be read locklessly, add corresponding annotations.
+
+Fixes: b9f40e21ef42 ("net-timestamp: move timestamp flags out of sk_flags")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip.h       |  2 +-
+ include/net/sock.h     | 17 ++++++++++-------
+ net/can/j1939/socket.c | 10 ++++++----
+ net/core/skbuff.c      | 10 ++++++----
+ net/core/sock.c        |  4 ++--
+ net/ipv4/ip_output.c   |  2 +-
+ net/ipv4/ip_sockglue.c |  2 +-
+ net/ipv4/tcp.c         |  4 ++--
+ net/ipv6/ip6_output.c  |  2 +-
+ net/ipv6/ping.c        |  2 +-
+ net/ipv6/raw.c         |  2 +-
+ net/ipv6/udp.c         |  2 +-
+ net/socket.c           | 13 +++++++------
+ 13 files changed, 40 insertions(+), 32 deletions(-)
+
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 19adacd5ece03..9276cea775cc2 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -94,7 +94,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
+       ipcm_init(ipcm);
+       ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
+-      ipcm->sockc.tsflags = inet->sk.sk_tsflags;
++      ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags);
+       ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
+       ipcm->addr = inet->inet_saddr;
+       ipcm->protocol = inet->inet_num;
+diff --git a/include/net/sock.h b/include/net/sock.h
+index 85f9dffde05d0..4e787285fc66b 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1906,7 +1906,9 @@ struct sockcm_cookie {
+ static inline void sockcm_init(struct sockcm_cookie *sockc,
+                              const struct sock *sk)
+ {
+-      *sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
++      *sockc = (struct sockcm_cookie) {
++              .tsflags = READ_ONCE(sk->sk_tsflags)
++      };
+ }
+ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
+@@ -2701,9 +2703,9 @@ void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
+ static inline void
+ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
+ {
+-      ktime_t kt = skb->tstamp;
+       struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
+-
++      u32 tsflags = READ_ONCE(sk->sk_tsflags);
++      ktime_t kt = skb->tstamp;
+       /*
+        * generate control messages if
+        * - receive time stamping in software requested
+@@ -2711,10 +2713,10 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
+        * - hardware time stamps available and wanted
+        */
+       if (sock_flag(sk, SOCK_RCVTSTAMP) ||
+-          (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
+-          (kt && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
++          (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
++          (kt && tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
+           (hwtstamps->hwtstamp &&
+-           (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
++           (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
+               __sock_recv_timestamp(msg, sk, skb);
+       else
+               sock_write_timestamp(sk, kt);
+@@ -2736,7 +2738,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+ #define TSFLAGS_ANY     (SOF_TIMESTAMPING_SOFTWARE                    | \
+                          SOF_TIMESTAMPING_RAW_HARDWARE)
+-      if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY)
++      if (sk->sk_flags & FLAGS_RECV_CMSGS ||
++          READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
+               __sock_recv_cmsgs(msg, sk, skb);
+       else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
+               sock_write_timestamp(sk, skb->tstamp);
+diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
+index feaec4ad6d163..b28c976f52a0a 100644
+--- a/net/can/j1939/socket.c
++++ b/net/can/j1939/socket.c
+@@ -974,6 +974,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+       struct sock_exterr_skb *serr;
+       struct sk_buff *skb;
+       char *state = "UNK";
++      u32 tsflags;
+       int err;
+       jsk = j1939_sk(sk);
+@@ -981,13 +982,14 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+       if (!(jsk->state & J1939_SOCK_ERRQUEUE))
+               return;
++      tsflags = READ_ONCE(sk->sk_tsflags);
+       switch (type) {
+       case J1939_ERRQUEUE_TX_ACK:
+-              if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
++              if (!(tsflags & SOF_TIMESTAMPING_TX_ACK))
+                       return;
+               break;
+       case J1939_ERRQUEUE_TX_SCHED:
+-              if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
++              if (!(tsflags & SOF_TIMESTAMPING_TX_SCHED))
+                       return;
+               break;
+       case J1939_ERRQUEUE_TX_ABORT:
+@@ -997,7 +999,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+       case J1939_ERRQUEUE_RX_DPO:
+               fallthrough;
+       case J1939_ERRQUEUE_RX_ABORT:
+-              if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
++              if (!(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+                       return;
+               break;
+       default:
+@@ -1054,7 +1056,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+       }
+       serr->opt_stats = true;
+-      if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
++      if (tsflags & SOF_TIMESTAMPING_OPT_ID)
+               serr->ee.ee_data = session->tskey;
+       netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index acdf94bb54c80..7dfae58055c2b 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -5149,7 +5149,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
+       serr->ee.ee_info = tstype;
+       serr->opt_stats = opt_stats;
+       serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
+-      if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
++      if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+               serr->ee.ee_data = skb_shinfo(skb)->tskey;
+               if (sk_is_tcp(sk))
+                       serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
+@@ -5205,21 +5205,23 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
+ {
+       struct sk_buff *skb;
+       bool tsonly, opt_stats = false;
++      u32 tsflags;
+       if (!sk)
+               return;
+-      if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
++      tsflags = READ_ONCE(sk->sk_tsflags);
++      if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+           skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+               return;
+-      tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
++      tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+       if (!skb_may_tx_timestamp(sk, tsonly))
+               return;
+       if (tsonly) {
+ #ifdef CONFIG_INET
+-              if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
++              if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+                   sk_is_tcp(sk)) {
+                       skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
+                                                            ack_skb);
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 5edb17de6d1df..fea5961c51fd1 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -937,7 +937,7 @@ int sock_set_timestamping(struct sock *sk, int optname,
+                       return ret;
+       }
+-      sk->sk_tsflags = val;
++      WRITE_ONCE(sk->sk_tsflags, val);
+       sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+       if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
+@@ -1718,7 +1718,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+       case SO_TIMESTAMPING_OLD:
+               lv = sizeof(v.timestamping);
+-              v.timestamping.flags = sk->sk_tsflags;
++              v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+               v.timestamping.bind_phc = sk->sk_bind_phc;
+               break;
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index a6e4c82615d7e..6935d07a60c35 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -982,7 +982,7 @@ static int __ip_append_data(struct sock *sk,
+       paged = !!cork->gso_size;
+       if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+-          sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
++          READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
+               tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+       hh_len = LL_RESERVED_SPACE(rt->dst.dev);
+diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
+index d41bce8927b2c..d7006942fc2f9 100644
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -510,7 +510,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
+        * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
+        */
+       info = PKTINFO_SKB_CB(skb);
+-      if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
++      if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
+           !info->ipi_ifindex)
+               return false;
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 8ed52e1e3c99a..75f24b931a185 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2256,14 +2256,14 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+                       }
+               }
+-              if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
++              if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
+                       has_timestamping = true;
+               else
+                       tss->ts[0] = (struct timespec64) {0};
+       }
+       if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
+-              if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
++              if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
+                       has_timestamping = true;
+               else
+                       tss->ts[2] = (struct timespec64) {0};
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index 016b0a513259f..9270ef7f8e98b 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1502,7 +1502,7 @@ static int __ip6_append_data(struct sock *sk,
+       orig_mtu = mtu;
+       if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+-          sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
++          READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
+               tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+       hh_len = LL_RESERVED_SPACE(rt->dst.dev);
+diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
+index 1b27728349725..5831aaa53d75e 100644
+--- a/net/ipv6/ping.c
++++ b/net/ipv6/ping.c
+@@ -119,7 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+               return -EINVAL;
+       ipcm6_init_sk(&ipc6, np);
+-      ipc6.sockc.tsflags = sk->sk_tsflags;
++      ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+       ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
+       fl6.flowi6_oif = oif;
+diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
+index ea16734f5e1f7..d52d5e34c12ae 100644
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -778,7 +778,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       fl6.flowi6_uid = sk->sk_uid;
+       ipcm6_init(&ipc6);
+-      ipc6.sockc.tsflags = sk->sk_tsflags;
++      ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+       ipc6.sockc.mark = fl6.flowi6_mark;
+       if (sin6) {
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 3ffca158d3e11..24d3c5c791218 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1368,7 +1368,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       ipcm6_init(&ipc6);
+       ipc6.gso_size = READ_ONCE(up->gso_size);
+-      ipc6.sockc.tsflags = sk->sk_tsflags;
++      ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+       ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
+       /* destination address check */
+diff --git a/net/socket.c b/net/socket.c
+index f49edb9b49185..6bba7818b593d 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -821,7 +821,7 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
+ static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
+ {
+-      bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
++      bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
+       struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+       struct net_device *orig_dev;
+       ktime_t hwtstamp;
+@@ -873,12 +873,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+       int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
+       int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
+       struct scm_timestamping_internal tss;
+-
+       int empty = 1, false_tstamp = 0;
+       struct skb_shared_hwtstamps *shhwtstamps =
+               skb_hwtstamps(skb);
+       int if_index;
+       ktime_t hwtstamp;
++      u32 tsflags;
+       /* Race occurred between timestamp enabling and packet
+          receiving.  Fill in the current time for now. */
+@@ -920,11 +920,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+       }
+       memset(&tss, 0, sizeof(tss));
+-      if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
++      tsflags = READ_ONCE(sk->sk_tsflags);
++      if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+           ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
+               empty = 0;
+       if (shhwtstamps &&
+-          (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
++          (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+           !skb_is_swtx_tstamp(skb, false_tstamp)) {
+               if_index = 0;
+               if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
+@@ -932,14 +933,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+               else
+                       hwtstamp = shhwtstamps->hwtstamp;
+-              if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
++              if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
+                       hwtstamp = ptp_convert_timestamp(&hwtstamp,
+                                                        sk->sk_bind_phc);
+               if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
+                       empty = 0;
+-                      if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
++                      if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+                           !skb_is_err_queue(skb))
+                               put_ts_pktinfo(msg, skb, if_index);
+               }
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-dsa-sja1105-complete-tc-cbs-offload-support-on-s.patch b/queue-6.5/net-dsa-sja1105-complete-tc-cbs-offload-support-on-s.patch
new file mode 100644 (file)
index 0000000..8f909b0
--- /dev/null
@@ -0,0 +1,136 @@
+From a40636dc497de589203f923e5ca5fd625c1de01e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 00:53:38 +0300
+Subject: net: dsa: sja1105: complete tc-cbs offload support on SJA1110
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 180a7419fe4adc8d9c8e0ef0fd17bcdd0cf78acd ]
+
+The blamed commit left this delta behind:
+
+  struct sja1105_cbs_entry {
+ -     u64 port;
+ -     u64 prio;
+ +     u64 port; /* Not used for SJA1110 */
+ +     u64 prio; /* Not used for SJA1110 */
+       u64 credit_hi;
+       u64 credit_lo;
+       u64 send_slope;
+       u64 idle_slope;
+  };
+
+but did not actually implement tc-cbs offload fully for the new switch.
+The offload is accepted, but it doesn't work.
+
+The difference compared to earlier switch generations is that now, the
+table of CBS shapers is sparse, because there are many more shapers, so
+the mapping between a {port, prio} and a table index is static, rather
+than requiring us to store the port and prio into the sja1105_cbs_entry.
+
+So, the problem is that the code programs the CBS shaper parameters at a
+dynamic table index which is incorrect.
+
+All that needs to be done for SJA1110 CBS shapers to work is to bypass
+the logic which allocates shapers in a dense manner, as for SJA1105, and
+use the fixed mapping instead.
+
+Fixes: 3e77e59bf8cf ("net: dsa: sja1105: add support for the SJA1110 switch family")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/sja1105/sja1105.h      |  2 ++
+ drivers/net/dsa/sja1105/sja1105_main.c | 13 +++++++++++++
+ drivers/net/dsa/sja1105/sja1105_spi.c  |  4 ++++
+ 3 files changed, 19 insertions(+)
+
+diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
+index dee35ba924ad2..0617d5ccd3ff1 100644
+--- a/drivers/net/dsa/sja1105/sja1105.h
++++ b/drivers/net/dsa/sja1105/sja1105.h
+@@ -132,6 +132,8 @@ struct sja1105_info {
+       int max_frame_mem;
+       int num_ports;
+       bool multiple_cascade_ports;
++      /* Every {port, TXQ} has its own CBS shaper */
++      bool fixed_cbs_mapping;
+       enum dsa_tag_protocol tag_proto;
+       const struct sja1105_dynamic_table_ops *dyn_ops;
+       const struct sja1105_table_ops *static_ops;
+diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
+index e6d82ac106bee..b6deba4a75121 100644
+--- a/drivers/net/dsa/sja1105/sja1105_main.c
++++ b/drivers/net/dsa/sja1105/sja1105_main.c
+@@ -2122,12 +2122,22 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port,
+ }
+ #define BYTES_PER_KBIT (1000LL / 8)
++/* Port 0 (the uC port) does not have CBS shapers */
++#define SJA1110_FIXED_CBS(port, prio) ((((port) - 1) * SJA1105_NUM_TC) + (prio))
+ static int sja1105_find_cbs_shaper(struct sja1105_private *priv,
+                                  int port, int prio)
+ {
+       int i;
++      if (priv->info->fixed_cbs_mapping) {
++              i = SJA1110_FIXED_CBS(port, prio);
++              if (i >= 0 && i < priv->info->num_cbs_shapers)
++                      return i;
++
++              return -1;
++      }
++
+       for (i = 0; i < priv->info->num_cbs_shapers; i++)
+               if (priv->cbs[i].port == port && priv->cbs[i].prio == prio)
+                       return i;
+@@ -2139,6 +2149,9 @@ static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv)
+ {
+       int i;
++      if (priv->info->fixed_cbs_mapping)
++              return -1;
++
+       for (i = 0; i < priv->info->num_cbs_shapers; i++)
+               if (!priv->cbs[i].idle_slope && !priv->cbs[i].send_slope)
+                       return i;
+diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
+index 5ce29c8057a41..834b5c1b4db0c 100644
+--- a/drivers/net/dsa/sja1105/sja1105_spi.c
++++ b/drivers/net/dsa/sja1105/sja1105_spi.c
+@@ -781,6 +781,7 @@ const struct sja1105_info sja1110a_info = {
+       .tag_proto              = DSA_TAG_PROTO_SJA1110,
+       .can_limit_mcast_flood  = true,
+       .multiple_cascade_ports = true,
++      .fixed_cbs_mapping      = true,
+       .ptp_ts_bits            = 32,
+       .ptpegr_ts_bytes        = 8,
+       .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
+@@ -831,6 +832,7 @@ const struct sja1105_info sja1110b_info = {
+       .tag_proto              = DSA_TAG_PROTO_SJA1110,
+       .can_limit_mcast_flood  = true,
+       .multiple_cascade_ports = true,
++      .fixed_cbs_mapping      = true,
+       .ptp_ts_bits            = 32,
+       .ptpegr_ts_bytes        = 8,
+       .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
+@@ -881,6 +883,7 @@ const struct sja1105_info sja1110c_info = {
+       .tag_proto              = DSA_TAG_PROTO_SJA1110,
+       .can_limit_mcast_flood  = true,
+       .multiple_cascade_ports = true,
++      .fixed_cbs_mapping      = true,
+       .ptp_ts_bits            = 32,
+       .ptpegr_ts_bytes        = 8,
+       .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
+@@ -931,6 +934,7 @@ const struct sja1105_info sja1110d_info = {
+       .tag_proto              = DSA_TAG_PROTO_SJA1110,
+       .can_limit_mcast_flood  = true,
+       .multiple_cascade_ports = true,
++      .fixed_cbs_mapping      = true,
+       .ptp_ts_bits            = 32,
+       .ptpegr_ts_bytes        = 8,
+       .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-dsa-sja1105-fix-bandwidth-discrepancy-between-tc.patch b/queue-6.5/net-dsa-sja1105-fix-bandwidth-discrepancy-between-tc.patch
new file mode 100644 (file)
index 0000000..cf376b4
--- /dev/null
@@ -0,0 +1,136 @@
+From 3feac9c48d5741b29bd8f80ed2a9a9f4578ebd89 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 00:53:36 +0300
+Subject: net: dsa: sja1105: fix bandwidth discrepancy between tc-cbs software
+ and offload
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 954ad9bf13c4f95a4958b5f8433301f2ab99e1f5 ]
+
+More careful measurement of the tc-cbs bandwidth shows that as the stream
+bandwidth (effectively idleslope) increases, there is a larger and
+larger discrepancy between the rate limit obtained by the software
+Qdisc, and the rate limit obtained by its offloaded counterpart.
+
+The discrepancy becomes so large, that e.g. at an idleslope of 40000
+(40Mbps), the offloaded cbs does not actually rate limit anything, and
+traffic will pass at line rate through a 100 Mbps port.
+
+The reason for the discrepancy is that the hardware documentation I've
+been following is incorrect. UM11040.pdf (for SJA1105P/Q/R/S) states
+about IDLE_SLOPE that it is "the rate (in unit of bytes/sec) at which
+the credit counter is increased".
+
+Cross-checking with UM10944.pdf (for SJA1105E/T) and UM11107.pdf
+(for SJA1110), the wording is different: "This field specifies the
+value, in bytes per second times link speed, by which the credit counter
+is increased".
+
+So there's an extra scaling for link speed that the driver is currently
+not accounting for, and apparently (empirically), that link speed is
+expressed in Kbps.
+
+I've pondered whether to pollute the sja1105_mac_link_up()
+implementation with CBS shaper reprogramming, but I don't think it is
+worth it. IMO, the UAPI exposed by tc-cbs requires user space to
+recalculate the sendslope anyway, since the formula for that depends on
+port_transmit_rate (see man tc-cbs), which is not an invariant from tc's
+perspective.
+
+So we use the offload->sendslope and offload->idleslope to deduce the
+original port_transmit_rate from the CBS formula, and use that value to
+scale the offload->sendslope and offload->idleslope to values that the
+hardware understands.
+
+Some numerical data points:
+
+ 40Mbps stream, max interfering frame size 1500, port speed 100M
+ ---------------------------------------------------------------
+
+ tc-cbs parameters:
+ idleslope 40000 sendslope -60000 locredit -900 hicredit 600
+
+ which result in hardware values:
+
+ Before (doesn't work)           After (works)
+ credit_hi    600                600
+ credit_lo    900                900
+ send_slope   7500000            75
+ idle_slope   5000000            50
+
+ 40Mbps stream, max interfering frame size 1500, port speed 1G
+ -------------------------------------------------------------
+
+ tc-cbs parameters:
+ idleslope 40000 sendslope -960000 locredit -1440 hicredit 60
+
+ which result in hardware values:
+
+ Before (doesn't work)           After (works)
+ credit_hi    60                 60
+ credit_lo    1440               1440
+ send_slope   120000000          120
+ idle_slope   5000000            5
+
+ 5.12Mbps stream, max interfering frame size 1522, port speed 100M
+ -----------------------------------------------------------------
+
+ tc-cbs parameters:
+ idleslope 5120 sendslope -94880 locredit -1444 hicredit 77
+
+ which result in hardware values:
+
+ Before (doesn't work)           After (works)
+ credit_hi    77                 77
+ credit_lo    1444               1444
+ send_slope   11860000           118
+ idle_slope   640000             6
+
+Tested on SJA1105T, SJA1105S and SJA1110A, at 1Gbps and 100Mbps.
+
+Fixes: 4d7525085a9b ("net: dsa: sja1105: offload the Credit-Based Shaper qdisc")
+Reported-by: Yanan Yang <yanan.yang@nxp.com>
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/sja1105/sja1105_main.c | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
+index 3529a565b4aaf..fe7f09519653a 100644
+--- a/drivers/net/dsa/sja1105/sja1105_main.c
++++ b/drivers/net/dsa/sja1105/sja1105_main.c
+@@ -2157,6 +2157,7 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
+ {
+       struct sja1105_private *priv = ds->priv;
+       struct sja1105_cbs_entry *cbs;
++      s64 port_transmit_rate_kbps;
+       int index;
+       if (!offload->enable)
+@@ -2174,9 +2175,17 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
+        */
+       cbs->credit_hi = offload->hicredit;
+       cbs->credit_lo = abs(offload->locredit);
+-      /* User space is in kbits/sec, hardware in bytes/sec */
+-      cbs->idle_slope = offload->idleslope * BYTES_PER_KBIT;
+-      cbs->send_slope = abs(offload->sendslope * BYTES_PER_KBIT);
++      /* User space is in kbits/sec, while the hardware in bytes/sec times
++       * link speed. Since the given offload->sendslope is good only for the
++       * current link speed anyway, and user space is likely to reprogram it
++       * when that changes, don't even bother to track the port's link speed,
++       * but deduce the port transmit rate from idleslope - sendslope.
++       */
++      port_transmit_rate_kbps = offload->idleslope - offload->sendslope;
++      cbs->idle_slope = div_s64(offload->idleslope * BYTES_PER_KBIT,
++                                port_transmit_rate_kbps);
++      cbs->send_slope = div_s64(abs(offload->sendslope * BYTES_PER_KBIT),
++                                port_transmit_rate_kbps);
+       /* Convert the negative values from 64-bit 2's complement
+        * to 32-bit 2's complement (for the case of 0x80000000 whose
+        * negative is still negative).
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-dsa-sja1105-fix-enospc-when-replacing-the-same-t.patch b/queue-6.5/net-dsa-sja1105-fix-enospc-when-replacing-the-same-t.patch
new file mode 100644 (file)
index 0000000..e385adf
--- /dev/null
@@ -0,0 +1,81 @@
+From 19a088981a1244d30c155b62a4fec26aaec53ff2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 00:53:37 +0300
+Subject: net: dsa: sja1105: fix -ENOSPC when replacing the same tc-cbs too
+ many times
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 894cafc5c62ccced758077bd4e970dc714c42637 ]
+
+After running command [2] too many times in a row:
+
+[1] $ tc qdisc add dev sw2p0 root handle 1: mqprio num_tc 8 \
+       map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0
+[2] $ tc qdisc replace dev sw2p0 parent 1:1 cbs offload 1 \
+       idleslope 120000 sendslope -880000 locredit -1320 hicredit 180
+
+(aka more than priv->info->num_cbs_shapers times)
+
+we start seeing the following error message:
+
+Error: Specified device failed to setup cbs hardware offload.
+
+This comes from the fact that ndo_setup_tc(TC_SETUP_QDISC_CBS) presents
+the same API for the qdisc create and replace cases, and the sja1105
+driver fails to distinguish between the 2. Thus, it always thinks that
+it must allocate the same shaper for a {port, queue} pair, when it may
+instead have to replace an existing one.
+
+Fixes: 4d7525085a9b ("net: dsa: sja1105: offload the Credit-Based Shaper qdisc")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/sja1105/sja1105_main.c | 23 ++++++++++++++++++++---
+ 1 file changed, 20 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
+index fe7f09519653a..e6d82ac106bee 100644
+--- a/drivers/net/dsa/sja1105/sja1105_main.c
++++ b/drivers/net/dsa/sja1105/sja1105_main.c
+@@ -2123,6 +2123,18 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port,
+ #define BYTES_PER_KBIT (1000LL / 8)
++static int sja1105_find_cbs_shaper(struct sja1105_private *priv,
++                                 int port, int prio)
++{
++      int i;
++
++      for (i = 0; i < priv->info->num_cbs_shapers; i++)
++              if (priv->cbs[i].port == port && priv->cbs[i].prio == prio)
++                      return i;
++
++      return -1;
++}
++
+ static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv)
+ {
+       int i;
+@@ -2163,9 +2175,14 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
+       if (!offload->enable)
+               return sja1105_delete_cbs_shaper(priv, port, offload->queue);
+-      index = sja1105_find_unused_cbs_shaper(priv);
+-      if (index < 0)
+-              return -ENOSPC;
++      /* The user may be replacing an existing shaper */
++      index = sja1105_find_cbs_shaper(priv, port, offload->queue);
++      if (index < 0) {
++              /* That isn't the case - see if we can allocate a new one */
++              index = sja1105_find_unused_cbs_shaper(priv);
++              if (index < 0)
++                      return -ENOSPC;
++      }
+       cbs = &priv->cbs[index];
+       cbs->port = port;
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-enetc-distinguish-error-from-valid-pointers-in-e.patch b/queue-6.5/net-enetc-distinguish-error-from-valid-pointers-in-e.patch
new file mode 100644 (file)
index 0000000..b8c47dd
--- /dev/null
@@ -0,0 +1,46 @@
+From 8e225fac2fe4d7d6d1550864a29ae65fa95173a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 17:16:09 +0300
+Subject: net: enetc: distinguish error from valid pointers in
+ enetc_fixup_clear_rss_rfs()
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 1b36955cc048c8ff6ba448dbf4be0e52f59f2963 ]
+
+enetc_psi_create() returns an ERR_PTR() or a valid station interface
+pointer, but checking for the non-NULL quality of the return code blurs
+that difference away. So if enetc_psi_create() fails, we call
+enetc_psi_destroy() when we shouldn't. This will likely result in
+crashes, since enetc_psi_create() cleans up everything after itself when
+it returns an ERR_PTR().
+
+Fixes: f0168042a212 ("net: enetc: reimplement RFS/RSS memory clearing as PCI quirk")
+Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
+Closes: https://lore.kernel.org/netdev/582183ef-e03b-402b-8e2d-6d9bb3c83bd9@moroto.mountain/
+Suggested-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230906141609.247579-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/enetc/enetc_pf.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+index e0a4cb7e3f501..c153dc083aff0 100644
+--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+@@ -1402,7 +1402,7 @@ static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev)
+               return;
+       si = enetc_psi_create(pdev);
+-      if (si)
++      if (!IS_ERR(si))
+               enetc_psi_destroy(pdev);
+ }
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF,
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-fib-avoid-warn-splat-in-flow-dissector.patch b/queue-6.5/net-fib-avoid-warn-splat-in-flow-dissector.patch
new file mode 100644 (file)
index 0000000..dd44958
--- /dev/null
@@ -0,0 +1,77 @@
+From 742c1d8394ebb754ef1ff930bffbb0d725db6715 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Aug 2023 13:00:37 +0200
+Subject: net: fib: avoid warn splat in flow dissector
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 8aae7625ff3f0bd5484d01f1b8d5af82e44bec2d ]
+
+New skbs allocated via nf_send_reset() have skb->dev == NULL.
+
+fib*_rules_early_flow_dissect helpers already have a 'struct net'
+argument but it's not passed down to the flow dissector core, which
+will then WARN as it can't derive a net namespace to use:
+
+ WARNING: CPU: 0 PID: 0 at net/core/flow_dissector.c:1016 __skb_flow_dissect+0xa91/0x1cd0
+ [..]
+  ip_route_me_harder+0x143/0x330
+  nf_send_reset+0x17c/0x2d0 [nf_reject_ipv4]
+  nft_reject_inet_eval+0xa9/0xf2 [nft_reject_inet]
+  nft_do_chain+0x198/0x5d0 [nf_tables]
+  nft_do_chain_inet+0xa4/0x110 [nf_tables]
+  nf_hook_slow+0x41/0xc0
+  ip_local_deliver+0xce/0x110
+  ..
+
+Cc: Stanislav Fomichev <sdf@google.com>
+Cc: David Ahern <dsahern@kernel.org>
+Cc: Ido Schimmel <idosch@nvidia.com>
+Fixes: 812fa71f0d96 ("netfilter: Dissect flow after packet mangling")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217826
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230830110043.30497-1-fw@strlen.de
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip6_fib.h | 5 ++++-
+ include/net/ip_fib.h  | 5 ++++-
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
+index 05e6f756feafe..9ba6413fd2e3e 100644
+--- a/include/net/ip6_fib.h
++++ b/include/net/ip6_fib.h
+@@ -604,7 +604,10 @@ static inline bool fib6_rules_early_flow_dissect(struct net *net,
+       if (!net->ipv6.fib6_rules_require_fldissect)
+               return false;
+-      skb_flow_dissect_flow_keys(skb, flkeys, flag);
++      memset(flkeys, 0, sizeof(*flkeys));
++      __skb_flow_dissect(net, skb, &flow_keys_dissector,
++                         flkeys, NULL, 0, 0, 0, flag);
++
+       fl6->fl6_sport = flkeys->ports.src;
+       fl6->fl6_dport = flkeys->ports.dst;
+       fl6->flowi6_proto = flkeys->basic.ip_proto;
+diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
+index a378eff827c74..f0c13864180e2 100644
+--- a/include/net/ip_fib.h
++++ b/include/net/ip_fib.h
+@@ -418,7 +418,10 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
+       if (!net->ipv4.fib_rules_require_fldissect)
+               return false;
+-      skb_flow_dissect_flow_keys(skb, flkeys, flag);
++      memset(flkeys, 0, sizeof(*flkeys));
++      __skb_flow_dissect(net, skb, &flow_keys_dissector,
++                         flkeys, NULL, 0, 0, 0, flag);
++
+       fl4->fl4_sport = flkeys->ports.src;
+       fl4->fl4_dport = flkeys->ports.dst;
+       fl4->flowi4_proto = flkeys->basic.ip_proto;
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-handshake-fix-null-ptr-deref-in-handshake_nl_don.patch b/queue-6.5/net-handshake-fix-null-ptr-deref-in-handshake_nl_don.patch
new file mode 100644 (file)
index 0000000..2e4d4ca
--- /dev/null
@@ -0,0 +1,136 @@
+From 3f9032dbba45f02d579cddc8022914a934d6dc50 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 08:45:09 +0000
+Subject: net/handshake: fix null-ptr-deref in handshake_nl_done_doit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 82ba0ff7bf0483d962e592017bef659ae022d754 ]
+
+We should not call trace_handshake_cmd_done_err() if socket lookup has failed.
+
+Also we should call trace_handshake_cmd_done_err() before releasing the file,
+otherwise dereferencing sock->sk can return garbage.
+
+This also reverts 7afc6d0a107f ("net/handshake: Fix uninitialized local variable")
+
+Unable to handle kernel paging request at virtual address dfff800000000003
+KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f]
+Mem abort info:
+ESR = 0x0000000096000005
+EC = 0x25: DABT (current EL), IL = 32 bits
+SET = 0, FnV = 0
+EA = 0, S1PTW = 0
+FSC = 0x05: level 1 translation fault
+Data abort info:
+ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
+CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+[dfff800000000003] address between user and kernel address ranges
+Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
+Modules linked in:
+CPU: 1 PID: 5986 Comm: syz-executor292 Not tainted 6.5.0-rc7-syzkaller-gfe4469582053 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
+pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+pc : handshake_nl_done_doit+0x198/0x9c8 net/handshake/netlink.c:193
+lr : handshake_nl_done_doit+0x180/0x9c8
+sp : ffff800096e37180
+x29: ffff800096e37200 x28: 1ffff00012dc6e34 x27: dfff800000000000
+x26: ffff800096e373d0 x25: 0000000000000000 x24: 00000000ffffffa8
+x23: ffff800096e373f0 x22: 1ffff00012dc6e38 x21: 0000000000000000
+x20: ffff800096e371c0 x19: 0000000000000018 x18: 0000000000000000
+x17: 0000000000000000 x16: ffff800080516cc4 x15: 0000000000000001
+x14: 1fffe0001b14aa3b x13: 0000000000000000 x12: 0000000000000000
+x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000003
+x8 : 0000000000000003 x7 : ffff800080afe47c x6 : 0000000000000000
+x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff800080a88078
+x2 : 0000000000000001 x1 : 00000000ffffffa8 x0 : 0000000000000000
+Call trace:
+handshake_nl_done_doit+0x198/0x9c8 net/handshake/netlink.c:193
+genl_family_rcv_msg_doit net/netlink/genetlink.c:970 [inline]
+genl_family_rcv_msg net/netlink/genetlink.c:1050 [inline]
+genl_rcv_msg+0x96c/0xc50 net/netlink/genetlink.c:1067
+netlink_rcv_skb+0x214/0x3c4 net/netlink/af_netlink.c:2549
+genl_rcv+0x38/0x50 net/netlink/genetlink.c:1078
+netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
+netlink_unicast+0x660/0x8d4 net/netlink/af_netlink.c:1365
+netlink_sendmsg+0x834/0xb18 net/netlink/af_netlink.c:1914
+sock_sendmsg_nosec net/socket.c:725 [inline]
+sock_sendmsg net/socket.c:748 [inline]
+____sys_sendmsg+0x56c/0x840 net/socket.c:2494
+___sys_sendmsg net/socket.c:2548 [inline]
+__sys_sendmsg+0x26c/0x33c net/socket.c:2577
+__do_sys_sendmsg net/socket.c:2586 [inline]
+__se_sys_sendmsg net/socket.c:2584 [inline]
+__arm64_sys_sendmsg+0x80/0x94 net/socket.c:2584
+__invoke_syscall arch/arm64/kernel/syscall.c:37 [inline]
+invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51
+el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136
+do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155
+el0_svc+0x58/0x16c arch/arm64/kernel/entry-common.c:678
+el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696
+el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591
+Code: 12800108 b90043e8 910062b3 d343fe68 (387b6908)
+
+Fixes: 3b3009ea8abb ("net/handshake: Create a NETLINK service for handling handshake requests")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/handshake/netlink.c | 18 ++++++------------
+ 1 file changed, 6 insertions(+), 12 deletions(-)
+
+diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
+index 1086653e1fada..d0bc1dd8e65a8 100644
+--- a/net/handshake/netlink.c
++++ b/net/handshake/netlink.c
+@@ -157,26 +157,24 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info)
+ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
+ {
+       struct net *net = sock_net(skb->sk);
+-      struct handshake_req *req = NULL;
+-      struct socket *sock = NULL;
++      struct handshake_req *req;
++      struct socket *sock;
+       int fd, status, err;
+       if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
+               return -EINVAL;
+       fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]);
+-      err = 0;
+       sock = sockfd_lookup(fd, &err);
+-      if (err) {
+-              err = -EBADF;
+-              goto out_status;
+-      }
++      if (!sock)
++              return err;
+       req = handshake_req_hash_lookup(sock->sk);
+       if (!req) {
+               err = -EBUSY;
++              trace_handshake_cmd_done_err(net, req, sock->sk, err);
+               fput(sock->file);
+-              goto out_status;
++              return err;
+       }
+       trace_handshake_cmd_done(net, req, sock->sk, fd);
+@@ -188,10 +186,6 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
+       handshake_complete(req, status, info);
+       fput(sock->file);
+       return 0;
+-
+-out_status:
+-      trace_handshake_cmd_done_err(net, req, sock->sk, err);
+-      return err;
+ }
+ static unsigned int handshake_net_id;
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-hns3-fix-byte-order-conversion-issue-in-hclge_db.patch b/queue-6.5/net-hns3-fix-byte-order-conversion-issue-in-hclge_db.patch
new file mode 100644 (file)
index 0000000..3c55649
--- /dev/null
@@ -0,0 +1,70 @@
+From d5f785f1e4030ed91c9232b434e9660f70c2f96d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 15:20:14 +0800
+Subject: net: hns3: fix byte order conversion issue in
+ hclge_dbg_fd_tcam_read()
+
+From: Hao Chen <chenhao418@huawei.com>
+
+[ Upstream commit efccf655e99b6907ca07a466924e91805892e7d3 ]
+
+req1->tcam_data is defined as "u8 tcam_data[8]", and we cast it to
+(u32 *) without considering byte order conversion,
+which may result in printing wrong data for tcam_data.
+
+Convert tcam_data to (__le32 *) first to fix it.
+
+Fixes: b5a0b70d77b9 ("net: hns3: refactor dump fd tcam of debugfs")
+Signed-off-by: Hao Chen <chenhao418@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+index f01a7a9ee02ca..ff3f8f424ad90 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+@@ -1519,7 +1519,7 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x,
+       struct hclge_desc desc[3];
+       int pos = 0;
+       int ret, i;
+-      u32 *req;
++      __le32 *req;
+       hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_FD_TCAM_OP, true);
+       desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
+@@ -1544,22 +1544,22 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x,
+                        tcam_msg.loc);
+       /* tcam_data0 ~ tcam_data1 */
+-      req = (u32 *)req1->tcam_data;
++      req = (__le32 *)req1->tcam_data;
+       for (i = 0; i < 2; i++)
+               pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
+-                               "%08x\n", *req++);
++                               "%08x\n", le32_to_cpu(*req++));
+       /* tcam_data2 ~ tcam_data7 */
+-      req = (u32 *)req2->tcam_data;
++      req = (__le32 *)req2->tcam_data;
+       for (i = 0; i < 6; i++)
+               pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
+-                               "%08x\n", *req++);
++                               "%08x\n", le32_to_cpu(*req++));
+       /* tcam_data8 ~ tcam_data12 */
+-      req = (u32 *)req3->tcam_data;
++      req = (__le32 *)req3->tcam_data;
+       for (i = 0; i < 5; i++)
+               pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
+-                               "%08x\n", *req++);
++                               "%08x\n", le32_to_cpu(*req++));
+       return ret;
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-hns3-fix-debugfs-concurrency-issue-between-kfree.patch b/queue-6.5/net-hns3-fix-debugfs-concurrency-issue-between-kfree.patch
new file mode 100644 (file)
index 0000000..2866993
--- /dev/null
@@ -0,0 +1,62 @@
+From ebdc9bff58b1749897283fe6da4a63e729d07338 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 15:20:15 +0800
+Subject: net: hns3: fix debugfs concurrency issue between kfree buffer and
+ read
+
+From: Hao Chen <chenhao418@huawei.com>
+
+[ Upstream commit c295160b1d95e885f1af4586a221cb221d232d10 ]
+
+Currently, in hns3_dbg_uninit(), freeing the debugfs buffers may race
+with a concurrent read of them, which may result in a memory error.
+
+Move debugfs_remove_recursive() in front of freeing the buffers to ensure
+they don't happen at the same time.
+
+Fixes: 5e69ea7ee2a6 ("net: hns3: refactor the debugfs process")
+Signed-off-by: Hao Chen <chenhao418@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+index f276b5ecb431f..26fb6fefcb9d9 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+@@ -1411,9 +1411,9 @@ int hns3_dbg_init(struct hnae3_handle *handle)
+       return 0;
+ out:
+-      mutex_destroy(&handle->dbgfs_lock);
+       debugfs_remove_recursive(handle->hnae3_dbgfs);
+       handle->hnae3_dbgfs = NULL;
++      mutex_destroy(&handle->dbgfs_lock);
+       return ret;
+ }
+@@ -1421,6 +1421,9 @@ void hns3_dbg_uninit(struct hnae3_handle *handle)
+ {
+       u32 i;
++      debugfs_remove_recursive(handle->hnae3_dbgfs);
++      handle->hnae3_dbgfs = NULL;
++
+       for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++)
+               if (handle->dbgfs_buf[i]) {
+                       kvfree(handle->dbgfs_buf[i]);
+@@ -1428,8 +1431,6 @@ void hns3_dbg_uninit(struct hnae3_handle *handle)
+               }
+       mutex_destroy(&handle->dbgfs_lock);
+-      debugfs_remove_recursive(handle->hnae3_dbgfs);
+-      handle->hnae3_dbgfs = NULL;
+ }
+ void hns3_dbg_register_debugfs(const char *debugfs_dir_name)
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-hns3-fix-invalid-mutex-between-tc-qdisc-and-dcb-.patch b/queue-6.5/net-hns3-fix-invalid-mutex-between-tc-qdisc-and-dcb-.patch
new file mode 100644 (file)
index 0000000..c253943
--- /dev/null
@@ -0,0 +1,153 @@
+From 6d0255cf19728d624630624c8148471e728a7e0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 15:20:16 +0800
+Subject: net: hns3: fix invalid mutex between tc qdisc and dcb ets command
+ issue
+
+From: Jijie Shao <shaojijie@huawei.com>
+
+[ Upstream commit fa5564945f7d15ae2390b00c08b6abaef0165cda ]
+
+We hope that tc qdisc and dcb ets commands can not be used crosswise.
+If we want to use either of the commands to configure tc,
+we must use the other command to clear the existing configuration.
+
+However, when we configure a single tc with tc qdisc,
+we can still configure it with dcb ets.
+Because we use mqprio_active as the tag of tc qdisc configuration,
+but with dcb ets, we do not check mqprio_active.
+
+This patch fixes this issue by checking mqprio_active before
+executing the dcb ets command, and adds dcb_ets_active to
+replace HCLGE_FLAG_DCB_ENABLE and HCLGE_FLAG_MQPRIO_ENABLE
+at the hclge layer.
+
+Fixes: cacde272dd00 ("net: hns3: Add hclge_dcb module for the support of DCB feature")
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
+ .../hisilicon/hns3/hns3pf/hclge_dcb.c         | 20 +++++--------------
+ .../hisilicon/hns3/hns3pf/hclge_main.c        |  5 +++--
+ .../hisilicon/hns3/hns3pf/hclge_main.h        |  2 --
+ 4 files changed, 9 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+index a4b43bcd2f0c9..aaf1f42624a79 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+@@ -814,6 +814,7 @@ struct hnae3_tc_info {
+       u8 max_tc; /* Total number of TCs */
+       u8 num_tc; /* Total number of enabled TCs */
+       bool mqprio_active;
++      bool dcb_ets_active;
+ };
+ #define HNAE3_MAX_DSCP                        64
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+index fad5a5ff3cda5..b98301e205f7f 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+@@ -259,7 +259,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
+       int ret;
+       if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+-          hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
++          h->kinfo.tc_info.mqprio_active)
+               return -EINVAL;
+       ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed);
+@@ -275,10 +275,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
+       }
+       hclge_tm_schd_info_update(hdev, num_tc);
+-      if (num_tc > 1)
+-              hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
+-      else
+-              hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
++      h->kinfo.tc_info.dcb_ets_active = num_tc > 1;
+       ret = hclge_ieee_ets_to_tm_info(hdev, ets);
+       if (ret)
+@@ -487,7 +484,7 @@ static u8 hclge_getdcbx(struct hnae3_handle *h)
+       struct hclge_vport *vport = hclge_get_vport(h);
+       struct hclge_dev *hdev = vport->back;
+-      if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
++      if (h->kinfo.tc_info.mqprio_active)
+               return 0;
+       return hdev->dcbx_cap;
+@@ -611,7 +608,8 @@ static int hclge_setup_tc(struct hnae3_handle *h,
+       if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state))
+               return -EBUSY;
+-      if (hdev->flag & HCLGE_FLAG_DCB_ENABLE)
++      kinfo = &vport->nic.kinfo;
++      if (kinfo->tc_info.dcb_ets_active)
+               return -EINVAL;
+       ret = hclge_mqprio_qopt_check(hdev, mqprio_qopt);
+@@ -625,7 +623,6 @@ static int hclge_setup_tc(struct hnae3_handle *h,
+       if (ret)
+               return ret;
+-      kinfo = &vport->nic.kinfo;
+       memcpy(&old_tc_info, &kinfo->tc_info, sizeof(old_tc_info));
+       hclge_sync_mqprio_qopt(&kinfo->tc_info, mqprio_qopt);
+       kinfo->tc_info.mqprio_active = tc > 0;
+@@ -634,13 +631,6 @@ static int hclge_setup_tc(struct hnae3_handle *h,
+       if (ret)
+               goto err_out;
+-      hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+-
+-      if (tc > 1)
+-              hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE;
+-      else
+-              hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE;
+-
+       return hclge_notify_init_up(hdev);
+ err_out:
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+index 2d5a2e1ef664d..ce6b658a930cc 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -11026,6 +11026,7 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle,
+ static void hclge_info_show(struct hclge_dev *hdev)
+ {
++      struct hnae3_handle *handle = &hdev->vport->nic;
+       struct device *dev = &hdev->pdev->dev;
+       dev_info(dev, "PF info begin:\n");
+@@ -11042,9 +11043,9 @@ static void hclge_info_show(struct hclge_dev *hdev)
+       dev_info(dev, "This is %s PF\n",
+                hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main");
+       dev_info(dev, "DCB %s\n",
+-               hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable");
++               handle->kinfo.tc_info.dcb_ets_active ? "enable" : "disable");
+       dev_info(dev, "MQPRIO %s\n",
+-               hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable");
++               handle->kinfo.tc_info.mqprio_active ? "enable" : "disable");
+       dev_info(dev, "Default tx spare buffer size: %u\n",
+                hdev->tx_spare_buf_size);
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+index 8f76b568c1bf6..70319ce49a1d2 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+@@ -919,8 +919,6 @@ struct hclge_dev {
+ #define HCLGE_FLAG_MAIN                       BIT(0)
+ #define HCLGE_FLAG_DCB_CAPABLE                BIT(1)
+-#define HCLGE_FLAG_DCB_ENABLE         BIT(2)
+-#define HCLGE_FLAG_MQPRIO_ENABLE      BIT(3)
+       u32 flag;
+       u32 pkt_buf_size; /* Total pf buf size for tx/rx */
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-hns3-fix-the-port-information-display-when-sfp-i.patch b/queue-6.5/net-hns3-fix-the-port-information-display-when-sfp-i.patch
new file mode 100644 (file)
index 0000000..6791e79
--- /dev/null
@@ -0,0 +1,39 @@
+From cda81d43b4938ae2f8f59d0d6e342408b52742a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 15:20:17 +0800
+Subject: net: hns3: fix the port information display when sfp is absent
+
+From: Yisen Zhuang <yisen.zhuang@huawei.com>
+
+[ Upstream commit 674d9591a32d01df75d6b5fffed4ef942a294376 ]
+
+When sfp is absent or unidentified, the port type should be
+displayed as PORT_OTHER, rather than PORT_FIBRE.
+
+Fixes: 88d10bd6f730 ("net: hns3: add support for multiple media type")
+Signed-off-by: Yisen Zhuang <yisen.zhuang@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+index 407d30ee55d2e..64858b3114ac9 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+@@ -773,7 +773,9 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
+               hns3_get_ksettings(h, cmd);
+               break;
+       case HNAE3_MEDIA_TYPE_FIBER:
+-              if (module_type == HNAE3_MODULE_TYPE_CR)
++              if (module_type == HNAE3_MODULE_TYPE_UNKNOWN)
++                      cmd->base.port = PORT_OTHER;
++              else if (module_type == HNAE3_MODULE_TYPE_CR)
+                       cmd->base.port = PORT_DA;
+               else
+                       cmd->base.port = PORT_FIBRE;
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-hns3-fix-tx-timeout-issue.patch b/queue-6.5/net-hns3-fix-tx-timeout-issue.patch
new file mode 100644 (file)
index 0000000..526d5af
--- /dev/null
@@ -0,0 +1,79 @@
+From dabe1b0e70aa270baa870856a49fa9b677037a5e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 15:20:12 +0800
+Subject: net: hns3: fix tx timeout issue
+
+From: Jian Shen <shenjian15@huawei.com>
+
+[ Upstream commit 61a1deacc3d4fd3d57d7fda4d935f7f7503e8440 ]
+
+Currently, the driver knocks the ring doorbell before updating
+the ring->last_to_use in tx flow. If the hardware transmitting
+packet and napi poll scheduling are fast enough, it may get
+the old ring->last_to_use in the driver's napi poll.
+In this case, the driver will think the tx is not completed, and
+return directly without clearing the flag __QUEUE_STATE_STACK_XOFF,
+which may cause tx timeout.
+
+Fixes: 20d06ca2679c ("net: hns3: optimize the tx clean process")
+Signed-off-by: Jian Shen <shenjian15@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+index b7b51e56b0308..71772213b4448 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+@@ -2102,8 +2102,12 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
+        */
+       if (test_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state) && num &&
+           !ring->pending_buf && num <= HNS3_MAX_PUSH_BD_NUM && doorbell) {
++              /* This smp_store_release() pairs with smp_load_acquire() in
++               * hns3_nic_reclaim_desc(). Ensure that the BD valid bit
++               * is updated.
++               */
++              smp_store_release(&ring->last_to_use, ring->next_to_use);
+               hns3_tx_push_bd(ring, num);
+-              WRITE_ONCE(ring->last_to_use, ring->next_to_use);
+               return;
+       }
+@@ -2114,6 +2118,11 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
+               return;
+       }
++      /* This smp_store_release() pairs with smp_load_acquire() in
++       * hns3_nic_reclaim_desc(). Ensure that the BD valid bit is updated.
++       */
++      smp_store_release(&ring->last_to_use, ring->next_to_use);
++
+       if (ring->tqp->mem_base)
+               hns3_tx_mem_doorbell(ring);
+       else
+@@ -2121,7 +2130,6 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
+                      ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
+       ring->pending_buf = 0;
+-      WRITE_ONCE(ring->last_to_use, ring->next_to_use);
+ }
+ static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb,
+@@ -3562,9 +3570,8 @@ static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i)
+ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
+                                 int *bytes, int *pkts, int budget)
+ {
+-      /* pair with ring->last_to_use update in hns3_tx_doorbell(),
+-       * smp_store_release() is not used in hns3_tx_doorbell() because
+-       * the doorbell operation already have the needed barrier operation.
++      /* This smp_load_acquire() pairs with smp_store_release() in
++       * hns3_tx_doorbell().
+        */
+       int ltu = smp_load_acquire(&ring->last_to_use);
+       int ntc = ring->next_to_clean;
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-hns3-remove-gso-partial-feature-bit.patch b/queue-6.5/net-hns3-remove-gso-partial-feature-bit.patch
new file mode 100644 (file)
index 0000000..1549593
--- /dev/null
@@ -0,0 +1,39 @@
+From 60fb8375bfd6a5847b401ade56e5fd4c58906c47 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 15:20:18 +0800
+Subject: net: hns3: remove GSO partial feature bit
+
+From: Jie Wang <wangjie125@huawei.com>
+
+[ Upstream commit 60326634f6c54528778de18bfef1e8a7a93b3771 ]
+
+HNS3 NIC does not support GSO partial packets segmentation. Actually tunnel
+packets for example NvGRE packets segment offload and checksum offload is
+already supported. There is no need to keep gso partial feature bit. So
+this patch removes it.
+
+Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC")
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Jijie Shao <shaojijie@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+index 71772213b4448..613d0a779cef2 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+@@ -3315,8 +3315,6 @@ static void hns3_set_default_feature(struct net_device *netdev)
+       netdev->priv_flags |= IFF_UNICAST_FLT;
+-      netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
+-
+       netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
+               NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+               NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-ipv6-addrconf-avoid-integer-underflow-in-ipv6_cr.patch b/queue-6.5/net-ipv6-addrconf-avoid-integer-underflow-in-ipv6_cr.patch
new file mode 100644 (file)
index 0000000..8400d9a
--- /dev/null
@@ -0,0 +1,40 @@
+From cf5001ad021421160d0db73e2f2e6911539c053e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 22:41:27 -0600
+Subject: net: ipv6/addrconf: avoid integer underflow in ipv6_create_tempaddr
+
+From: Alex Henrie <alexhenrie24@gmail.com>
+
+[ Upstream commit f31867d0d9d82af757c1e0178b659438f4c1ea3c ]
+
+The existing code incorrectly cast a negative value (the result of a
+subtraction) to an unsigned value without checking. For example, if
+/proc/sys/net/ipv6/conf/*/temp_prefered_lft was set to 1, the preferred
+lifetime would jump to 4 billion seconds. On my machine and network the
+shortest lifetime that avoided underflow was 3 seconds.
+
+Fixes: 76506a986dc3 ("IPv6: fix DESYNC_FACTOR")
+Signed-off-by: Alex Henrie <alexhenrie24@gmail.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/addrconf.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index 94cec2075eee8..c93a2b9a91723 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -1368,7 +1368,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block)
+        * idev->desync_factor if it's larger
+        */
+       cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
+-      max_desync_factor = min_t(__u32,
++      max_desync_factor = min_t(long,
+                                 idev->cnf.max_desync_factor,
+                                 cnf_temp_preferred_lft - regen_advance);
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-mlx5-give-esw_offloads_load-unload_rep-mlx5_-pre.patch b/queue-6.5/net-mlx5-give-esw_offloads_load-unload_rep-mlx5_-pre.patch
new file mode 100644 (file)
index 0000000..f830b94
--- /dev/null
@@ -0,0 +1,112 @@
+From 0ea3193105a27c651c5a8cdc26fad0004ad54ae8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 May 2023 09:42:09 +0200
+Subject: net/mlx5: Give esw_offloads_load/unload_rep() "mlx5_" prefix
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit 9eca8bb8da4385b02bd02b6876af8d4225bf4713 ]
+
+As esw_offloads_load/unload_rep() are used outside eswitch.c it is nicer
+for them to have "mlx5_" prefix. Add it.
+
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Reviewed-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Stable-dep-of: 344134609a56 ("mlx5/core: E-Switch, Create ACL FT for eswitch manager in switchdev mode")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.c      |  4 ++--
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h      |  4 ++--
+ .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 10 +++++-----
+ 3 files changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+index 243c455f10297..85ddf6f7e37df 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1078,7 +1078,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
+       if (err)
+               return err;
+-      err = esw_offloads_load_rep(esw, vport_num);
++      err = mlx5_esw_offloads_load_rep(esw, vport_num);
+       if (err)
+               goto err_rep;
+@@ -1091,7 +1091,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
+ void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
+ {
+-      esw_offloads_unload_rep(esw, vport_num);
++      mlx5_esw_offloads_unload_rep(esw, vport_num);
+       mlx5_esw_vport_disable(esw, vport_num);
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+index ae0dc8a3060d7..040ed6d79258f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -725,8 +725,8 @@ void mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw,
+                                  u16 vport,
+                                  struct mlx5_flow_spec *spec);
+-int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num);
+-void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num);
++int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num);
++void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num);
+ int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num);
+ void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index e59380ee1ead3..1eb49784c0e11 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -2424,7 +2424,7 @@ void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num)
+               __esw_offloads_unload_rep(esw, rep, rep_type);
+ }
+-int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
++int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
+ {
+       int err;
+@@ -2448,7 +2448,7 @@ int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
+       return err;
+ }
+-void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
++void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
+ {
+       if (esw->mode != MLX5_ESWITCH_OFFLOADS)
+               return;
+@@ -3355,7 +3355,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
+                       vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN;
+       /* Uplink vport rep must load first. */
+-      err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK);
++      err = mlx5_esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK);
+       if (err)
+               goto err_uplink;
+@@ -3366,7 +3366,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
+       return 0;
+ err_vports:
+-      esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
++      mlx5_esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
+ err_uplink:
+       esw_offloads_steering_cleanup(esw);
+ err_steering_init:
+@@ -3404,7 +3404,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
+ void esw_offloads_disable(struct mlx5_eswitch *esw)
+ {
+       mlx5_eswitch_disable_pf_vf_vports(esw);
+-      esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
++      mlx5_esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
+       esw_set_passing_vport_metadata(esw, false);
+       esw_offloads_steering_cleanup(esw);
+       mapping_destroy(esw->offloads.reg_c0_obj_pool);
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-mlx5-push-devlink-port-pf-vf-init-cleanup-calls-.patch b/queue-6.5/net-mlx5-push-devlink-port-pf-vf-init-cleanup-calls-.patch
new file mode 100644 (file)
index 0000000..d2b8219
--- /dev/null
@@ -0,0 +1,258 @@
+From 956fc0e57a32801f5cc318e43a88dee755c76f40 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 May 2023 10:01:02 +0200
+Subject: net/mlx5: Push devlink port PF/VF init/cleanup calls out of
+ devlink_port_register/unregister()
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit d9833bcfe840fff5d368b1c7c68e05c95be8d19c ]
+
+In order to prepare for
+mlx5_esw_offloads_devlink_port_register/unregister() to be used
+for SFs as well, push out the PF/VF specific init/cleanup calls outside.
+Introduce mlx5_eswitch_load/unload_pf_vf_vport() and call them from
+there. Use these new helpers of PF/VF loading and make
+mlx5_eswitch_load/unload_vport() reusable for SFs.
+
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Reviewed-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Stable-dep-of: 344134609a56 ("mlx5/core: E-Switch, Create ACL FT for eswitch manager in switchdev mode")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/esw/devlink_port.c     | 13 ++----
+ .../net/ethernet/mellanox/mlx5/core/eswitch.c | 45 ++++++++++++++-----
+ .../net/ethernet/mellanox/mlx5/core/eswitch.h |  4 ++
+ .../mellanox/mlx5/core/eswitch_offloads.c     | 16 +++++++
+ 4 files changed, 58 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+index 463bde802e45e..2170539461fa2 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+@@ -62,7 +62,7 @@ static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch *
+       }
+ }
+-static int mlx5_esw_offloads_pf_vf_devlink_port_init(struct mlx5_eswitch *esw, u16 vport_num)
++int mlx5_esw_offloads_pf_vf_devlink_port_init(struct mlx5_eswitch *esw, u16 vport_num)
+ {
+       struct devlink_port *dl_port;
+       struct mlx5_vport *vport;
+@@ -84,7 +84,7 @@ static int mlx5_esw_offloads_pf_vf_devlink_port_init(struct mlx5_eswitch *esw, u
+       return 0;
+ }
+-static void mlx5_esw_offloads_pf_vf_devlink_port_cleanup(struct mlx5_eswitch *esw, u16 vport_num)
++void mlx5_esw_offloads_pf_vf_devlink_port_cleanup(struct mlx5_eswitch *esw, u16 vport_num)
+ {
+       struct mlx5_vport *vport;
+@@ -118,10 +118,6 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
+       if (IS_ERR(vport))
+               return PTR_ERR(vport);
+-      err = mlx5_esw_offloads_pf_vf_devlink_port_init(esw, vport_num);
+-      if (err)
+-              return err;
+-
+       dl_port = vport->dl_port;
+       if (!dl_port)
+               return 0;
+@@ -131,7 +127,7 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
+       err = devl_port_register_with_ops(devlink, dl_port, dl_port_index,
+                                         &mlx5_esw_dl_port_ops);
+       if (err)
+-              goto reg_err;
++              return err;
+       err = devl_rate_leaf_create(dl_port, vport, NULL);
+       if (err)
+@@ -141,8 +137,6 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
+ rate_err:
+       devl_port_unregister(dl_port);
+-reg_err:
+-      mlx5_esw_offloads_pf_vf_devlink_port_cleanup(esw, vport_num);
+       return err;
+ }
+@@ -160,7 +154,6 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
+       }
+       devl_port_unregister(vport->dl_port);
+-      mlx5_esw_offloads_pf_vf_devlink_port_cleanup(esw, vport_num);
+ }
+ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+index 85ddf6f7e37df..591184d892af6 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -1095,6 +1095,31 @@ void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
+       mlx5_esw_vport_disable(esw, vport_num);
+ }
++static int mlx5_eswitch_load_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num,
++                                       enum mlx5_eswitch_vport_event enabled_events)
++{
++      int err;
++
++      err = mlx5_esw_offloads_init_pf_vf_rep(esw, vport_num);
++      if (err)
++              return err;
++
++      err = mlx5_eswitch_load_vport(esw, vport_num, enabled_events);
++      if (err)
++              goto err_load;
++      return 0;
++
++err_load:
++      mlx5_esw_offloads_cleanup_pf_vf_rep(esw, vport_num);
++      return err;
++}
++
++static void mlx5_eswitch_unload_pf_vf_vport(struct mlx5_eswitch *esw, u16 vport_num)
++{
++      mlx5_eswitch_unload_vport(esw, vport_num);
++      mlx5_esw_offloads_cleanup_pf_vf_rep(esw, vport_num);
++}
++
+ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs)
+ {
+       struct mlx5_vport *vport;
+@@ -1103,7 +1128,7 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs)
+       mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+               if (!vport->enabled)
+                       continue;
+-              mlx5_eswitch_unload_vport(esw, vport->vport);
++              mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport);
+       }
+ }
+@@ -1116,7 +1141,7 @@ static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw,
+       mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) {
+               if (!vport->enabled)
+                       continue;
+-              mlx5_eswitch_unload_vport(esw, vport->vport);
++              mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport);
+       }
+ }
+@@ -1128,7 +1153,7 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
+       int err;
+       mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+-              err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events);
++              err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, enabled_events);
+               if (err)
+                       goto vf_err;
+       }
+@@ -1148,7 +1173,7 @@ static int mlx5_eswitch_load_ec_vf_vports(struct mlx5_eswitch *esw, u16 num_ec_v
+       int err;
+       mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) {
+-              err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events);
++              err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, enabled_events);
+               if (err)
+                       goto vf_err;
+       }
+@@ -1190,7 +1215,7 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
+       int ret;
+       /* Enable PF vport */
+-      ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_PF, enabled_events);
++      ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events);
+       if (ret)
+               return ret;
+@@ -1201,7 +1226,7 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
+       /* Enable ECPF vport */
+       if (mlx5_ecpf_vport_exists(esw->dev)) {
+-              ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events);
++              ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events);
+               if (ret)
+                       goto ecpf_err;
+               if (mlx5_core_ec_sriov_enabled(esw->dev)) {
+@@ -1224,11 +1249,11 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
+               mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs);
+ ec_vf_err:
+       if (mlx5_ecpf_vport_exists(esw->dev))
+-              mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
++              mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF);
+ ecpf_err:
+       host_pf_disable_hca(esw->dev);
+ pf_hca_err:
+-      mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
++      mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
+       return ret;
+ }
+@@ -1242,11 +1267,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
+       if (mlx5_ecpf_vport_exists(esw->dev)) {
+               if (mlx5_core_ec_sriov_enabled(esw->dev))
+                       mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs);
+-              mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF);
++              mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF);
+       }
+       host_pf_disable_hca(esw->dev);
+-      mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF);
++      mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
+ }
+ static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw)
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+index 040ed6d79258f..56d9a261a5c80 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -725,6 +725,8 @@ void mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw,
+                                  u16 vport,
+                                  struct mlx5_flow_spec *spec);
++int mlx5_esw_offloads_init_pf_vf_rep(struct mlx5_eswitch *esw, u16 vport_num);
++void mlx5_esw_offloads_cleanup_pf_vf_rep(struct mlx5_eswitch *esw, u16 vport_num);
+ int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num);
+ void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num);
+@@ -739,6 +741,8 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
+                               enum mlx5_eswitch_vport_event enabled_events);
+ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs);
++int mlx5_esw_offloads_pf_vf_devlink_port_init(struct mlx5_eswitch *esw, u16 vport_num);
++void mlx5_esw_offloads_pf_vf_devlink_port_cleanup(struct mlx5_eswitch *esw, u16 vport_num);
+ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num);
+ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
+ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index 1eb49784c0e11..2f8bab73643e2 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -2424,6 +2424,22 @@ void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num)
+               __esw_offloads_unload_rep(esw, rep, rep_type);
+ }
++int mlx5_esw_offloads_init_pf_vf_rep(struct mlx5_eswitch *esw, u16 vport_num)
++{
++      if (esw->mode != MLX5_ESWITCH_OFFLOADS)
++              return 0;
++
++      return mlx5_esw_offloads_pf_vf_devlink_port_init(esw, vport_num);
++}
++
++void mlx5_esw_offloads_cleanup_pf_vf_rep(struct mlx5_eswitch *esw, u16 vport_num)
++{
++      if (esw->mode != MLX5_ESWITCH_OFFLOADS)
++              return;
++
++      mlx5_esw_offloads_pf_vf_devlink_port_cleanup(esw, vport_num);
++}
++
+ int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
+ {
+       int err;
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-mlx5-rework-devlink-port-alloc-free-into-init-cl.patch b/queue-6.5/net-mlx5-rework-devlink-port-alloc-free-into-init-cl.patch
new file mode 100644 (file)
index 0000000..281ea67
--- /dev/null
@@ -0,0 +1,160 @@
+From cce906617266ed80ac8c6ea0012a3529360b0b28 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 May 2023 17:46:47 +0200
+Subject: net/mlx5: Rework devlink port alloc/free into init/cleanup
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit 4c0dac1ef8abc6295a91197884f5ceb5d11c2bd9 ]
+
+In order to prepare the devlink port registration function to be common
+for PFs/VFs and SFs, change the existing devlink port allocation and
+free functions into PF/VF init and cleanup, so similar helpers could be
+later on introduced for SFs. Make the init/cleanup helpers responsible
+for setting/clearing the vport->dl_port pointer.
+
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Reviewed-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Stable-dep-of: 344134609a56 ("mlx5/core: E-Switch, Create ACL FT for eswitch manager in switchdev mode")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/esw/devlink_port.c     | 65 ++++++++++++-------
+ 1 file changed, 43 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+index fdf2be548e855..463bde802e45e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+@@ -22,20 +22,17 @@ static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_
+              mlx5_core_is_ec_vf_vport(esw->dev, vport_num);
+ }
+-static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num)
++static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch *esw,
++                                                         u16 vport_num,
++                                                         struct devlink_port *dl_port)
+ {
+       struct mlx5_core_dev *dev = esw->dev;
+       struct devlink_port_attrs attrs = {};
+       struct netdev_phys_item_id ppid = {};
+-      struct devlink_port *dl_port;
+       u32 controller_num = 0;
+       bool external;
+       u16 pfnum;
+-      dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL);
+-      if (!dl_port)
+-              return NULL;
+-
+       mlx5_esw_get_port_parent_id(dev, &ppid);
+       pfnum = mlx5_get_dev_index(dev);
+       external = mlx5_core_is_ecpf_esw_manager(dev);
+@@ -63,12 +60,40 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16
+               devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum,
+                                             vport_num - 1, false);
+       }
+-      return dl_port;
+ }
+-static void mlx5_esw_dl_port_free(struct devlink_port *dl_port)
++static int mlx5_esw_offloads_pf_vf_devlink_port_init(struct mlx5_eswitch *esw, u16 vport_num)
++{
++      struct devlink_port *dl_port;
++      struct mlx5_vport *vport;
++
++      if (!mlx5_esw_devlink_port_supported(esw, vport_num))
++              return 0;
++
++      vport = mlx5_eswitch_get_vport(esw, vport_num);
++      if (IS_ERR(vport))
++              return PTR_ERR(vport);
++
++      dl_port = kzalloc(sizeof(*dl_port), GFP_KERNEL);
++      if (!dl_port)
++              return -ENOMEM;
++
++      mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(esw, vport_num, dl_port);
++
++      vport->dl_port = dl_port;
++      return 0;
++}
++
++static void mlx5_esw_offloads_pf_vf_devlink_port_cleanup(struct mlx5_eswitch *esw, u16 vport_num)
+ {
+-      kfree(dl_port);
++      struct mlx5_vport *vport;
++
++      vport = mlx5_eswitch_get_vport(esw, vport_num);
++      if (IS_ERR(vport) || !vport->dl_port)
++              return;
++
++      kfree(vport->dl_port);
++      vport->dl_port = NULL;
+ }
+ static const struct devlink_port_ops mlx5_esw_dl_port_ops = {
+@@ -89,16 +114,17 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
+       struct devlink *devlink;
+       int err;
+-      if (!mlx5_esw_devlink_port_supported(esw, vport_num))
+-              return 0;
+-
+       vport = mlx5_eswitch_get_vport(esw, vport_num);
+       if (IS_ERR(vport))
+               return PTR_ERR(vport);
+-      dl_port = mlx5_esw_dl_port_alloc(esw, vport_num);
++      err = mlx5_esw_offloads_pf_vf_devlink_port_init(esw, vport_num);
++      if (err)
++              return err;
++
++      dl_port = vport->dl_port;
+       if (!dl_port)
+-              return -ENOMEM;
++              return 0;
+       devlink = priv_to_devlink(dev);
+       dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
+@@ -111,13 +137,12 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
+       if (err)
+               goto rate_err;
+-      vport->dl_port = dl_port;
+       return 0;
+ rate_err:
+       devl_port_unregister(dl_port);
+ reg_err:
+-      mlx5_esw_dl_port_free(dl_port);
++      mlx5_esw_offloads_pf_vf_devlink_port_cleanup(esw, vport_num);
+       return err;
+ }
+@@ -125,11 +150,8 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
+ {
+       struct mlx5_vport *vport;
+-      if (!mlx5_esw_devlink_port_supported(esw, vport_num))
+-              return;
+-
+       vport = mlx5_eswitch_get_vport(esw, vport_num);
+-      if (IS_ERR(vport))
++      if (IS_ERR(vport) || !vport->dl_port)
+               return;
+       if (vport->dl_port->devlink_rate) {
+@@ -138,8 +160,7 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
+       }
+       devl_port_unregister(vport->dl_port);
+-      mlx5_esw_dl_port_free(vport->dl_port);
+-      vport->dl_port = NULL;
++      mlx5_esw_offloads_pf_vf_devlink_port_cleanup(esw, vport_num);
+ }
+ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num)
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-mlx5e-clear-mirred-devices-array-if-the-rule-is-.patch b/queue-6.5/net-mlx5e-clear-mirred-devices-array-if-the-rule-is-.patch
new file mode 100644 (file)
index 0000000..0c2c8c1
--- /dev/null
@@ -0,0 +1,140 @@
+From c4a4415f7ebae22af4048266f13179e7a629cd0a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Sep 2023 10:48:45 -0700
+Subject: net/mlx5e: Clear mirred devices array if the rule is split
+
+From: Jianbo Liu <jianbol@nvidia.com>
+
+[ Upstream commit b7558a77529fef60e7992f40fb5353fed8be0cf8 ]
+
+In the cited commit, the mirred devices are recorded and checked while
+parsing the actions. In order to avoid system crash, the duplicate
+action in a single rule is not allowed.
+
+But the rule is actually broken down into several FTEs in different
+tables, for either mirroring, or the specified types of actions which
+use post action infrastructure.
+
+It will reject certain action lists by mistake, for example:
+    actions:enp8s0f0_1,set(ipv4(ttl=63)),enp8s0f0_0,enp8s0f0_1.
+Here the rule is split to two FTEs because of pedit action.
+
+To fix this issue, when parsing the rule actions, reset if_count to
+clear the mirred devices array if the rule is split to multiple
+FTEs, and then the duplicate checking is restarted.
+
+Fixes: 554fe75c1b3f ("net/mlx5e: Avoid duplicating rule destinations")
+Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
+Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c        | 4 +++-
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c    | 1 +
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c     | 4 +++-
+ .../ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c  | 1 +
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c      | 1 +
+ .../net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c   | 4 +++-
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c               | 1 +
+ 7 files changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
+index 92d3952dfa8b7..feeb41693c176 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
+@@ -17,8 +17,10 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+       if (err)
+               return err;
+-      if (mlx5e_is_eswitch_flow(parse_state->flow))
++      if (mlx5e_is_eswitch_flow(parse_state->flow)) {
+               attr->esw_attr->split_count = attr->esw_attr->out_count;
++              parse_state->if_count = 0;
++      }
+       attr->flags |= MLX5_ATTR_FLAG_CT;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
+index 291193f7120d5..f63402c480280 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
+@@ -294,6 +294,7 @@ parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state,
+       if (err)
+               return err;
++      parse_state->if_count = 0;
+       esw_attr->out_count++;
+       return 0;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
+index 3b272bbf4c538..368a95fa77d32 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
+@@ -98,8 +98,10 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
+       attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+-      if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
++      if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+               esw_attr->split_count = esw_attr->out_count;
++              parse_state->if_count = 0;
++      }
+       return 0;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
+index ad09a8a5f36e0..2d1d4a04501b4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
+@@ -66,6 +66,7 @@ tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
+       if (err)
+               return err;
++      parse_state->if_count = 0;
+       esw_attr->out_count++;
+       return 0;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
+index c8a3eaf189f6a..a13c5e707b83c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
+@@ -166,6 +166,7 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+               return err;
+       esw_attr->split_count = esw_attr->out_count;
++      parse_state->if_count = 0;
+       return 0;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
+index 310b992307607..f17575b09788d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
+@@ -65,8 +65,10 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
+       if (err)
+               return err;
+-      if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
++      if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+               attr->esw_attr->split_count = attr->esw_attr->out_count;
++              parse_state->if_count = 0;
++      }
+       return 0;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+index 31708d5aa6087..4b22a91482cec 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -3939,6 +3939,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
+                       }
+                       i_split = i + 1;
++                      parse_state->if_count = 0;
+                       list_add(&attr->list, &flow->attrs);
+               }
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-phy-micrel-correct-bit-assignments-for-phy_devic.patch b/queue-6.5/net-phy-micrel-correct-bit-assignments-for-phy_devic.patch
new file mode 100644 (file)
index 0000000..63403cb
--- /dev/null
@@ -0,0 +1,54 @@
+From 01eaadd917df3f9fe80794f7279edef182d1dcef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 06:53:23 +0200
+Subject: net: phy: micrel: Correct bit assignments for phy_device flags
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit 719c5e37e99d2fd588d1c994284d17650a66354c ]
+
+Previously, the defines for phy_device flags in the Micrel driver were
+ambiguous in their representation. They were intended to be bit masks
+but were mistakenly defined as bit positions. This led to the following
+issues:
+
+- MICREL_KSZ8_P1_ERRATA, designated for KSZ88xx switches, overlapped
+  with MICREL_PHY_FXEN and MICREL_PHY_50MHZ_CLK.
+- Due to this overlap, the code path for MICREL_PHY_FXEN, tailored for
+  the KSZ8041 PHY, was not executed for KSZ88xx PHYs.
+- Similarly, the code associated with MICREL_PHY_50MHZ_CLK wasn't
+  triggered for KSZ88xx.
+
+To rectify this, all three flags have now been explicitly converted to
+use the `BIT()` macro, ensuring they are defined as bit masks and
+preventing potential overlaps in the future.
+
+Fixes: 49011e0c1555 ("net: phy: micrel: ksz886x/ksz8081: add cabletest support")
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/micrel_phy.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
+index 8bef1ab62bba3..322d872559847 100644
+--- a/include/linux/micrel_phy.h
++++ b/include/linux/micrel_phy.h
+@@ -41,9 +41,9 @@
+ #define       PHY_ID_KSZ9477          0x00221631
+ /* struct phy_device dev_flags definitions */
+-#define MICREL_PHY_50MHZ_CLK  0x00000001
+-#define MICREL_PHY_FXEN               0x00000002
+-#define MICREL_KSZ8_P1_ERRATA 0x00000003
++#define MICREL_PHY_50MHZ_CLK  BIT(0)
++#define MICREL_PHY_FXEN               BIT(1)
++#define MICREL_KSZ8_P1_ERRATA BIT(2)
+ #define MICREL_KSZ9021_EXTREG_CTRL    0xB
+ #define MICREL_KSZ9021_EXTREG_DATA_WRITE      0xC
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-phy-provide-module-4-ksz9477-errata-ds80000754c.patch b/queue-6.5/net-phy-provide-module-4-ksz9477-errata-ds80000754c.patch
new file mode 100644 (file)
index 0000000..c52ff73
--- /dev/null
@@ -0,0 +1,114 @@
+From 7f11018b7e97ce5357a1b204dc71f6b2b0a330a2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Sep 2023 11:33:15 +0200
+Subject: net: phy: Provide Module 4 KSZ9477 errata (DS80000754C)
+
+From: Lukasz Majewski <lukma@denx.de>
+
+[ Upstream commit 08c6d8bae48c2c28f7017d7b61b5d5a1518ceb39 ]
+
+The KSZ9477 errata points out (in 'Module 4') the link up/down problems
+when EEE (Energy Efficient Ethernet) is enabled in the device to which
+the KSZ9477 tries to auto negotiate.
+
+The suggested workaround is to clear advertisement of EEE for PHYs in
+this chip driver.
+
+To avoid regressions with other switch ICs the new MICREL_NO_EEE flag
+has been introduced.
+
+Moreover, the in-register disablement of MMD_DEVICE_ID_EEE_ADV.MMD_EEE_ADV
+MMD register is removed, as this code is both now executed too late
+(after previous rework of the PHY and DSA for KSZ switches) and not
+required as setting all members of eee_broken_modes bit field prevents
+the KSZ9477 from advertising EEE.
+
+Fixes: 69d3b36ca045 ("net: dsa: microchip: enable EEE support") # for KSZ9477
+Signed-off-by: Lukasz Majewski <lukma@denx.de>
+Tested-by: Oleksij Rempel <o.rempel@pengutronix.de> # Confirmed disabled EEE with oscilloscope.
+Reviewed-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Link: https://lore.kernel.org/r/20230905093315.784052-1-lukma@denx.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz_common.c | 16 +++++++++++++++-
+ drivers/net/phy/micrel.c               |  9 ++++++---
+ include/linux/micrel_phy.h             |  1 +
+ 3 files changed, 22 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
+index 6c0623f88654e..d9d843efd111f 100644
+--- a/drivers/net/dsa/microchip/ksz_common.c
++++ b/drivers/net/dsa/microchip/ksz_common.c
+@@ -2337,13 +2337,27 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port)
+ {
+       struct ksz_device *dev = ds->priv;
+-      if (dev->chip_id == KSZ8830_CHIP_ID) {
++      switch (dev->chip_id) {
++      case KSZ8830_CHIP_ID:
+               /* Silicon Errata Sheet (DS80000830A):
+                * Port 1 does not work with LinkMD Cable-Testing.
+                * Port 1 does not respond to received PAUSE control frames.
+                */
+               if (!port)
+                       return MICREL_KSZ8_P1_ERRATA;
++              break;
++      case KSZ9477_CHIP_ID:
++              /* KSZ9477 Errata DS80000754C
++               *
++               * Module 4: Energy Efficient Ethernet (EEE) feature select must
++               * be manually disabled
++               *   The EEE feature is enabled by default, but it is not fully
++               *   operational. It must be manually disabled through register
++               *   controls. If not disabled, the PHY ports can auto-negotiate
++               *   to enable EEE, and this feature can cause link drops when
++               *   linked to another device supporting EEE.
++               */
++              return MICREL_NO_EEE;
+       }
+       return 0;
+diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
+index b6d7981b2d1ee..927d3d54658ef 100644
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -1800,9 +1800,6 @@ static const struct ksz9477_errata_write ksz9477_errata_writes[] = {
+       /* Transmit waveform amplitude can be improved (1000BASE-T, 100BASE-TX, 10BASE-Te) */
+       {0x1c, 0x04, 0x00d0},
+-      /* Energy Efficient Ethernet (EEE) feature select must be manually disabled */
+-      {0x07, 0x3c, 0x0000},
+-
+       /* Register settings are required to meet data sheet supply current specifications */
+       {0x1c, 0x13, 0x6eff},
+       {0x1c, 0x14, 0xe6ff},
+@@ -1847,6 +1844,12 @@ static int ksz9477_config_init(struct phy_device *phydev)
+                       return err;
+       }
++      /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes
++       * in this switch shall be regarded as broken.
++       */
++      if (phydev->dev_flags & MICREL_NO_EEE)
++              phydev->eee_broken_modes = -1;
++
+       err = genphy_restart_aneg(phydev);
+       if (err)
+               return err;
+diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
+index 322d872559847..4e27ca7c49def 100644
+--- a/include/linux/micrel_phy.h
++++ b/include/linux/micrel_phy.h
+@@ -44,6 +44,7 @@
+ #define MICREL_PHY_50MHZ_CLK  BIT(0)
+ #define MICREL_PHY_FXEN               BIT(1)
+ #define MICREL_KSZ8_P1_ERRATA BIT(2)
++#define MICREL_NO_EEE         BIT(3)
+ #define MICREL_KSZ9021_EXTREG_CTRL    0xB
+ #define MICREL_KSZ9021_EXTREG_DATA_WRITE      0xC
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-phylink-fix-sphinx-complaint-about-invalid-liter.patch b/queue-6.5/net-phylink-fix-sphinx-complaint-about-invalid-liter.patch
new file mode 100644 (file)
index 0000000..3a6dbc1
--- /dev/null
@@ -0,0 +1,56 @@
+From a89f397ced084289cd4c27b5bd9afa6e1c3c21c5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Sep 2023 16:42:02 -0700
+Subject: net: phylink: fix sphinx complaint about invalid literal
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 1a961e74d5abbea049588a3d74b759955b4ed9d5 ]
+
+sphinx complains about the use of "%PHYLINK_PCS_NEG_*":
+
+Documentation/networking/kapi:144: ./include/linux/phylink.h:601: WARNING: Inline literal start-string without end-string.
+Documentation/networking/kapi:144: ./include/linux/phylink.h:633: WARNING: Inline literal start-string without end-string.
+
+These are not valid symbols so drop the '%' prefix.
+
+Alternatively we could use %PHYLINK_PCS_NEG_\* (escape the *)
+or use normal literal ``PHYLINK_PCS_NEG_*`` but there is already
+a handful of un-adorned DEFINE_* in this file.
+
+Fixes: f99d471afa03 ("net: phylink: add PCS negotiation mode")
+Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
+Link: https://lore.kernel.org/all/20230626162908.2f149f98@canb.auug.org.au/
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/phylink.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/phylink.h b/include/linux/phylink.h
+index 1817940a3418e..4dff1eba425ba 100644
+--- a/include/linux/phylink.h
++++ b/include/linux/phylink.h
+@@ -615,7 +615,7 @@ void pcs_get_state(struct phylink_pcs *pcs,
+  *
+  * The %neg_mode argument should be tested via the phylink_mode_*() family of
+  * functions, or for PCS that set pcs->neg_mode true, should be tested
+- * against the %PHYLINK_PCS_NEG_* definitions.
++ * against the PHYLINK_PCS_NEG_* definitions.
+  */
+ int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
+              phy_interface_t interface, const unsigned long *advertising,
+@@ -645,7 +645,7 @@ void pcs_an_restart(struct phylink_pcs *pcs);
+  *
+  * The %mode argument should be tested via the phylink_mode_*() family of
+  * functions, or for PCS that set pcs->neg_mode true, should be tested
+- * against the %PHYLINK_PCS_NEG_* definitions.
++ * against the PHYLINK_PCS_NEG_* definitions.
+  */
+ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
+                phy_interface_t interface, int speed, int duplex);
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-read-sk-sk_family-once-in-sk_mc_loop.patch b/queue-6.5/net-read-sk-sk_family-once-in-sk_mc_loop.patch
new file mode 100644 (file)
index 0000000..f4c9799
--- /dev/null
@@ -0,0 +1,87 @@
+From 364eeef56a7633bce37fe93ee252fc06bbc8ecae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Aug 2023 10:12:44 +0000
+Subject: net: read sk->sk_family once in sk_mc_loop()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a3e0fdf71bbe031de845e8e08ed7fba49f9c702c ]
+
+syzbot is playing with IPV6_ADDRFORM quite a lot these days,
+and managed to hit the WARN_ON_ONCE(1) in sk_mc_loop()
+
+We have many more similar issues to fix.
+
+WARNING: CPU: 1 PID: 1593 at net/core/sock.c:782 sk_mc_loop+0x165/0x260
+Modules linked in:
+CPU: 1 PID: 1593 Comm: kworker/1:3 Not tainted 6.1.40-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
+Workqueue: events_power_efficient gc_worker
+RIP: 0010:sk_mc_loop+0x165/0x260 net/core/sock.c:782
+Code: 34 1b fd 49 81 c7 18 05 00 00 4c 89 f8 48 c1 e8 03 42 80 3c 20 00 74 08 4c 89 ff e8 25 36 6d fd 4d 8b 37 eb 13 e8 db 33 1b fd <0f> 0b b3 01 eb 34 e8 d0 33 1b fd 45 31 f6 49 83 c6 38 4c 89 f0 48
+RSP: 0018:ffffc90000388530 EFLAGS: 00010246
+RAX: ffffffff846d9b55 RBX: 0000000000000011 RCX: ffff88814f884980
+RDX: 0000000000000102 RSI: ffffffff87ae5160 RDI: 0000000000000011
+RBP: ffffc90000388550 R08: 0000000000000003 R09: ffffffff846d9a65
+R10: 0000000000000002 R11: ffff88814f884980 R12: dffffc0000000000
+R13: ffff88810dbee000 R14: 0000000000000010 R15: ffff888150084000
+FS: 0000000000000000(0000) GS:ffff8881f6b00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000020000180 CR3: 000000014ee5b000 CR4: 00000000003506e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+<IRQ>
+[<ffffffff8507734f>] ip6_finish_output2+0x33f/0x1ae0 net/ipv6/ip6_output.c:83
+[<ffffffff85062766>] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline]
+[<ffffffff85062766>] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211
+[<ffffffff85061f8c>] NF_HOOK_COND include/linux/netfilter.h:298 [inline]
+[<ffffffff85061f8c>] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232
+[<ffffffff852071cf>] dst_output include/net/dst.h:444 [inline]
+[<ffffffff852071cf>] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161
+[<ffffffff83618fb4>] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline]
+[<ffffffff83618fb4>] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline]
+[<ffffffff83618fb4>] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline]
+[<ffffffff83618fb4>] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677
+[<ffffffff8361ddd9>] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229
+[<ffffffff84763fc0>] netdev_start_xmit include/linux/netdevice.h:4925 [inline]
+[<ffffffff84763fc0>] xmit_one net/core/dev.c:3644 [inline]
+[<ffffffff84763fc0>] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660
+[<ffffffff8494c650>] sch_direct_xmit+0x2a0/0x9c0 net/sched/sch_generic.c:342
+[<ffffffff8494d883>] qdisc_restart net/sched/sch_generic.c:407 [inline]
+[<ffffffff8494d883>] __qdisc_run+0xb13/0x1e70 net/sched/sch_generic.c:415
+[<ffffffff8478c426>] qdisc_run+0xd6/0x260 include/net/pkt_sched.h:125
+[<ffffffff84796eac>] net_tx_action+0x7ac/0x940 net/core/dev.c:5247
+[<ffffffff858002bd>] __do_softirq+0x2bd/0x9bd kernel/softirq.c:599
+[<ffffffff814c3fe8>] invoke_softirq kernel/softirq.c:430 [inline]
+[<ffffffff814c3fe8>] __irq_exit_rcu+0xc8/0x170 kernel/softirq.c:683
+[<ffffffff814c3f09>] irq_exit_rcu+0x9/0x20 kernel/softirq.c:695
+
+Fixes: 7ad6848c7e81 ("ip: fix mc_loop checks for tunnels with multicast outer addresses")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230830101244.1146934-1-edumazet@google.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 1c5c01b116e6f..4ae68aa07e9fe 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -765,7 +765,8 @@ bool sk_mc_loop(struct sock *sk)
+               return false;
+       if (!sk)
+               return true;
+-      switch (sk->sk_family) {
++      /* IPV6_ADDRFORM can change sk->sk_family under us. */
++      switch (READ_ONCE(sk->sk_family)) {
+       case AF_INET:
+               return inet_sk(sk)->mc_loop;
+ #if IS_ENABLED(CONFIG_IPV6)
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-sched-fq_pie-avoid-stalls-in-fq_pie_timer.patch b/queue-6.5/net-sched-fq_pie-avoid-stalls-in-fq_pie_timer.patch
new file mode 100644 (file)
index 0000000..7e1669b
--- /dev/null
@@ -0,0 +1,116 @@
+From ab6dade8133a725b8f7cdf2dc1290a0a1b4f4148 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Aug 2023 12:35:41 +0000
+Subject: net/sched: fq_pie: avoid stalls in fq_pie_timer()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 8c21ab1bae945686c602c5bfa4e3f3352c2452c5 ]
+
+When setting a high number of flows (limit being 65536),
+fq_pie_timer() is currently using too much time as syzbot reported.
+
+Add logic to yield the cpu every 2048 flows (less than 150 usec
+on debug kernels).
+It should also help by not blocking qdisc fast paths for too long.
+Worst case (65536 flows) would need 31 jiffies for a complete scan.
+
+Relevant extract from syzbot report:
+
+rcu: INFO: rcu_preempt detected expedited stalls on CPUs/tasks: { 0-.... } 2663 jiffies s: 873 root: 0x1/.
+rcu: blocking rcu_node structures (internal RCU debug):
+Sending NMI from CPU 1 to CPUs 0:
+NMI backtrace for cpu 0
+CPU: 0 PID: 5177 Comm: syz-executor273 Not tainted 6.5.0-syzkaller-00453-g727dbda16b83 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
+RIP: 0010:check_kcov_mode kernel/kcov.c:173 [inline]
+RIP: 0010:write_comp_data+0x21/0x90 kernel/kcov.c:236
+Code: 2e 0f 1f 84 00 00 00 00 00 65 8b 05 01 b2 7d 7e 49 89 f1 89 c6 49 89 d2 81 e6 00 01 00 00 49 89 f8 65 48 8b 14 25 80 b9 03 00 <a9> 00 01 ff 00 74 0e 85 f6 74 59 8b 82 04 16 00 00 85 c0 74 4f 8b
+RSP: 0018:ffffc90000007bb8 EFLAGS: 00000206
+RAX: 0000000000000101 RBX: ffffc9000dc0d140 RCX: ffffffff885893b0
+RDX: ffff88807c075940 RSI: 0000000000000100 RDI: 0000000000000001
+RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffffc9000dc0d178
+R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+FS:  0000555555d54380(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f6b442f6130 CR3: 000000006fe1c000 CR4: 00000000003506f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <NMI>
+ </NMI>
+ <IRQ>
+ pie_calculate_probability+0x480/0x850 net/sched/sch_pie.c:415
+ fq_pie_timer+0x1da/0x4f0 net/sched/sch_fq_pie.c:387
+ call_timer_fn+0x1a0/0x580 kernel/time/timer.c:1700
+
+Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler")
+Link: https://lore.kernel.org/lkml/00000000000017ad3f06040bf394@google.com/
+Reported-by: syzbot+e46fbd5289363464bc13@syzkaller.appspotmail.com
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
+Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Link: https://lore.kernel.org/r/20230829123541.3745013-1-edumazet@google.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_fq_pie.c | 27 +++++++++++++++++++--------
+ 1 file changed, 19 insertions(+), 8 deletions(-)
+
+diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
+index 591d87d5e5c0f..68e6acd0f130d 100644
+--- a/net/sched/sch_fq_pie.c
++++ b/net/sched/sch_fq_pie.c
+@@ -61,6 +61,7 @@ struct fq_pie_sched_data {
+       struct pie_params p_params;
+       u32 ecn_prob;
+       u32 flows_cnt;
++      u32 flows_cursor;
+       u32 quantum;
+       u32 memory_limit;
+       u32 new_flow_count;
+@@ -375,22 +376,32 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
+ static void fq_pie_timer(struct timer_list *t)
+ {
+       struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
++      unsigned long next, tupdate;
+       struct Qdisc *sch = q->sch;
+       spinlock_t *root_lock; /* to lock qdisc for probability calculations */
+-      u32 idx;
++      int max_cnt, i;
+       rcu_read_lock();
+       root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+       spin_lock(root_lock);
+-      for (idx = 0; idx < q->flows_cnt; idx++)
+-              pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
+-                                        q->flows[idx].backlog);
+-
+-      /* reset the timer to fire after 'tupdate' jiffies. */
+-      if (q->p_params.tupdate)
+-              mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
++      /* Limit this expensive loop to 2048 flows per round. */
++      max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048);
++      for (i = 0; i < max_cnt; i++) {
++              pie_calculate_probability(&q->p_params,
++                                        &q->flows[q->flows_cursor].vars,
++                                        q->flows[q->flows_cursor].backlog);
++              q->flows_cursor++;
++      }
++      tupdate = q->p_params.tupdate;
++      next = 0;
++      if (q->flows_cursor >= q->flows_cnt) {
++              q->flows_cursor = 0;
++              next = tupdate;
++      }
++      if (tupdate)
++              mod_timer(&q->adapt_timer, jiffies + next);
+       spin_unlock(root_lock);
+       rcu_read_unlock();
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-sched-sch_qfq-fix-uaf-in-qfq_dequeue.patch b/queue-6.5/net-sched-sch_qfq-fix-uaf-in-qfq_dequeue.patch
new file mode 100644 (file)
index 0000000..a7c29b6
--- /dev/null
@@ -0,0 +1,242 @@
+From 9ee2b88274674efde00eccc42bfb4b4e8a6bbd5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 12:22:37 -0400
+Subject: net: sched: sch_qfq: Fix UAF in qfq_dequeue()
+
+From: valis <sec@valis.email>
+
+[ Upstream commit 8fc134fee27f2263988ae38920bc03da416b03d8 ]
+
+When the plug qdisc is used as a class of the qfq qdisc, it can trigger a
+UAF. This issue can be reproduced with the following commands:
+
+  tc qdisc add dev lo root handle 1: qfq
+  tc class add dev lo parent 1: classid 1:1 qfq weight 1 maxpkt 512
+  tc qdisc add dev lo parent 1:1 handle 2: plug
+  tc filter add dev lo parent 1: basic classid 1:1
+  ping -c1 127.0.0.1
+
+and boom:
+
+[  285.353793] BUG: KASAN: slab-use-after-free in qfq_dequeue+0xa7/0x7f0
+[  285.354910] Read of size 4 at addr ffff8880bad312a8 by task ping/144
+[  285.355903]
+[  285.356165] CPU: 1 PID: 144 Comm: ping Not tainted 6.5.0-rc3+ #4
+[  285.357112] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
+[  285.358376] Call Trace:
+[  285.358773]  <IRQ>
+[  285.359109]  dump_stack_lvl+0x44/0x60
+[  285.359708]  print_address_description.constprop.0+0x2c/0x3c0
+[  285.360611]  kasan_report+0x10c/0x120
+[  285.361195]  ? qfq_dequeue+0xa7/0x7f0
+[  285.361780]  qfq_dequeue+0xa7/0x7f0
+[  285.362342]  __qdisc_run+0xf1/0x970
+[  285.362903]  net_tx_action+0x28e/0x460
+[  285.363502]  __do_softirq+0x11b/0x3de
+[  285.364097]  do_softirq.part.0+0x72/0x90
+[  285.364721]  </IRQ>
+[  285.365072]  <TASK>
+[  285.365422]  __local_bh_enable_ip+0x77/0x90
+[  285.366079]  __dev_queue_xmit+0x95f/0x1550
+[  285.366732]  ? __pfx_csum_and_copy_from_iter+0x10/0x10
+[  285.367526]  ? __pfx___dev_queue_xmit+0x10/0x10
+[  285.368259]  ? __build_skb_around+0x129/0x190
+[  285.368960]  ? ip_generic_getfrag+0x12c/0x170
+[  285.369653]  ? __pfx_ip_generic_getfrag+0x10/0x10
+[  285.370390]  ? csum_partial+0x8/0x20
+[  285.370961]  ? raw_getfrag+0xe5/0x140
+[  285.371559]  ip_finish_output2+0x539/0xa40
+[  285.372222]  ? __pfx_ip_finish_output2+0x10/0x10
+[  285.372954]  ip_output+0x113/0x1e0
+[  285.373512]  ? __pfx_ip_output+0x10/0x10
+[  285.374130]  ? icmp_out_count+0x49/0x60
+[  285.374739]  ? __pfx_ip_finish_output+0x10/0x10
+[  285.375457]  ip_push_pending_frames+0xf3/0x100
+[  285.376173]  raw_sendmsg+0xef5/0x12d0
+[  285.376760]  ? do_syscall_64+0x40/0x90
+[  285.377359]  ? __static_call_text_end+0x136578/0x136578
+[  285.378173]  ? do_syscall_64+0x40/0x90
+[  285.378772]  ? kasan_enable_current+0x11/0x20
+[  285.379469]  ? __pfx_raw_sendmsg+0x10/0x10
+[  285.380137]  ? __sock_create+0x13e/0x270
+[  285.380673]  ? __sys_socket+0xf3/0x180
+[  285.381174]  ? __x64_sys_socket+0x3d/0x50
+[  285.381725]  ? entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+[  285.382425]  ? __rcu_read_unlock+0x48/0x70
+[  285.382975]  ? ip4_datagram_release_cb+0xd8/0x380
+[  285.383608]  ? __pfx_ip4_datagram_release_cb+0x10/0x10
+[  285.384295]  ? preempt_count_sub+0x14/0xc0
+[  285.384844]  ? __list_del_entry_valid+0x76/0x140
+[  285.385467]  ? _raw_spin_lock_bh+0x87/0xe0
+[  285.386014]  ? __pfx__raw_spin_lock_bh+0x10/0x10
+[  285.386645]  ? release_sock+0xa0/0xd0
+[  285.387148]  ? preempt_count_sub+0x14/0xc0
+[  285.387712]  ? freeze_secondary_cpus+0x348/0x3c0
+[  285.388341]  ? aa_sk_perm+0x177/0x390
+[  285.388856]  ? __pfx_aa_sk_perm+0x10/0x10
+[  285.389441]  ? check_stack_object+0x22/0x70
+[  285.390032]  ? inet_send_prepare+0x2f/0x120
+[  285.390603]  ? __pfx_inet_sendmsg+0x10/0x10
+[  285.391172]  sock_sendmsg+0xcc/0xe0
+[  285.391667]  __sys_sendto+0x190/0x230
+[  285.392168]  ? __pfx___sys_sendto+0x10/0x10
+[  285.392727]  ? kvm_clock_get_cycles+0x14/0x30
+[  285.393328]  ? set_normalized_timespec64+0x57/0x70
+[  285.393980]  ? _raw_spin_unlock_irq+0x1b/0x40
+[  285.394578]  ? __x64_sys_clock_gettime+0x11c/0x160
+[  285.395225]  ? __pfx___x64_sys_clock_gettime+0x10/0x10
+[  285.395908]  ? _copy_to_user+0x3e/0x60
+[  285.396432]  ? exit_to_user_mode_prepare+0x1a/0x120
+[  285.397086]  ? syscall_exit_to_user_mode+0x22/0x50
+[  285.397734]  ? do_syscall_64+0x71/0x90
+[  285.398258]  __x64_sys_sendto+0x74/0x90
+[  285.398786]  do_syscall_64+0x64/0x90
+[  285.399273]  ? exit_to_user_mode_prepare+0x1a/0x120
+[  285.399949]  ? syscall_exit_to_user_mode+0x22/0x50
+[  285.400605]  ? do_syscall_64+0x71/0x90
+[  285.401124]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+[  285.401807] RIP: 0033:0x495726
+[  285.402233] Code: ff ff ff f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 11 b8 2c 00 00 00 0f 09
+[  285.404683] RSP: 002b:00007ffcc25fb618 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+[  285.405677] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 0000000000495726
+[  285.406628] RDX: 0000000000000040 RSI: 0000000002518750 RDI: 0000000000000000
+[  285.407565] RBP: 00000000005205ef R08: 00000000005f8838 R09: 000000000000001c
+[  285.408523] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000002517634
+[  285.409460] R13: 00007ffcc25fb6f0 R14: 0000000000000003 R15: 0000000000000000
+[  285.410403]  </TASK>
+[  285.410704]
+[  285.410929] Allocated by task 144:
+[  285.411402]  kasan_save_stack+0x1e/0x40
+[  285.411926]  kasan_set_track+0x21/0x30
+[  285.412442]  __kasan_slab_alloc+0x55/0x70
+[  285.412973]  kmem_cache_alloc_node+0x187/0x3d0
+[  285.413567]  __alloc_skb+0x1b4/0x230
+[  285.414060]  __ip_append_data+0x17f7/0x1b60
+[  285.414633]  ip_append_data+0x97/0xf0
+[  285.415144]  raw_sendmsg+0x5a8/0x12d0
+[  285.415640]  sock_sendmsg+0xcc/0xe0
+[  285.416117]  __sys_sendto+0x190/0x230
+[  285.416626]  __x64_sys_sendto+0x74/0x90
+[  285.417145]  do_syscall_64+0x64/0x90
+[  285.417624]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+[  285.418306]
+[  285.418531] Freed by task 144:
+[  285.418960]  kasan_save_stack+0x1e/0x40
+[  285.419469]  kasan_set_track+0x21/0x30
+[  285.419988]  kasan_save_free_info+0x27/0x40
+[  285.420556]  ____kasan_slab_free+0x109/0x1a0
+[  285.421146]  kmem_cache_free+0x1c2/0x450
+[  285.421680]  __netif_receive_skb_core+0x2ce/0x1870
+[  285.422333]  __netif_receive_skb_one_core+0x97/0x140
+[  285.423003]  process_backlog+0x100/0x2f0
+[  285.423537]  __napi_poll+0x5c/0x2d0
+[  285.424023]  net_rx_action+0x2be/0x560
+[  285.424510]  __do_softirq+0x11b/0x3de
+[  285.425034]
+[  285.425254] The buggy address belongs to the object at ffff8880bad31280
+[  285.425254]  which belongs to the cache skbuff_head_cache of size 224
+[  285.426993] The buggy address is located 40 bytes inside of
+[  285.426993]  freed 224-byte region [ffff8880bad31280, ffff8880bad31360)
+[  285.428572]
+[  285.428798] The buggy address belongs to the physical page:
+[  285.429540] page:00000000f4b77674 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xbad31
+[  285.430758] flags: 0x100000000000200(slab|node=0|zone=1)
+[  285.431447] page_type: 0xffffffff()
+[  285.431934] raw: 0100000000000200 ffff88810094a8c0 dead000000000122 0000000000000000
+[  285.432757] raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000
+[  285.433562] page dumped because: kasan: bad access detected
+[  285.434144]
+[  285.434320] Memory state around the buggy address:
+[  285.434828]  ffff8880bad31180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  285.435580]  ffff8880bad31200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  285.436264] >ffff8880bad31280: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  285.436777]                                   ^
+[  285.437106]  ffff8880bad31300: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
+[  285.437616]  ffff8880bad31380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  285.438126] ==================================================================
+[  285.438662] Disabling lock debugging due to kernel taint
+
+Fix this by:
+1. Changing sch_plug's .peek handler to qdisc_peek_dequeued(), a
+function compatible with non-work-conserving qdiscs
+2. Checking the return value of qdisc_dequeue_peeked() in sch_qfq.
+
+Fixes: 462dbc9101ac ("pkt_sched: QFQ Plus: fair-queueing service at DRR cost")
+Reported-by: valis <sec@valis.email>
+Signed-off-by: valis <sec@valis.email>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Link: https://lore.kernel.org/r/20230901162237.11525-1-jhs@mojatatu.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_plug.c |  2 +-
+ net/sched/sch_qfq.c  | 22 +++++++++++++++++-----
+ 2 files changed, 18 insertions(+), 6 deletions(-)
+
+diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
+index ea8c4a7174bba..35f49edf63dbf 100644
+--- a/net/sched/sch_plug.c
++++ b/net/sched/sch_plug.c
+@@ -207,7 +207,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
+       .priv_size   =       sizeof(struct plug_sched_data),
+       .enqueue     =       plug_enqueue,
+       .dequeue     =       plug_dequeue,
+-      .peek        =       qdisc_peek_head,
++      .peek        =       qdisc_peek_dequeued,
+       .init        =       plug_init,
+       .change      =       plug_change,
+       .reset       =       qdisc_reset_queue,
+diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
+index befaf74b33caa..09d2955baab10 100644
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -974,10 +974,13 @@ static void qfq_update_eligible(struct qfq_sched *q)
+ }
+ /* Dequeue head packet of the head class in the DRR queue of the aggregate. */
+-static void agg_dequeue(struct qfq_aggregate *agg,
+-                      struct qfq_class *cl, unsigned int len)
++static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
++                                 struct qfq_class *cl, unsigned int len)
+ {
+-      qdisc_dequeue_peeked(cl->qdisc);
++      struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
++
++      if (!skb)
++              return NULL;
+       cl->deficit -= (int) len;
+@@ -987,6 +990,8 @@ static void agg_dequeue(struct qfq_aggregate *agg,
+               cl->deficit += agg->lmax;
+               list_move_tail(&cl->alist, &agg->active);
+       }
++
++      return skb;
+ }
+ static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
+@@ -1132,11 +1137,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
+       if (!skb)
+               return NULL;
+-      qdisc_qstats_backlog_dec(sch, skb);
+       sch->q.qlen--;
++
++      skb = agg_dequeue(in_serv_agg, cl, len);
++
++      if (!skb) {
++              sch->q.qlen++;
++              return NULL;
++      }
++
++      qdisc_qstats_backlog_dec(sch, skb);
+       qdisc_bstats_update(sch, skb);
+-      agg_dequeue(in_serv_agg, cl, len);
+       /* If lmax is lowered, through qfq_change_class, for a class
+        * owning pending packets with larger size than the new value
+        * of lmax, then the following condition may hold.
+-- 
+2.40.1
+
diff --git a/queue-6.5/net-use-sk_forward_alloc_get-in-sk_get_meminfo.patch b/queue-6.5/net-use-sk_forward_alloc_get-in-sk_get_meminfo.patch
new file mode 100644 (file)
index 0000000..20dc6e9
--- /dev/null
@@ -0,0 +1,36 @@
+From 005e2a94ee5512a3c5a9133b53047d754b1018cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 13:52:08 +0000
+Subject: net: use sk_forward_alloc_get() in sk_get_meminfo()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 66d58f046c9d3a8f996b7138d02e965fd0617de0 ]
+
+inet_sk_diag_fill() has been changed to use sk_forward_alloc_get(),
+but sk_get_meminfo() was forgotten.
+
+Fixes: 292e6077b040 ("net: introduce sk_forward_alloc_get()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 4ae68aa07e9fe..3109eb0cd512e 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -3742,7 +3742,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
+       mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
+       mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
+       mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
+-      mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
++      mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
+       mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
+       mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+       mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
+-- 
+2.40.1
+
diff --git a/queue-6.5/netfilter-nf_tables-audit-log-rule-reset.patch b/queue-6.5/netfilter-nf_tables-audit-log-rule-reset.patch
new file mode 100644 (file)
index 0000000..d82b03b
--- /dev/null
@@ -0,0 +1,93 @@
+From b91124c8fafd1423039859af55e50a971109938e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Aug 2023 19:51:58 +0200
+Subject: netfilter: nf_tables: Audit log rule reset
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit ea078ae9108e25fc881c84369f7c03931d22e555 ]
+
+Resetting rules' stateful data happens outside of the transaction logic,
+so 'get' and 'dump' handlers have to emit audit log entries themselves.
+
+Fixes: 8daa8fde3fc3f ("netfilter: nf_tables: Introduce NFT_MSG_GETRULE_RESET")
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/audit.h         |  1 +
+ kernel/auditsc.c              |  1 +
+ net/netfilter/nf_tables_api.c | 18 ++++++++++++++++++
+ 3 files changed, 20 insertions(+)
+
+diff --git a/include/linux/audit.h b/include/linux/audit.h
+index 192bf03aacc52..51b1b7054a233 100644
+--- a/include/linux/audit.h
++++ b/include/linux/audit.h
+@@ -118,6 +118,7 @@ enum audit_nfcfgop {
+       AUDIT_NFT_OP_FLOWTABLE_REGISTER,
+       AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+       AUDIT_NFT_OP_SETELEM_RESET,
++      AUDIT_NFT_OP_RULE_RESET,
+       AUDIT_NFT_OP_INVALID,
+ };
+diff --git a/kernel/auditsc.c b/kernel/auditsc.c
+index 87342b7126bcd..eae5dfe9b9a01 100644
+--- a/kernel/auditsc.c
++++ b/kernel/auditsc.c
+@@ -144,6 +144,7 @@ static const struct audit_nfcfgop_tab audit_nfcfgs[] = {
+       { AUDIT_NFT_OP_FLOWTABLE_REGISTER,      "nft_register_flowtable"   },
+       { AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,    "nft_unregister_flowtable" },
+       { AUDIT_NFT_OP_SETELEM_RESET,           "nft_reset_setelem"        },
++      { AUDIT_NFT_OP_RULE_RESET,              "nft_reset_rule"           },
+       { AUDIT_NFT_OP_INVALID,                 "nft_invalid"              },
+ };
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 2e3844d5923f5..cc70482b94907 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3422,6 +3422,18 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
+       nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
+ }
++static void audit_log_rule_reset(const struct nft_table *table,
++                               unsigned int base_seq,
++                               unsigned int nentries)
++{
++      char *buf = kasprintf(GFP_ATOMIC, "%s:%u",
++                            table->name, base_seq);
++
++      audit_log_nfcfg(buf, table->family, nentries,
++                      AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
++      kfree(buf);
++}
++
+ struct nft_rule_dump_ctx {
+       char *table;
+       char *chain;
+@@ -3528,6 +3540,9 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
+ done:
+       rcu_read_unlock();
++      if (reset && idx > cb->args[0])
++              audit_log_rule_reset(table, cb->seq, idx - cb->args[0]);
++
+       cb->args[0] = idx;
+       return skb->len;
+ }
+@@ -3635,6 +3650,9 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
+       if (err < 0)
+               goto err_fill_rule_info;
++      if (reset)
++              audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1);
++
+       return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+ err_fill_rule_info:
+-- 
+2.40.1
+
diff --git a/queue-6.5/netfilter-nf_tables-audit-log-setelem-reset.patch b/queue-6.5/netfilter-nf_tables-audit-log-setelem-reset.patch
new file mode 100644 (file)
index 0000000..4bdc691
--- /dev/null
@@ -0,0 +1,152 @@
+From 75e76d93af6d896af46b3118e60b7d69e3feba76 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Aug 2023 19:51:57 +0200
+Subject: netfilter: nf_tables: Audit log setelem reset
+
+From: Phil Sutter <phil@nwl.cc>
+
+[ Upstream commit 7e9be1124dbe7888907e82cab20164578e3f9ab7 ]
+
+Since set element reset is not integrated into nf_tables' transaction
+logic, an explicit log call is needed, similar to NFT_MSG_GETOBJ_RESET
+handling.
+
+For the sake of simplicity, catchall element reset will always generate
+a dedicated log entry. This relieves nf_tables_dump_set() from having to
+adjust the logged element count depending on whether a catchall element
+was found or not.
+
+Fixes: 079cd633219d7 ("netfilter: nf_tables: Introduce NFT_MSG_GETSETELEM_RESET")
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/audit.h         |  1 +
+ kernel/auditsc.c              |  1 +
+ net/netfilter/nf_tables_api.c | 31 ++++++++++++++++++++++++++++---
+ 3 files changed, 30 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/audit.h b/include/linux/audit.h
+index 6a3a9e122bb5e..192bf03aacc52 100644
+--- a/include/linux/audit.h
++++ b/include/linux/audit.h
+@@ -117,6 +117,7 @@ enum audit_nfcfgop {
+       AUDIT_NFT_OP_OBJ_RESET,
+       AUDIT_NFT_OP_FLOWTABLE_REGISTER,
+       AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
++      AUDIT_NFT_OP_SETELEM_RESET,
+       AUDIT_NFT_OP_INVALID,
+ };
+diff --git a/kernel/auditsc.c b/kernel/auditsc.c
+index 8dfd581cd5543..87342b7126bcd 100644
+--- a/kernel/auditsc.c
++++ b/kernel/auditsc.c
+@@ -143,6 +143,7 @@ static const struct audit_nfcfgop_tab audit_nfcfgs[] = {
+       { AUDIT_NFT_OP_OBJ_RESET,               "nft_reset_obj"            },
+       { AUDIT_NFT_OP_FLOWTABLE_REGISTER,      "nft_register_flowtable"   },
+       { AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,    "nft_unregister_flowtable" },
++      { AUDIT_NFT_OP_SETELEM_RESET,           "nft_reset_setelem"        },
+       { AUDIT_NFT_OP_INVALID,                 "nft_invalid"              },
+ };
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index eb8b1167dced2..2e3844d5923f5 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -102,6 +102,7 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
+       [NFT_MSG_NEWFLOWTABLE]  = AUDIT_NFT_OP_FLOWTABLE_REGISTER,
+       [NFT_MSG_GETFLOWTABLE]  = AUDIT_NFT_OP_INVALID,
+       [NFT_MSG_DELFLOWTABLE]  = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
++      [NFT_MSG_GETSETELEM_RESET] = AUDIT_NFT_OP_SETELEM_RESET,
+ };
+ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_state)
+@@ -5621,13 +5622,25 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
+       return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
+ }
++static void audit_log_nft_set_reset(const struct nft_table *table,
++                                  unsigned int base_seq,
++                                  unsigned int nentries)
++{
++      char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq);
++
++      audit_log_nfcfg(buf, table->family, nentries,
++                      AUDIT_NFT_OP_SETELEM_RESET, GFP_ATOMIC);
++      kfree(buf);
++}
++
+ struct nft_set_dump_ctx {
+       const struct nft_set    *set;
+       struct nft_ctx          ctx;
+ };
+ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
+-                               const struct nft_set *set, bool reset)
++                               const struct nft_set *set, bool reset,
++                               unsigned int base_seq)
+ {
+       struct nft_set_elem_catchall *catchall;
+       u8 genmask = nft_genmask_cur(net);
+@@ -5643,6 +5656,8 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
+               elem.priv = catchall->elem;
+               ret = nf_tables_fill_setelem(skb, set, &elem, reset);
++              if (reset && !ret)
++                      audit_log_nft_set_reset(set->table, base_seq, 1);
+               break;
+       }
+@@ -5722,12 +5737,17 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
+       set->ops->walk(&dump_ctx->ctx, set, &args.iter);
+       if (!args.iter.err && args.iter.count == cb->args[0])
+-              args.iter.err = nft_set_catchall_dump(net, skb, set, reset);
++              args.iter.err = nft_set_catchall_dump(net, skb, set,
++                                                    reset, cb->seq);
+       rcu_read_unlock();
+       nla_nest_end(skb, nest);
+       nlmsg_end(skb, nlh);
++      if (reset && args.iter.count > args.iter.skip)
++              audit_log_nft_set_reset(table, cb->seq,
++                                      args.iter.count - args.iter.skip);
++
+       if (args.iter.err && args.iter.err != -EMSGSIZE)
+               return args.iter.err;
+       if (args.iter.count == cb->args[0])
+@@ -5952,13 +5972,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
+       struct netlink_ext_ack *extack = info->extack;
+       u8 genmask = nft_genmask_cur(info->net);
+       u8 family = info->nfmsg->nfgen_family;
++      int rem, err = 0, nelems = 0;
+       struct net *net = info->net;
+       struct nft_table *table;
+       struct nft_set *set;
+       struct nlattr *attr;
+       struct nft_ctx ctx;
+       bool reset = false;
+-      int rem, err = 0;
+       table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+                                genmask, 0);
+@@ -6001,8 +6021,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
+                       NL_SET_BAD_ATTR(extack, attr);
+                       break;
+               }
++              nelems++;
+       }
++      if (reset)
++              audit_log_nft_set_reset(table, nft_pernet(net)->base_seq,
++                                      nelems);
++
+       return err;
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/netfilter-nf_tables-unbreak-audit-log-reset.patch b/queue-6.5/netfilter-nf_tables-unbreak-audit-log-reset.patch
new file mode 100644 (file)
index 0000000..5559a9d
--- /dev/null
@@ -0,0 +1,109 @@
+From 002a2faa7fad76a2ef208617ec5b6fb27f7ebca6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 11:42:02 +0200
+Subject: netfilter: nf_tables: Unbreak audit log reset
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 9b5ba5c9c5109bf89dc64a3f4734bd125d1ce52e ]
+
+Deliver the audit log from __nf_tables_dump_rules(): the table dereference
+at the end of the table list loop might point to the list head, leading to
+this crash.
+
+[ 4137.407349] BUG: unable to handle page fault for address: 00000000001f3c50
+[ 4137.407357] #PF: supervisor read access in kernel mode
+[ 4137.407359] #PF: error_code(0x0000) - not-present page
+[ 4137.407360] PGD 0 P4D 0
+[ 4137.407363] Oops: 0000 [#1] PREEMPT SMP PTI
+[ 4137.407365] CPU: 4 PID: 500177 Comm: nft Not tainted 6.5.0+ #277
+[ 4137.407369] RIP: 0010:string+0x49/0xd0
+[ 4137.407374] Code: ff 77 36 45 89 d1 31 f6 49 01 f9 66 45 85 d2 75 19 eb 1e 49 39 f8 76 02 88 07 48 83 c7 01 83 c6 01 48 83 c2 01 4c 39 cf 74 07 <0f> b6 02 84 c0 75 e2 4c 89 c2 e9 58 e5 ff ff 48 c7 c0 0e b2 ff 81
+[ 4137.407377] RSP: 0018:ffff8881179737f0 EFLAGS: 00010286
+[ 4137.407379] RAX: 00000000001f2c50 RBX: ffff888117973848 RCX: ffff0a00ffffff04
+[ 4137.407380] RDX: 00000000001f3c50 RSI: 0000000000000000 RDI: 0000000000000000
+[ 4137.407381] RBP: 0000000000000000 R08: 0000000000000000 R09: 00000000ffffffff
+[ 4137.407383] R10: ffffffffffffffff R11: ffff88813584d200 R12: 0000000000000000
+[ 4137.407384] R13: ffffffffa15cf709 R14: 0000000000000000 R15: ffffffffa15cf709
+[ 4137.407385] FS:  00007fcfc18bb580(0000) GS:ffff88840e700000(0000) knlGS:0000000000000000
+[ 4137.407387] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 4137.407388] CR2: 00000000001f3c50 CR3: 00000001055b2001 CR4: 00000000001706e0
+[ 4137.407390] Call Trace:
+[ 4137.407392]  <TASK>
+[ 4137.407393]  ? __die+0x1b/0x60
+[ 4137.407397]  ? page_fault_oops+0x6b/0xa0
+[ 4137.407399]  ? exc_page_fault+0x60/0x120
+[ 4137.407403]  ? asm_exc_page_fault+0x22/0x30
+[ 4137.407408]  ? string+0x49/0xd0
+[ 4137.407410]  vsnprintf+0x257/0x4f0
+[ 4137.407414]  kvasprintf+0x3e/0xb0
+[ 4137.407417]  kasprintf+0x3e/0x50
+[ 4137.407419]  nf_tables_dump_rules+0x1c0/0x360 [nf_tables]
+[ 4137.407439]  ? __alloc_skb+0xc3/0x170
+[ 4137.407442]  netlink_dump+0x170/0x330
+[ 4137.407447]  __netlink_dump_start+0x227/0x300
+[ 4137.407449]  nf_tables_getrule+0x205/0x390 [nf_tables]
+
+Deliver audit log only once at the end of the rule dump+reset for
+consistency with the set dump+reset.
+
+Ensure audit reset accesses the table under the RCU read-side lock. The
+table list iteration holds the RCU read-side lock, but recent audit code
+dereferences the table object outside of the RCU read-side lock.
+
+Fixes: ea078ae9108e ("netfilter: nf_tables: Audit log rule reset")
+Fixes: 7e9be1124dbe ("netfilter: nf_tables: Audit log setelem reset")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Acked-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index cc70482b94907..a72934f00804e 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3480,6 +3480,10 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
+ cont_skip:
+               (*idx)++;
+       }
++
++      if (reset && *idx)
++              audit_log_rule_reset(table, cb->seq, *idx);
++
+       return 0;
+ }
+@@ -3540,9 +3544,6 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
+ done:
+       rcu_read_unlock();
+-      if (reset && idx > cb->args[0])
+-              audit_log_rule_reset(table, cb->seq, idx - cb->args[0]);
+-
+       cb->args[0] = idx;
+       return skb->len;
+ }
+@@ -5757,8 +5758,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
+       if (!args.iter.err && args.iter.count == cb->args[0])
+               args.iter.err = nft_set_catchall_dump(net, skb, set,
+                                                     reset, cb->seq);
+-      rcu_read_unlock();
+-
+       nla_nest_end(skb, nest);
+       nlmsg_end(skb, nlh);
+@@ -5766,6 +5765,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
+               audit_log_nft_set_reset(table, cb->seq,
+                                       args.iter.count - args.iter.skip);
++      rcu_read_unlock();
++
+       if (args.iter.err && args.iter.err != -EMSGSIZE)
+               return args.iter.err;
+       if (args.iter.count == cb->args[0])
+-- 
+2.40.1
+
diff --git a/queue-6.5/netfilter-nfnetlink_osf-avoid-oob-read.patch b/queue-6.5/netfilter-nfnetlink_osf-avoid-oob-read.patch
new file mode 100644 (file)
index 0000000..2dc0129
--- /dev/null
@@ -0,0 +1,59 @@
+From 8f118d72c3bca837260b59caff8c3cf901fab668 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 10:50:20 -0300
+Subject: netfilter: nfnetlink_osf: avoid OOB read
+
+From: Wander Lairson Costa <wander@redhat.com>
+
+[ Upstream commit f4f8a7803119005e87b716874bec07c751efafec ]
+
+The opt_num field is controlled by user mode and is not currently
+validated inside the kernel. An attacker can take advantage of this to
+trigger an OOB read and potentially leak information.
+
+BUG: KASAN: slab-out-of-bounds in nf_osf_match_one+0xbed/0xd10 net/netfilter/nfnetlink_osf.c:88
+Read of size 2 at addr ffff88804bc64272 by task poc/6431
+
+CPU: 1 PID: 6431 Comm: poc Not tainted 6.0.0-rc4 #1
+Call Trace:
+ nf_osf_match_one+0xbed/0xd10 net/netfilter/nfnetlink_osf.c:88
+ nf_osf_find+0x186/0x2f0 net/netfilter/nfnetlink_osf.c:281
+ nft_osf_eval+0x37f/0x590 net/netfilter/nft_osf.c:47
+ expr_call_ops_eval net/netfilter/nf_tables_core.c:214
+ nft_do_chain+0x2b0/0x1490 net/netfilter/nf_tables_core.c:264
+ nft_do_chain_ipv4+0x17c/0x1f0 net/netfilter/nft_chain_filter.c:23
+ [..]
+
+Also add validation to genre, subtype and version fields.
+
+Fixes: 11eeef41d5f6 ("netfilter: passive OS fingerprint xtables match")
+Reported-by: Lucas Leong <wmliang@infosec.exchange>
+Signed-off-by: Wander Lairson Costa <wander@redhat.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nfnetlink_osf.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
+index 8f1bfa6ccc2d9..50723ba082890 100644
+--- a/net/netfilter/nfnetlink_osf.c
++++ b/net/netfilter/nfnetlink_osf.c
+@@ -315,6 +315,14 @@ static int nfnl_osf_add_callback(struct sk_buff *skb,
+       f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
++      if (f->opt_num > ARRAY_SIZE(f->opt))
++              return -EINVAL;
++
++      if (!memchr(f->genre, 0, MAXGENRELEN) ||
++          !memchr(f->subtype, 0, MAXGENRELEN) ||
++          !memchr(f->version, 0, MAXGENRELEN))
++              return -EINVAL;
++
+       kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
+       if (!kf)
+               return -ENOMEM;
+-- 
+2.40.1
+
diff --git a/queue-6.5/netfilter-nft_set_rbtree-skip-sync-gc-for-new-elemen.patch b/queue-6.5/netfilter-nft_set_rbtree-skip-sync-gc-for-new-elemen.patch
new file mode 100644 (file)
index 0000000..457c04d
--- /dev/null
@@ -0,0 +1,52 @@
+From 225ee3593161a9d5562f0989dd41e77ec32bf545 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Sep 2023 02:14:36 +0200
+Subject: netfilter: nft_set_rbtree: skip sync GC for new elements in this
+ transaction
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 2ee52ae94baabf7ee09cf2a8d854b990dac5d0e4 ]
+
+New elements in this transaction might expire before the transaction
+ends. Skip sync GC for such elements; otherwise the commit path might walk
+over an already released object. Once the transaction is finished, async GC
+will collect such expired elements.
+
+Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_set_rbtree.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
+index c6435e7092319..f250b5399344a 100644
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -312,6 +312,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+       struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
+       struct rb_node *node, *next, *parent, **p, *first = NULL;
+       struct nft_rbtree *priv = nft_set_priv(set);
++      u8 cur_genmask = nft_genmask_cur(net);
+       u8 genmask = nft_genmask_next(net);
+       int d, err;
+@@ -357,8 +358,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+               if (!nft_set_elem_active(&rbe->ext, genmask))
+                       continue;
+-              /* perform garbage collection to avoid bogus overlap reports. */
+-              if (nft_set_elem_expired(&rbe->ext)) {
++              /* perform garbage collection to avoid bogus overlap reports
++               * but skip new elements in this transaction.
++               */
++              if (nft_set_elem_expired(&rbe->ext) &&
++                  nft_set_elem_active(&rbe->ext, cur_genmask)) {
+                       err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+                       if (err < 0)
+                               return err;
+-- 
+2.40.1
+
diff --git a/queue-6.5/netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch b/queue-6.5/netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch
new file mode 100644 (file)
index 0000000..c73cc5c
--- /dev/null
@@ -0,0 +1,96 @@
+From 34fb47018f5d7fe6a5d10225b49a7b6cc7150f8a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Sep 2023 23:13:56 +0200
+Subject: netfilter: nftables: exthdr: fix 4-byte stack OOB write
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit fd94d9dadee58e09b49075240fe83423eb1dcd36 ]
+
+If priv->len is a multiple of 4, then dst[len / 4] can write past
+the destination array which leads to stack corruption.
+
+This construct is necessary to clean the remainder of the register
+in case ->len is NOT a multiple of the register size, so make it
+conditional just like nft_payload.c does.
+
+The bug was added in 4.1 cycle and then copied/inherited when
+tcp/sctp and ip option support was added.
+
+Bug reported by Zero Day Initiative project (ZDI-CAN-21950,
+ZDI-CAN-21951, ZDI-CAN-21961).
+
+Fixes: 49499c3e6e18 ("netfilter: nf_tables: switch registers to 32 bit addressing")
+Fixes: 935b7f643018 ("netfilter: nft_exthdr: add TCP option matching")
+Fixes: 133dc203d77d ("netfilter: nft_exthdr: Support SCTP chunks")
+Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_exthdr.c | 22 ++++++++++++++--------
+ 1 file changed, 14 insertions(+), 8 deletions(-)
+
+diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
+index a9844eefedebc..3fbaa7bf41f9c 100644
+--- a/net/netfilter/nft_exthdr.c
++++ b/net/netfilter/nft_exthdr.c
+@@ -35,6 +35,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
+               return opt[offset + 1];
+ }
++static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
++{
++      if (len % NFT_REG32_SIZE)
++              dest[len / NFT_REG32_SIZE] = 0;
++
++      return skb_copy_bits(skb, offset, dest, len);
++}
++
+ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
+                                struct nft_regs *regs,
+                                const struct nft_pktinfo *pkt)
+@@ -56,8 +64,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
+       }
+       offset += priv->offset;
+-      dest[priv->len / NFT_REG32_SIZE] = 0;
+-      if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
++      if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
+               goto err;
+       return;
+ err:
+@@ -153,8 +160,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
+       }
+       offset += priv->offset;
+-      dest[priv->len / NFT_REG32_SIZE] = 0;
+-      if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
++      if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
+               goto err;
+       return;
+ err:
+@@ -210,7 +216,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
+               if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+                       *dest = 1;
+               } else {
+-                      dest[priv->len / NFT_REG32_SIZE] = 0;
++                      if (priv->len % NFT_REG32_SIZE)
++                              dest[priv->len / NFT_REG32_SIZE] = 0;
+                       memcpy(dest, opt + offset, priv->len);
+               }
+@@ -388,9 +395,8 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
+                           offset + ntohs(sch->length) > pkt->skb->len)
+                               break;
+-                      dest[priv->len / NFT_REG32_SIZE] = 0;
+-                      if (skb_copy_bits(pkt->skb, offset + priv->offset,
+-                                        dest, priv->len) < 0)
++                      if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset,
++                                              dest, priv->len) < 0)
+                               break;
+                       return;
+               }
+-- 
+2.40.1
+
diff --git a/queue-6.5/octeontx2-af-fix-truncation-of-smq-in-cn10k-nix-aq-e.patch b/queue-6.5/octeontx2-af-fix-truncation-of-smq-in-cn10k-nix-aq-e.patch
new file mode 100644 (file)
index 0000000..fae525d
--- /dev/null
@@ -0,0 +1,81 @@
+From f4df59977ebd8ccfca8b0dc5d1681aa3d9eee26f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Sep 2023 12:18:16 +0530
+Subject: octeontx2-af: Fix truncation of smq in CN10K NIX AQ enqueue mbox
+ handler
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit 29fe7a1b62717d58f033009874554d99d71f7d37 ]
+
+The smq value used in the CN10K NIX AQ instruction enqueue mailbox
+handler was truncated to 9-bit value from 10-bit value because of
+typecasting the CN10K mbox request structure to the CN9K structure.
+This hasn't caused any problems when programming the NIX SQ context
+to the HW, because the context structure is the same size. However,
+it does cause a problem when accessing the structure parameters.
+This patch reads the right smq value for each platform.
+
+Fixes: 30077d210c83 ("octeontx2-af: cn10k: Update NIX/NPA context structure")
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/af/rvu_nix.c   | 21 +++++++++++++++++--
+ 1 file changed, 19 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index c2f68678e947e..23c2f2ed2fb83 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -846,6 +846,21 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
+       return 0;
+ }
++static void nix_get_aq_req_smq(struct rvu *rvu, struct nix_aq_enq_req *req,
++                             u16 *smq, u16 *smq_mask)
++{
++      struct nix_cn10k_aq_enq_req *aq_req;
++
++      if (!is_rvu_otx2(rvu)) {
++              aq_req = (struct nix_cn10k_aq_enq_req *)req;
++              *smq = aq_req->sq.smq;
++              *smq_mask = aq_req->sq_mask.smq;
++      } else {
++              *smq = req->sq.smq;
++              *smq_mask = req->sq_mask.smq;
++      }
++}
++
+ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
+                                  struct nix_aq_enq_req *req,
+                                  struct nix_aq_enq_rsp *rsp)
+@@ -857,6 +872,7 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
+       struct rvu_block *block;
+       struct admin_queue *aq;
+       struct rvu_pfvf *pfvf;
++      u16 smq, smq_mask;
+       void *ctx, *mask;
+       bool ena;
+       u64 cfg;
+@@ -928,13 +944,14 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
+       if (rc)
+               return rc;
++      nix_get_aq_req_smq(rvu, req, &smq, &smq_mask);
+       /* Check if SQ pointed SMQ belongs to this PF/VF or not */
+       if (req->ctype == NIX_AQ_CTYPE_SQ &&
+           ((req->op == NIX_AQ_INSTOP_INIT && req->sq.ena) ||
+            (req->op == NIX_AQ_INSTOP_WRITE &&
+-            req->sq_mask.ena && req->sq_mask.smq && req->sq.ena))) {
++            req->sq_mask.ena && req->sq.ena && smq_mask))) {
+               if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ,
+-                                   pcifunc, req->sq.smq))
++                                   pcifunc, smq))
+                       return NIX_AF_ERR_AQ_ENQUEUE;
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-annotate-bpf-don-t-enclose-non-debug-code-with-.patch b/queue-6.5/perf-annotate-bpf-don-t-enclose-non-debug-code-with-.patch
new file mode 100644 (file)
index 0000000..1eee963
--- /dev/null
@@ -0,0 +1,114 @@
+From 5fd2d5ef0c64a53e793ecc4ce9da6d1fbd8282da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 18:22:14 -0300
+Subject: perf annotate bpf: Don't enclose non-debug code with an assert()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Arnaldo Carvalho de Melo <acme@redhat.com>
+
+[ Upstream commit 979e9c9fc9c2a761303585e07fe2699bdd88182f ]
+
+In 616b14b47a86d880 ("perf build: Conditionally define NDEBUG") we
+started using NDEBUG=1 when DEBUG=1 isn't present, so code that is
+enclosed with assert() is not called.
+
+In dd317df072071903 ("perf build: Make binutil libraries opt in") we
+stopped linking against binutils-devel, for licensing reasons.
+
+Recently people asked me why annotation of BPF programs wasn't working,
+i.e. this:
+
+  $ perf annotate bpf_prog_5280546344e3f45c_kfree_skb
+
+was returning:
+
+  case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF:
+     scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation");
+
+This was on a fedora rpm, so it's new enough that I had to try to test by
+rebuilding using BUILD_NONDISTRO=1, only to get it segfaulting on me.
+
+This combination made this libopcode function not to be called:
+
+        assert(bfd_check_format(bfdf, bfd_object));
+
+Changing it to:
+
+       if (!bfd_check_format(bfdf, bfd_object))
+               abort();
+
+Made it work, looking at this "check" function made me realize it
+changes the 'bfdf' internal state, i.e. we better call it.
+
+So stop using assert() on it, just call it and abort if it fails.
+
+Probably it is better to propagate the error, etc, but it seems it is
+unlikely to fail from the usage done so far and we really need to stop
+using libopcodes, so do the quick fix above and move on.
+
+With it we have BPF annotation back working when built with
+BUILD_NONDISTRO=1:
+
+  ⬢[acme@toolbox perf-tools-next]$ perf annotate --stdio2 bpf_prog_5280546344e3f45c_kfree_skb   | head
+  No kallsyms or vmlinux with build-id 939bc71a1a51cdc434e60af93c7e734f7d5c0e7e was found
+  Samples: 12  of event 'cpu-clock:ppp', 4000 Hz, Event count (approx.): 3000000, [percent: local period]
+  bpf_prog_5280546344e3f45c_kfree_skb() bpf_prog_5280546344e3f45c_kfree_skb
+  Percent      int kfree_skb(struct trace_event_raw_kfree_skb *args) {
+                 nop
+   33.33         xchg   %ax,%ax
+                 push   %rbp
+                 mov    %rsp,%rbp
+                 sub    $0x180,%rsp
+                 push   %rbx
+                 push   %r13
+  ⬢[acme@toolbox perf-tools-next]$
+
+Fixes: 6987561c9e86eace ("perf annotate: Enable annotation of BPF programs")
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mohamed Mahmoud <mmahmoud@redhat.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Dave Tucker <datucker@redhat.com>
+Cc: Derek Barbosa <debarbos@redhat.com>
+Cc: Song Liu <songliubraving@fb.com>
+Link: https://lore.kernel.org/lkml/ZMrMzoQBe0yqMek1@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/annotate.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
+index ba988a13dacb6..82956adf99632 100644
+--- a/tools/perf/util/annotate.c
++++ b/tools/perf/util/annotate.c
+@@ -1846,8 +1846,11 @@ static int symbol__disassemble_bpf(struct symbol *sym,
+       perf_exe(tpath, sizeof(tpath));
+       bfdf = bfd_openr(tpath, NULL);
+-      assert(bfdf);
+-      assert(bfd_check_format(bfdf, bfd_object));
++      if (bfdf == NULL)
++              abort();
++
++      if (!bfd_check_format(bfdf, bfd_object))
++              abort();
+       s = open_memstream(&buf, &buf_size);
+       if (!s) {
+@@ -1895,7 +1898,8 @@ static int symbol__disassemble_bpf(struct symbol *sym,
+ #else
+       disassemble = disassembler(bfdf);
+ #endif
+-      assert(disassemble);
++      if (disassemble == NULL)
++              abort();
+       fflush(s);
+       do {
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-bpf-filter-fix-sample-flag-check-with.patch b/queue-6.5/perf-bpf-filter-fix-sample-flag-check-with.patch
new file mode 100644 (file)
index 0000000..bccb7d0
--- /dev/null
@@ -0,0 +1,62 @@
+From 6895edb3735a24c36f1be5cbfb4be0e4d161ec6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Aug 2023 19:58:21 -0700
+Subject: perf bpf-filter: Fix sample flag check with ||
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+[ Upstream commit dc7f01f1bceca38839992b3371e0be8a3c9d5acf ]
+
+For logical OR operator, the actual sample_flags are in the 'groups'
+list so it needs to check entries in the list instead.  Otherwise it
+would show the following error message.
+
+  $ sudo perf record -a -e cycles:p --filter 'period > 100 || weight > 0' sleep 1
+  Error: cycles:p event does not have sample flags 0
+  failed to set filter "BPF" on event cycles:p with 2 (No such file or directory)
+
+Actually it should warn that 'weight' is used without the WEIGHT flag.
+
+  Error: cycles:p event does not have PERF_SAMPLE_WEIGHT
+   Hint: please add -W option to perf record
+  failed to set filter "BPF" on event cycles:p with 2 (No such file or directory)
+
+Fixes: 4310551b76e0d676 ("perf bpf filter: Show warning for missing sample flags")
+Reviewed-by: Ian Rogers <irogers@google.com>
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20230811025822.3859771-1-namhyung@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/bpf-filter.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c
+index 0b30688d78a7f..a1f076ef653d3 100644
+--- a/tools/perf/util/bpf-filter.c
++++ b/tools/perf/util/bpf-filter.c
+@@ -62,6 +62,16 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr *
+       if (evsel->core.attr.sample_type & expr->sample_flags)
+               return 0;
++      if (expr->op == PBF_OP_GROUP_BEGIN) {
++              struct perf_bpf_filter_expr *group;
++
++              list_for_each_entry(group, &expr->groups, list) {
++                      if (check_sample_flags(evsel, group) < 0)
++                              return -1;
++              }
++              return 0;
++      }
++
+       info = get_sample_info(expr->sample_flags);
+       if (info == NULL) {
+               pr_err("Error: %s event does not have sample flags %lx\n",
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-dlfilter-add-al_cleanup.patch b/queue-6.5/perf-dlfilter-add-al_cleanup.patch
new file mode 100644 (file)
index 0000000..88e01f6
--- /dev/null
@@ -0,0 +1,735 @@
+From 22609c9b1f933b9114199b49d4301e009cf2b323 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 12:18:57 +0300
+Subject: perf dlfilter: Add al_cleanup()
+
+From: Adrian Hunter <adrian.hunter@intel.com>
+
+[ Upstream commit 82b0a10390e5f198a4e23c9cc6a7307d2cf099f3 ]
+
+Add perf_dlfilter_fns.al_cleanup() to do addr_location__exit() on data
+passed via perf_dlfilter_fns.resolve_address().
+
+Add dlfilter-test-api-v2 to the "dlfilter C API" test to test it.
+
+Update documentation, clarifying that data returned by APIs should not
+be dereferenced after filter_event() and filter_event_early() return.
+
+Fixes: 0dd5041c9a0eaf8c ("perf addr_location: Add init/exit/copy functions")
+Reviewed-by: Ian Rogers <irogers@google.com>
+Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Link: https://lore.kernel.org/r/20230731091857.10681-3-adrian.hunter@intel.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/Documentation/perf-dlfilter.txt  |  22 +-
+ tools/perf/Makefile.perf                    |   2 +-
+ tools/perf/dlfilters/dlfilter-test-api-v2.c | 377 ++++++++++++++++++++
+ tools/perf/include/perf/perf_dlfilter.h     |  11 +-
+ tools/perf/tests/dlfilter-test.c            |  38 +-
+ tools/perf/util/dlfilter.c                  |  29 ++
+ 6 files changed, 464 insertions(+), 15 deletions(-)
+ create mode 100644 tools/perf/dlfilters/dlfilter-test-api-v2.c
+
+diff --git a/tools/perf/Documentation/perf-dlfilter.txt b/tools/perf/Documentation/perf-dlfilter.txt
+index fb22e3b31dc5c..8887cc20a809e 100644
+--- a/tools/perf/Documentation/perf-dlfilter.txt
++++ b/tools/perf/Documentation/perf-dlfilter.txt
+@@ -64,6 +64,12 @@ internal filtering.
+ If implemented, 'filter_description' should return a one-line description
+ of the filter, and optionally a longer description.
++Do not assume the 'sample' argument is valid (dereferenceable)
++after 'filter_event' and 'filter_event_early' return.
++
++Do not assume data referenced by pointers in struct perf_dlfilter_sample
++is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return.
++
+ The perf_dlfilter_sample structure
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@@ -150,7 +156,8 @@ struct perf_dlfilter_fns {
+       const char *(*srcline)(void *ctx, __u32 *line_number);
+       struct perf_event_attr *(*attr)(void *ctx);
+       __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
+-      void *(*reserved[120])(void *);
++      void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al);
++      void *(*reserved[119])(void *);
+ };
+ ----
+@@ -161,7 +168,8 @@ struct perf_dlfilter_fns {
+ 'args' returns arguments from --dlarg options.
+ 'resolve_address' provides information about 'address'. al->size must be set
+-before calling. Returns 0 on success, -1 otherwise.
++before calling. Returns 0 on success, -1 otherwise. Call al_cleanup() (if present,
++see below) when 'al' data is no longer needed.
+ 'insn' returns instruction bytes and length.
+@@ -171,6 +179,12 @@ before calling. Returns 0 on success, -1 otherwise.
+ 'object_code' reads object code and returns the number of bytes read.
++'al_cleanup' must be called (if present, so check perf_dlfilter_fns.al_cleanup != NULL)
++after resolve_address() to free any associated resources.
++
++Do not assume pointers obtained via perf_dlfilter_fns are valid (dereferenceable)
++after 'filter_event' and 'filter_event_early' return.
++
+ The perf_dlfilter_al structure
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@@ -197,9 +211,13 @@ struct perf_dlfilter_al {
+       /* Below members are only populated by resolve_ip() */
+       __u8 filtered; /* true if this sample event will be filtered out */
+       const char *comm;
++      void *priv; /* Private data. Do not change */
+ };
+ ----
++Do not assume data referenced by pointers in struct perf_dlfilter_al
++is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return.
++
+ perf_dlfilter_sample flags
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
+diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
+index 097316ef38e6a..f178b36c69402 100644
+--- a/tools/perf/Makefile.perf
++++ b/tools/perf/Makefile.perf
+@@ -381,7 +381,7 @@ ifndef NO_JVMTI
+ PROGRAMS += $(OUTPUT)$(LIBJVMTI)
+ endif
+-DLFILTERS := dlfilter-test-api-v0.so dlfilter-show-cycles.so
++DLFILTERS := dlfilter-test-api-v0.so dlfilter-test-api-v2.so dlfilter-show-cycles.so
+ DLFILTERS := $(patsubst %,$(OUTPUT)dlfilters/%,$(DLFILTERS))
+ # what 'all' will build and 'install' will install, in perfexecdir
+diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c
+new file mode 100644
+index 0000000000000..38e593d92920c
+--- /dev/null
++++ b/tools/perf/dlfilters/dlfilter-test-api-v2.c
+@@ -0,0 +1,377 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Test v2 API for perf --dlfilter shared object
++ * Copyright (c) 2023, Intel Corporation.
++ */
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <stdbool.h>
++
++/*
++ * Copy v2 API instead of including current API
++ */
++#include <linux/perf_event.h>
++#include <linux/types.h>
++
++/*
++ * The following macro can be used to determine if this header defines
++ * perf_dlfilter_sample machine_pid and vcpu.
++ */
++#define PERF_DLFILTER_HAS_MACHINE_PID
++
++/* Definitions for perf_dlfilter_sample flags */
++enum {
++      PERF_DLFILTER_FLAG_BRANCH       = 1ULL << 0,
++      PERF_DLFILTER_FLAG_CALL         = 1ULL << 1,
++      PERF_DLFILTER_FLAG_RETURN       = 1ULL << 2,
++      PERF_DLFILTER_FLAG_CONDITIONAL  = 1ULL << 3,
++      PERF_DLFILTER_FLAG_SYSCALLRET   = 1ULL << 4,
++      PERF_DLFILTER_FLAG_ASYNC        = 1ULL << 5,
++      PERF_DLFILTER_FLAG_INTERRUPT    = 1ULL << 6,
++      PERF_DLFILTER_FLAG_TX_ABORT     = 1ULL << 7,
++      PERF_DLFILTER_FLAG_TRACE_BEGIN  = 1ULL << 8,
++      PERF_DLFILTER_FLAG_TRACE_END    = 1ULL << 9,
++      PERF_DLFILTER_FLAG_IN_TX        = 1ULL << 10,
++      PERF_DLFILTER_FLAG_VMENTRY      = 1ULL << 11,
++      PERF_DLFILTER_FLAG_VMEXIT       = 1ULL << 12,
++};
++
++/*
++ * perf sample event information (as per perf script and <linux/perf_event.h>)
++ */
++struct perf_dlfilter_sample {
++      __u32 size; /* Size of this structure (for compatibility checking) */
++      __u16 ins_lat;          /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
++      __u16 p_stage_cyc;      /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
++      __u64 ip;
++      __s32 pid;
++      __s32 tid;
++      __u64 time;
++      __u64 addr;
++      __u64 id;
++      __u64 stream_id;
++      __u64 period;
++      __u64 weight;           /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
++      __u64 transaction;      /* Refer PERF_SAMPLE_TRANSACTION in <linux/perf_event.h> */
++      __u64 insn_cnt; /* For instructions-per-cycle (IPC) */
++      __u64 cyc_cnt;          /* For instructions-per-cycle (IPC) */
++      __s32 cpu;
++      __u32 flags;            /* Refer PERF_DLFILTER_FLAG_* above */
++      __u64 data_src;         /* Refer PERF_SAMPLE_DATA_SRC in <linux/perf_event.h> */
++      __u64 phys_addr;        /* Refer PERF_SAMPLE_PHYS_ADDR in <linux/perf_event.h> */
++      __u64 data_page_size;   /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in <linux/perf_event.h> */
++      __u64 code_page_size;   /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in <linux/perf_event.h> */
++      __u64 cgroup;           /* Refer PERF_SAMPLE_CGROUP in <linux/perf_event.h> */
++      __u8  cpumode;          /* Refer CPUMODE_MASK etc in <linux/perf_event.h> */
++      __u8  addr_correlates_sym; /* True => resolve_addr() can be called */
++      __u16 misc;             /* Refer perf_event_header in <linux/perf_event.h> */
++      __u32 raw_size;         /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
++      const void *raw_data;   /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
++      __u64 brstack_nr;       /* Number of brstack entries */
++      const struct perf_branch_entry *brstack; /* Refer <linux/perf_event.h> */
++      __u64 raw_callchain_nr; /* Number of raw_callchain entries */
++      const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */
++      const char *event;
++      __s32 machine_pid;
++      __s32 vcpu;
++};
++
++/*
++ * Address location (as per perf script)
++ */
++struct perf_dlfilter_al {
++      __u32 size; /* Size of this structure (for compatibility checking) */
++      __u32 symoff;
++      const char *sym;
++      __u64 addr; /* Mapped address (from dso) */
++      __u64 sym_start;
++      __u64 sym_end;
++      const char *dso;
++      __u8  sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer <elf.h> */
++      __u8  is_64_bit; /* Only valid if dso is not NULL */
++      __u8  is_kernel_ip; /* True if in kernel space */
++      __u32 buildid_size;
++      __u8 *buildid;
++      /* Below members are only populated by resolve_ip() */
++      __u8 filtered; /* True if this sample event will be filtered out */
++      const char *comm;
++      void *priv; /* Private data (v2 API) */
++};
++
++struct perf_dlfilter_fns {
++      /* Return information about ip */
++      const struct perf_dlfilter_al *(*resolve_ip)(void *ctx);
++      /* Return information about addr (if addr_correlates_sym) */
++      const struct perf_dlfilter_al *(*resolve_addr)(void *ctx);
++      /* Return arguments from --dlarg option */
++      char **(*args)(void *ctx, int *dlargc);
++      /*
++       * Return information about address (al->size must be set before
++       * calling). Returns 0 on success, -1 otherwise. Call al_cleanup()
++       * when 'al' data is no longer needed.
++       */
++      __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al);
++      /* Return instruction bytes and length */
++      const __u8 *(*insn)(void *ctx, __u32 *length);
++      /* Return source file name and line number */
++      const char *(*srcline)(void *ctx, __u32 *line_number);
++      /* Return perf_event_attr, refer <linux/perf_event.h> */
++      struct perf_event_attr *(*attr)(void *ctx);
++      /* Read object code, return numbers of bytes read */
++      __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
++      /*
++       * If present (i.e. must check al_cleanup != NULL), call after
++       * resolve_address() to free any associated resources. (v2 API)
++       */
++      void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al);
++      /* Reserved */
++      void *(*reserved[119])(void *);
++};
++
++struct perf_dlfilter_fns perf_dlfilter_fns;
++
++static int verbose;
++
++#define pr_debug(fmt, ...) do { \
++              if (verbose > 0) \
++                      fprintf(stderr, fmt, ##__VA_ARGS__); \
++      } while (0)
++
++static int test_fail(const char *msg)
++{
++      pr_debug("%s\n", msg);
++      return -1;
++}
++
++#define CHECK(x) do { \
++              if (!(x)) \
++                      return test_fail("Check '" #x "' failed\n"); \
++      } while (0)
++
++struct filter_data {
++      __u64 ip;
++      __u64 addr;
++      int do_early;
++      int early_filter_cnt;
++      int filter_cnt;
++};
++
++static struct filter_data *filt_dat;
++
++int start(void **data, void *ctx)
++{
++      int dlargc;
++      char **dlargv;
++      struct filter_data *d;
++      static bool called;
++
++      verbose = 1;
++
++      CHECK(!filt_dat && !called);
++      called = true;
++
++      d = calloc(1, sizeof(*d));
++      if (!d)
++              test_fail("Failed to allocate memory");
++      filt_dat = d;
++      *data = d;
++
++      dlargv = perf_dlfilter_fns.args(ctx, &dlargc);
++
++      CHECK(dlargc == 6);
++      CHECK(!strcmp(dlargv[0], "first"));
++      verbose = strtol(dlargv[1], NULL, 0);
++      d->ip = strtoull(dlargv[2], NULL, 0);
++      d->addr = strtoull(dlargv[3], NULL, 0);
++      d->do_early = strtol(dlargv[4], NULL, 0);
++      CHECK(!strcmp(dlargv[5], "last"));
++
++      pr_debug("%s API\n", __func__);
++
++      return 0;
++}
++
++#define CHECK_SAMPLE(x) do { \
++              if (sample->x != expected.x) \
++                      return test_fail("'" #x "' not expected value\n"); \
++      } while (0)
++
++static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample *sample)
++{
++      struct perf_dlfilter_sample expected = {
++              .ip             = d->ip,
++              .pid            = 12345,
++              .tid            = 12346,
++              .time           = 1234567890,
++              .addr           = d->addr,
++              .id             = 99,
++              .stream_id      = 101,
++              .period         = 543212345,
++              .cpu            = 31,
++              .cpumode        = PERF_RECORD_MISC_USER,
++              .addr_correlates_sym = 1,
++              .misc           = PERF_RECORD_MISC_USER,
++      };
++
++      CHECK(sample->size >= sizeof(struct perf_dlfilter_sample));
++
++      CHECK_SAMPLE(ip);
++      CHECK_SAMPLE(pid);
++      CHECK_SAMPLE(tid);
++      CHECK_SAMPLE(time);
++      CHECK_SAMPLE(addr);
++      CHECK_SAMPLE(id);
++      CHECK_SAMPLE(stream_id);
++      CHECK_SAMPLE(period);
++      CHECK_SAMPLE(cpu);
++      CHECK_SAMPLE(cpumode);
++      CHECK_SAMPLE(addr_correlates_sym);
++      CHECK_SAMPLE(misc);
++
++      CHECK(!sample->raw_data);
++      CHECK_SAMPLE(brstack_nr);
++      CHECK(!sample->brstack);
++      CHECK_SAMPLE(raw_callchain_nr);
++      CHECK(!sample->raw_callchain);
++
++#define EVENT_NAME "branches:"
++      CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME)));
++
++      return 0;
++}
++
++static int check_al(void *ctx)
++{
++      const struct perf_dlfilter_al *al;
++
++      al = perf_dlfilter_fns.resolve_ip(ctx);
++      if (!al)
++              return test_fail("resolve_ip() failed");
++
++      CHECK(al->sym && !strcmp("foo", al->sym));
++      CHECK(!al->symoff);
++
++      return 0;
++}
++
++static int check_addr_al(void *ctx)
++{
++      const struct perf_dlfilter_al *addr_al;
++
++      addr_al = perf_dlfilter_fns.resolve_addr(ctx);
++      if (!addr_al)
++              return test_fail("resolve_addr() failed");
++
++      CHECK(addr_al->sym && !strcmp("bar", addr_al->sym));
++      CHECK(!addr_al->symoff);
++
++      return 0;
++}
++
++static int check_address_al(void *ctx, const struct perf_dlfilter_sample *sample)
++{
++      struct perf_dlfilter_al address_al;
++      const struct perf_dlfilter_al *al;
++
++      al = perf_dlfilter_fns.resolve_ip(ctx);
++      if (!al)
++              return test_fail("resolve_ip() failed");
++
++      address_al.size = sizeof(address_al);
++      if (perf_dlfilter_fns.resolve_address(ctx, sample->ip, &address_al))
++              return test_fail("resolve_address() failed");
++
++      CHECK(address_al.sym && al->sym);
++      CHECK(!strcmp(address_al.sym, al->sym));
++      CHECK(address_al.addr == al->addr);
++      CHECK(address_al.sym_start == al->sym_start);
++      CHECK(address_al.sym_end == al->sym_end);
++      CHECK(address_al.dso && al->dso);
++      CHECK(!strcmp(address_al.dso, al->dso));
++
++      /* al_cleanup() is v2 API so may not be present */
++      if (perf_dlfilter_fns.al_cleanup)
++              perf_dlfilter_fns.al_cleanup(ctx, &address_al);
++
++      return 0;
++}
++
++static int check_attr(void *ctx)
++{
++      struct perf_event_attr *attr = perf_dlfilter_fns.attr(ctx);
++
++      CHECK(attr);
++      CHECK(attr->type == PERF_TYPE_HARDWARE);
++      CHECK(attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
++
++      return 0;
++}
++
++static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early)
++{
++      struct filter_data *d = data;
++
++      CHECK(data && filt_dat == data);
++
++      if (early) {
++              CHECK(!d->early_filter_cnt);
++              d->early_filter_cnt += 1;
++      } else {
++              CHECK(!d->filter_cnt);
++              CHECK(d->early_filter_cnt);
++              CHECK(d->do_early != 2);
++              d->filter_cnt += 1;
++      }
++
++      if (check_sample(data, sample))
++              return -1;
++
++      if (check_attr(ctx))
++              return -1;
++
++      if (early && !d->do_early)
++              return 0;
++
++      if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample))
++              return -1;
++
++      if (early)
++              return d->do_early == 2;
++
++      return 1;
++}
++
++int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
++{
++      pr_debug("%s API\n", __func__);
++
++      return do_checks(data, sample, ctx, true);
++}
++
++int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
++{
++      pr_debug("%s API\n", __func__);
++
++      return do_checks(data, sample, ctx, false);
++}
++
++int stop(void *data, void *ctx)
++{
++      static bool called;
++
++      pr_debug("%s API\n", __func__);
++
++      CHECK(data && filt_dat == data && !called);
++      called = true;
++
++      free(data);
++      filt_dat = NULL;
++      return 0;
++}
++
++const char *filter_description(const char **long_description)
++{
++      *long_description = "Filter used by the 'dlfilter C API' perf test";
++      return "dlfilter to test v2 C API";
++}
+diff --git a/tools/perf/include/perf/perf_dlfilter.h b/tools/perf/include/perf/perf_dlfilter.h
+index a26e2f129f83e..16fc4568ac53b 100644
+--- a/tools/perf/include/perf/perf_dlfilter.h
++++ b/tools/perf/include/perf/perf_dlfilter.h
+@@ -91,6 +91,7 @@ struct perf_dlfilter_al {
+       /* Below members are only populated by resolve_ip() */
+       __u8 filtered; /* True if this sample event will be filtered out */
+       const char *comm;
++      void *priv; /* Private data. Do not change */
+ };
+ struct perf_dlfilter_fns {
+@@ -102,7 +103,8 @@ struct perf_dlfilter_fns {
+       char **(*args)(void *ctx, int *dlargc);
+       /*
+        * Return information about address (al->size must be set before
+-       * calling). Returns 0 on success, -1 otherwise.
++       * calling). Returns 0 on success, -1 otherwise. Call al_cleanup()
++       * when 'al' data is no longer needed.
+        */
+       __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al);
+       /* Return instruction bytes and length */
+@@ -113,8 +115,13 @@ struct perf_dlfilter_fns {
+       struct perf_event_attr *(*attr)(void *ctx);
+       /* Read object code, return numbers of bytes read */
+       __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
++      /*
++       * If present (i.e. must check al_cleanup != NULL), call after
++       * resolve_address() to free any associated resources.
++       */
++      void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al);
+       /* Reserved */
+-      void *(*reserved[120])(void *);
++      void *(*reserved[119])(void *);
+ };
+ /*
+diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c
+index 086fd2179e41f..da3a9b50b1b1f 100644
+--- a/tools/perf/tests/dlfilter-test.c
++++ b/tools/perf/tests/dlfilter-test.c
+@@ -1,7 +1,7 @@
+ // SPDX-License-Identifier: GPL-2.0
+ /*
+  * Test dlfilter C API. A perf.data file is synthesized and then processed
+- * by perf script with a dlfilter named dlfilter-test-api-v0.so. Also a C file
++ * by perf script with dlfilters named dlfilter-test-api-v*.so. Also a C file
+  * is compiled to provide a dso to match the synthesized perf.data file.
+  */
+@@ -37,6 +37,8 @@
+ #define MAP_START 0x400000
++#define DLFILTER_TEST_NAME_MAX 128
++
+ struct test_data {
+       struct perf_tool tool;
+       struct machine *machine;
+@@ -45,6 +47,8 @@ struct test_data {
+       u64 bar;
+       u64 ip;
+       u64 addr;
++      char name[DLFILTER_TEST_NAME_MAX];
++      char desc[DLFILTER_TEST_NAME_MAX];
+       char perf[PATH_MAX];
+       char perf_data_file_name[PATH_MAX];
+       char c_file_name[PATH_MAX];
+@@ -215,7 +219,7 @@ static int write_prog(char *file_name)
+       return err ? -1 : 0;
+ }
+-static int get_dlfilters_path(char *buf, size_t sz)
++static int get_dlfilters_path(const char *name, char *buf, size_t sz)
+ {
+       char perf[PATH_MAX];
+       char path[PATH_MAX];
+@@ -224,12 +228,12 @@ static int get_dlfilters_path(char *buf, size_t sz)
+       perf_exe(perf, sizeof(perf));
+       perf_path = dirname(perf);
+-      snprintf(path, sizeof(path), "%s/dlfilters/dlfilter-test-api-v0.so", perf_path);
++      snprintf(path, sizeof(path), "%s/dlfilters/%s", perf_path, name);
+       if (access(path, R_OK)) {
+               exec_path = get_argv_exec_path();
+               if (!exec_path)
+                       return -1;
+-              snprintf(path, sizeof(path), "%s/dlfilters/dlfilter-test-api-v0.so", exec_path);
++              snprintf(path, sizeof(path), "%s/dlfilters/%s", exec_path, name);
+               free(exec_path);
+               if (access(path, R_OK))
+                       return -1;
+@@ -244,9 +248,9 @@ static int check_filter_desc(struct test_data *td)
+       char *desc = NULL;
+       int ret;
+-      if (get_filter_desc(td->dlfilters, "dlfilter-test-api-v0.so", &desc, &long_desc) &&
++      if (get_filter_desc(td->dlfilters, td->name, &desc, &long_desc) &&
+           long_desc && !strcmp(long_desc, "Filter used by the 'dlfilter C API' perf test") &&
+-          desc && !strcmp(desc, "dlfilter to test v0 C API"))
++          desc && !strcmp(desc, td->desc))
+               ret = 0;
+       else
+               ret = -1;
+@@ -284,7 +288,7 @@ static int get_ip_addr(struct test_data *td)
+ static int do_run_perf_script(struct test_data *td, int do_early)
+ {
+       return system_cmd("%s script -i %s "
+-                        "--dlfilter %s/dlfilter-test-api-v0.so "
++                        "--dlfilter %s/%s "
+                         "--dlarg first "
+                         "--dlarg %d "
+                         "--dlarg %" PRIu64 " "
+@@ -292,7 +296,7 @@ static int do_run_perf_script(struct test_data *td, int do_early)
+                         "--dlarg %d "
+                         "--dlarg last",
+                         td->perf, td->perf_data_file_name, td->dlfilters,
+-                        verbose, td->ip, td->addr, do_early);
++                        td->name, verbose, td->ip, td->addr, do_early);
+ }
+ static int run_perf_script(struct test_data *td)
+@@ -321,7 +325,7 @@ static int test__dlfilter_test(struct test_data *td)
+       u64 id = 99;
+       int err;
+-      if (get_dlfilters_path(td->dlfilters, PATH_MAX))
++      if (get_dlfilters_path(td->name, td->dlfilters, PATH_MAX))
+               return test_result("dlfilters not found", TEST_SKIP);
+       if (check_filter_desc(td))
+@@ -399,14 +403,18 @@ static void test_data__free(struct test_data *td)
+       }
+ }
+-static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
++static int test__dlfilter_ver(int ver)
+ {
+       struct test_data td = {.fd = -1};
+       int pid = getpid();
+       int err;
++      pr_debug("\n-- Testing version %d API --\n", ver);
++
+       perf_exe(td.perf, sizeof(td.perf));
++      snprintf(td.name, sizeof(td.name), "dlfilter-test-api-v%d.so", ver);
++      snprintf(td.desc, sizeof(td.desc), "dlfilter to test v%d C API", ver);
+       snprintf(td.perf_data_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-perf-data", pid);
+       snprintf(td.c_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-prog.c", pid);
+       snprintf(td.prog_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-prog", pid);
+@@ -416,4 +424,14 @@ static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __
+       return err;
+ }
++static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
++{
++      int err = test__dlfilter_ver(0);
++
++      if (err)
++              return err;
++      /* No test for version 1 */
++      return test__dlfilter_ver(2);
++}
++
+ DEFINE_SUITE("dlfilter C API", dlfilter);
+diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
+index 798a53d7e6c9d..e0f822ebb9b97 100644
+--- a/tools/perf/util/dlfilter.c
++++ b/tools/perf/util/dlfilter.c
+@@ -10,6 +10,8 @@
+ #include <subcmd/exec-cmd.h>
+ #include <linux/zalloc.h>
+ #include <linux/build_bug.h>
++#include <linux/kernel.h>
++#include <linux/string.h>
+ #include "debug.h"
+ #include "event.h"
+@@ -63,6 +65,7 @@ static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al)
+       d_al->addr = al->addr;
+       d_al->comm = NULL;
+       d_al->filtered = 0;
++      d_al->priv = NULL;
+ }
+ static struct addr_location *get_al(struct dlfilter *d)
+@@ -151,6 +154,11 @@ static char **dlfilter__args(void *ctx, int *dlargc)
+       return d->dlargv;
+ }
++static bool has_priv(struct perf_dlfilter_al *d_al_p)
++{
++      return d_al_p->size >= offsetof(struct perf_dlfilter_al, priv) + sizeof(d_al_p->priv);
++}
++
+ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlfilter_al *d_al_p)
+ {
+       struct dlfilter *d = (struct dlfilter *)ctx;
+@@ -177,9 +185,29 @@ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlf
+       memcpy(d_al_p, &d_al, min((size_t)sz, sizeof(d_al)));
+       d_al_p->size = sz;
++      if (has_priv(d_al_p))
++              d_al_p->priv = memdup(&al, sizeof(al));
++
+       return 0;
+ }
++static void dlfilter__al_cleanup(void *ctx __maybe_unused, struct perf_dlfilter_al *d_al_p)
++{
++      struct addr_location *al;
++
++      /* Ensure backward compatibility */
++      if (!has_priv(d_al_p) || !d_al_p->priv)
++              return;
++
++      al = d_al_p->priv;
++
++      d_al_p->priv = NULL;
++
++      addr_location__exit(al);
++
++      free(al);
++}
++
+ static const __u8 *dlfilter__insn(void *ctx, __u32 *len)
+ {
+       struct dlfilter *d = (struct dlfilter *)ctx;
+@@ -297,6 +325,7 @@ static const struct perf_dlfilter_fns perf_dlfilter_fns = {
+       .resolve_addr    = dlfilter__resolve_addr,
+       .args            = dlfilter__args,
+       .resolve_address = dlfilter__resolve_address,
++      .al_cleanup      = dlfilter__al_cleanup,
+       .insn            = dlfilter__insn,
+       .srcline         = dlfilter__srcline,
+       .attr            = dlfilter__attr,
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-dlfilter-initialize-addr_location-before-passin.patch b/queue-6.5/perf-dlfilter-initialize-addr_location-before-passin.patch
new file mode 100644 (file)
index 0000000..cd1c9a6
--- /dev/null
@@ -0,0 +1,49 @@
+From 11536c421a46c96e3576b8269959645ac5fc2f82 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 12:18:56 +0300
+Subject: perf dlfilter: Initialize addr_location before passing it to
+ thread__find_symbol_fb()
+
+From: Arnaldo Carvalho de Melo <acme@kernel.org>
+
+[ Upstream commit 42c6dd9d23019ff339d0aca80a444eb71087050e ]
+
+As thread__find_symbol_fb() will end up calling thread__find_map() and
+it in turn will call these on uninitialized memory:
+
+        maps__zput(al->maps);
+        map__zput(al->map);
+        thread__zput(al->thread);
+
+Fixes: 0dd5041c9a0eaf8c ("perf addr_location: Add init/exit/copy functions")
+Reviewed-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Cc: Disha Goel <disgoel@linux.vnet.ibm.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Kajol Jain <kjain@linux.ibm.com>
+Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Link: https://lore.kernel.org/r/20230731091857.10681-2-adrian.hunter@intel.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/dlfilter.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
+index 46f74b2344dbb..798a53d7e6c9d 100644
+--- a/tools/perf/util/dlfilter.c
++++ b/tools/perf/util/dlfilter.c
+@@ -166,6 +166,7 @@ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlf
+       if (!thread)
+               return -1;
++      addr_location__init(&al);
+       thread__find_symbol_fb(thread, d->sample->cpumode, address, &al);
+       al_to_d_al(&al, &d_al);
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-header-fix-missing-pmu-caps.patch b/queue-6.5/perf-header-fix-missing-pmu-caps.patch
new file mode 100644 (file)
index 0000000..7105675
--- /dev/null
@@ -0,0 +1,108 @@
+From 21b10f53515ab092024a608c048d598bae2db9f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Aug 2023 19:39:57 -0700
+Subject: perf header: Fix missing PMU caps
+
+From: Ian Rogers <irogers@google.com>
+
+[ Upstream commit 9897009eecae821efc684ecdd1d04584f5501509 ]
+
+PMU caps are written as HEADER_PMU_CAPS or for the special case of the
+PMU "cpu" as HEADER_CPU_PMU_CAPS. As the PMU "cpu" is special, and not
+any "core" PMU, the logic had become broken and core PMUs not called
+"cpu" were not having their caps written.
+
+This affects ARM and s390 non-hybrid PMUs.
+
+Simplify the PMU caps writing logic to scan one fewer time and to be
+more explicit in its behavior.
+
+Fixes: 178ddf3bad981380 ("perf header: Avoid hybrid PMU list in write_pmu_caps")
+Reported-by: Wei Li <liwei391@huawei.com>
+Signed-off-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Cc: Huacai Chen <chenhuacai@kernel.org>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: James Clark <james.clark@arm.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: John Garry <john.g.garry@oracle.com>
+Cc: K Prateek Nayak <kprateek.nayak@amd.com>
+Cc: Kajol Jain <kjain@linux.ibm.com>
+Cc: Kan Liang <kan.liang@linux.intel.com>
+Cc: Leo Yan <leo.yan@linaro.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mike Leach <mike.leach@linaro.org>
+Cc: Ming Wang <wangming01@loongson.cn>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ravi Bangoria <ravi.bangoria@amd.com>
+Cc: Sean Christopherson <seanjc@google.com>
+Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: linux-arm-kernel@lists.infradead.org
+Link: https://lore.kernel.org/r/20230825024002.801955-2-irogers@google.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/header.c | 31 ++++++++++++++++---------------
+ 1 file changed, 16 insertions(+), 15 deletions(-)
+
+diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
+index 52fbf526fe74a..13c71d28e0eb3 100644
+--- a/tools/perf/util/header.c
++++ b/tools/perf/util/header.c
+@@ -1605,8 +1605,15 @@ static int write_pmu_caps(struct feat_fd *ff,
+       int ret;
+       while ((pmu = perf_pmus__scan(pmu))) {
+-              if (!pmu->name || !strcmp(pmu->name, "cpu") ||
+-                  perf_pmu__caps_parse(pmu) <= 0)
++              if (!strcmp(pmu->name, "cpu")) {
++                      /*
++                       * The "cpu" PMU is special and covered by
++                       * HEADER_CPU_PMU_CAPS. Note, core PMUs are
++                       * counted/written here for ARM, s390 and Intel hybrid.
++                       */
++                      continue;
++              }
++              if (perf_pmu__caps_parse(pmu) <= 0)
+                       continue;
+               nr_pmu++;
+       }
+@@ -1619,23 +1626,17 @@ static int write_pmu_caps(struct feat_fd *ff,
+               return 0;
+       /*
+-       * Write hybrid pmu caps first to maintain compatibility with
+-       * older perf tool.
++       * Note older perf tools assume core PMUs come first, this is a property
++       * of perf_pmus__scan.
+        */
+-      if (perf_pmus__num_core_pmus() > 1) {
+-              pmu = NULL;
+-              while ((pmu = perf_pmus__scan_core(pmu))) {
+-                      ret = __write_pmu_caps(ff, pmu, true);
+-                      if (ret < 0)
+-                              return ret;
+-              }
+-      }
+-
+       pmu = NULL;
+       while ((pmu = perf_pmus__scan(pmu))) {
+-              if (pmu->is_core || !pmu->nr_caps)
++              if (!strcmp(pmu->name, "cpu")) {
++                      /* Skip as above. */
++                      continue;
++              }
++              if (perf_pmu__caps_parse(pmu) <= 0)
+                       continue;
+-
+               ret = __write_pmu_caps(ff, pmu, true);
+               if (ret < 0)
+                       return ret;
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-lock-don-t-pass-an-err_ptr-directly-to-perf_ses.patch b/queue-6.5/perf-lock-don-t-pass-an-err_ptr-directly-to-perf_ses.patch
new file mode 100644 (file)
index 0000000..f6c56cc
--- /dev/null
@@ -0,0 +1,86 @@
+From c8e9e637e7914254e38d0df15c3629d3ec3bd547 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Aug 2023 09:11:21 -0300
+Subject: perf lock: Don't pass an ERR_PTR() directly to perf_session__delete()
+
+From: Arnaldo Carvalho de Melo <acme@redhat.com>
+
+[ Upstream commit abaf1e0355abb050f9c11d2d13a513caec80f7ad ]
+
+While debugging a segfault on 'perf lock contention' without an
+available perf.data file I noticed that it was basically calling:
+
+       perf_session__delete(ERR_PTR(-1))
+
+Resulting in:
+
+  (gdb) run lock contention
+  Starting program: /root/bin/perf lock contention
+  [Thread debugging using libthread_db enabled]
+  Using host libthread_db library "/lib64/libthread_db.so.1".
+  failed to open perf.data: No such file or directory  (try 'perf record' first)
+  Initializing perf session failed
+
+  Program received signal SIGSEGV, Segmentation fault.
+  0x00000000005e7515 in auxtrace__free (session=0xffffffffffffffff) at util/auxtrace.c:2858
+  2858         if (!session->auxtrace)
+  (gdb) p session
+  $1 = (struct perf_session *) 0xffffffffffffffff
+  (gdb) bt
+  #0  0x00000000005e7515 in auxtrace__free (session=0xffffffffffffffff) at util/auxtrace.c:2858
+  #1  0x000000000057bb4d in perf_session__delete (session=0xffffffffffffffff) at util/session.c:300
+  #2  0x000000000047c421 in __cmd_contention (argc=0, argv=0x7fffffffe200) at builtin-lock.c:2161
+  #3  0x000000000047dc95 in cmd_lock (argc=0, argv=0x7fffffffe200) at builtin-lock.c:2604
+  #4  0x0000000000501466 in run_builtin (p=0xe597a8 <commands+552>, argc=2, argv=0x7fffffffe200) at perf.c:322
+  #5  0x00000000005016d5 in handle_internal_command (argc=2, argv=0x7fffffffe200) at perf.c:375
+  #6  0x0000000000501824 in run_argv (argcp=0x7fffffffe02c, argv=0x7fffffffe020) at perf.c:419
+  #7  0x0000000000501b11 in main (argc=2, argv=0x7fffffffe200) at perf.c:535
+  (gdb)
+
+So just set it to NULL after using PTR_ERR(session) to decode the error
+as perf_session__delete(NULL) is supported.
+
+Fixes: eef4fee5e52071d5 ("perf lock: Dynamically allocate lockhash_table")
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: K Prateek Nayak <kprateek.nayak@amd.com>
+Cc: Kan Liang <kan.liang@linux.intel.com>
+Cc: Leo Yan <leo.yan@linaro.org>
+Cc: Mamatha Inamdar <mamatha4@linux.vnet.ibm.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ravi Bangoria <ravi.bangoria@amd.com>
+Cc: Ross Zwisler <zwisler@chromium.org>
+Cc: Sean Christopherson <seanjc@google.com>
+Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
+Cc: Yang Jihong <yangjihong1@huawei.com>
+Link: https://lore.kernel.org/lkml/ZN4R1AYfsD2J8lRs@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/builtin-lock.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
+index c15386cb10331..0cf1c5a2e0323 100644
+--- a/tools/perf/builtin-lock.c
++++ b/tools/perf/builtin-lock.c
+@@ -2052,6 +2052,7 @@ static int __cmd_contention(int argc, const char **argv)
+       if (IS_ERR(session)) {
+               pr_err("Initializing perf session failed\n");
+               err = PTR_ERR(session);
++              session = NULL;
+               goto out_delete;
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-parse-events-additional-error-reporting.patch b/queue-6.5/perf-parse-events-additional-error-reporting.patch
new file mode 100644 (file)
index 0000000..e44b7d5
--- /dev/null
@@ -0,0 +1,118 @@
+From ba164be9630e048498c6f9bf31d06fb56002782e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Jun 2023 11:10:27 -0700
+Subject: perf parse-events: Additional error reporting
+
+From: Ian Rogers <irogers@google.com>
+
+[ Upstream commit b30d4f0b695428f513c561eeaea52e042ef48550 ]
+
+When no events or PMUs match report an error for event_pmu:
+
+Before:
+```
+$ perf stat -e 'asdfasdf' -a sleep 1
+Run 'perf list' for a list of valid events
+
+ Usage: perf stat [<options>] [<command>]
+
+    -e, --event <event>   event selector. use 'perf list' to list available events
+```
+
+After:
+```
+$ perf stat -e 'asdfasdf' -a sleep 1
+event syntax error: 'asdfasdf'
+                     \___ Bad event name
+
+Unabled to find PMU or event on a PMU of 'asdfasdf'
+Run 'perf list' for a list of valid events
+
+ Usage: perf stat [<options>] [<command>]
+
+    -e, --event <event>   event selector. use 'perf list' to list available events
+```
+
+Fixes the inadvertent removal when hybrid parsing was modified.
+
+Fixes: 70c90e4a6b2fbe77 ("perf parse-events: Avoid scanning PMUs before parsing")
+Signed-off-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Kan Liang <kan.liang@linux.intel.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: bpf@vger.kernel.org
+Link: https://lore.kernel.org/r/20230627181030.95608-11-irogers@google.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/parse-events.y | 25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
+
+diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
+index 24274c6cf85f1..c590cf7f02a45 100644
+--- a/tools/perf/util/parse-events.y
++++ b/tools/perf/util/parse-events.y
+@@ -293,7 +293,6 @@ PE_NAME opt_pmu_config
+ {
+       struct parse_events_state *parse_state = _parse_state;
+       struct list_head *list = NULL, *orig_terms = NULL, *terms= NULL;
+-      struct parse_events_error *error = parse_state->error;
+       char *pattern = NULL;
+ #define CLEANUP                                               \
+@@ -305,9 +304,6 @@ PE_NAME opt_pmu_config
+               free(pattern);                          \
+       } while(0)
+-      if (error)
+-              error->idx = @1.first_column;
+-
+       if (parse_events_copy_term_list($2, &orig_terms)) {
+               CLEANUP;
+               YYNOMEM;
+@@ -362,6 +358,14 @@ PE_NAME opt_pmu_config
+                       $2 = NULL;
+               }
+               if (!ok) {
++                      struct parse_events_error *error = parse_state->error;
++                      char *help;
++
++                      if (asprintf(&help, "Unabled to find PMU or event on a PMU of '%s'", $1) < 0)
++                              help = NULL;
++                      parse_events_error__handle(error, @1.first_column,
++                                                 strdup("Bad event or PMU"),
++                                                 help);
+                       CLEANUP;
+                       YYABORT;
+               }
+@@ -390,9 +394,18 @@ PE_NAME sep_dc
+       int err;
+       err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list);
+-      free($1);
+-      if (err < 0)
++      if (err < 0) {
++              struct parse_events_state *parse_state = _parse_state;
++              struct parse_events_error *error = parse_state->error;
++              char *help;
++
++              if (asprintf(&help, "Unabled to find PMU or event on a PMU of '%s'", $1) < 0)
++                      help = NULL;
++              parse_events_error__handle(error, @1.first_column, strdup("Bad event name"), help);
++              free($1);
+               PE_ABORT(err);
++      }
++      free($1);
+       $$ = list;
+ }
+ |
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-parse-events-move-instances-of-yyabort-to-yynom.patch b/queue-6.5/perf-parse-events-move-instances-of-yyabort-to-yynom.patch
new file mode 100644 (file)
index 0000000..ba5359c
--- /dev/null
@@ -0,0 +1,222 @@
+From 3ec7ba65ce1a9072c1e4376b195f3e0eda99cb45 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Jun 2023 11:10:25 -0700
+Subject: perf parse-events: Move instances of YYABORT to YYNOMEM
+
+From: Ian Rogers <irogers@google.com>
+
+[ Upstream commit 77cdd787fc45e3426b8e0b5038b85c276540dfb4 ]
+
+Migration to improve error reporting as YYABORT cases should carry
+event parsing errors.
+
+Signed-off-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Kan Liang <kan.liang@linux.intel.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: bpf@vger.kernel.org
+Link: https://lore.kernel.org/r/20230627181030.95608-9-irogers@google.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Stable-dep-of: b30d4f0b6954 ("perf parse-events: Additional error reporting")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/parse-events.y | 58 +++++++++++++++++++++++-----------
+ 1 file changed, 40 insertions(+), 18 deletions(-)
+
+diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
+index 6b996f22dee3a..78c1f49d8d7e4 100644
+--- a/tools/perf/util/parse-events.y
++++ b/tools/perf/util/parse-events.y
+@@ -455,7 +455,8 @@ value_sym '/' event_config '/'
+       bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
+       err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard);
+       parse_events_terms__delete($3);
+       if (err) {
+@@ -473,7 +474,8 @@ value_sym sep_slash_slash_dc
+       bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
+       ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config,
+                                         /*head_config=*/NULL, wildcard));
+       $$ = list;
+@@ -484,7 +486,8 @@ PE_VALUE_SYM_TOOL sep_slash_slash_dc
+       struct list_head *list;
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
+       ABORT_ON(parse_events_add_tool(_parse_state, list, $1));
+       $$ = list;
+ }
+@@ -497,7 +500,9 @@ PE_LEGACY_CACHE opt_event_config
+       int err;
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
++
+       err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2);
+       parse_events_terms__delete($2);
+@@ -516,7 +521,9 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event
+       int err;
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
++
+       err = parse_events_add_breakpoint(_parse_state, list,
+                                         $2, $6, $4, $7);
+       parse_events_terms__delete($7);
+@@ -534,7 +541,9 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE opt_event_config
+       int err;
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
++
+       err = parse_events_add_breakpoint(_parse_state, list,
+                                         $2, NULL, $4, $5);
+       parse_events_terms__delete($5);
+@@ -551,7 +560,9 @@ PE_PREFIX_MEM PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config
+       int err;
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
++
+       err = parse_events_add_breakpoint(_parse_state, list,
+                                         $2, $4, 0, $5);
+       parse_events_terms__delete($5);
+@@ -569,7 +580,8 @@ PE_PREFIX_MEM PE_VALUE opt_event_config
+       int err;
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
+       err = parse_events_add_breakpoint(_parse_state, list,
+                                         $2, NULL, 0, $3);
+       parse_events_terms__delete($3);
+@@ -589,7 +601,8 @@ tracepoint_name opt_event_config
+       int err;
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
+       if (error)
+               error->idx = @1.first_column;
+@@ -621,7 +634,8 @@ PE_VALUE ':' PE_VALUE opt_event_config
+       int err;
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
+       err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4,
+                                      /*wildcard=*/false);
+       parse_events_terms__delete($4);
+@@ -640,7 +654,8 @@ PE_RAW opt_event_config
+       u64 num;
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
+       errno = 0;
+       num = strtoull($1 + 1, NULL, 16);
+       ABORT_ON(errno);
+@@ -663,7 +678,8 @@ PE_BPF_OBJECT opt_event_config
+       int err;
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
+       err = parse_events_load_bpf(parse_state, list, $1, false, $2);
+       parse_events_terms__delete($2);
+       free($1);
+@@ -680,7 +696,8 @@ PE_BPF_SOURCE opt_event_config
+       int err;
+       list = alloc_list();
+-      ABORT_ON(!list);
++      if (!list)
++              YYNOMEM;
+       err = parse_events_load_bpf(_parse_state, list, $1, true, $2);
+       parse_events_terms__delete($2);
+       if (err) {
+@@ -745,7 +762,8 @@ event_term
+       struct list_head *head = malloc(sizeof(*head));
+       struct parse_events_term *term = $1;
+-      ABORT_ON(!head);
++      if (!head)
++              YYNOMEM;
+       INIT_LIST_HEAD(head);
+       list_add_tail(&term->list, head);
+       $$ = head;
+@@ -922,7 +940,8 @@ PE_DRV_CFG_TERM
+       struct parse_events_term *term;
+       char *config = strdup($1);
+-      ABORT_ON(!config);
++      if (!config)
++              YYNOMEM;
+       if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG,
+                                       config, $1, &@1, NULL)) {
+               free($1);
+@@ -953,7 +972,8 @@ array_terms ',' array_term
+       new_array.ranges = realloc($1.ranges,
+                               sizeof(new_array.ranges[0]) *
+                               new_array.nr_ranges);
+-      ABORT_ON(!new_array.ranges);
++      if (!new_array.ranges)
++              YYNOMEM;
+       memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
+              $3.nr_ranges * sizeof(new_array.ranges[0]));
+       free($3.ranges);
+@@ -969,7 +989,8 @@ PE_VALUE
+       array.nr_ranges = 1;
+       array.ranges = malloc(sizeof(array.ranges[0]));
+-      ABORT_ON(!array.ranges);
++      if (!array.ranges)
++              YYNOMEM;
+       array.ranges[0].start = $1;
+       array.ranges[0].length = 1;
+       $$ = array;
+@@ -982,7 +1003,8 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE
+       ABORT_ON($3 < $1);
+       array.nr_ranges = 1;
+       array.ranges = malloc(sizeof(array.ranges[0]));
+-      ABORT_ON(!array.ranges);
++      if (!array.ranges)
++              YYNOMEM;
+       array.ranges[0].start = $1;
+       array.ranges[0].length = $3 - $1 + 1;
+       $$ = array;
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-parse-events-separate-enomem-memory-handling.patch b/queue-6.5/perf-parse-events-separate-enomem-memory-handling.patch
new file mode 100644 (file)
index 0000000..ba1dbe0
--- /dev/null
@@ -0,0 +1,420 @@
+From a0e926e7d9fcd12defbf6881c3ecc876a4194789 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Jun 2023 11:10:26 -0700
+Subject: perf parse-events: Separate ENOMEM memory handling
+
+From: Ian Rogers <irogers@google.com>
+
+[ Upstream commit b52cb995f1a559bc6e1a7cdc0ed0375503528541 ]
+
+Add a PE_ABORT() macro that does YYNOMEM for -ENOMEM and YYABORT otherwise.
+
+Signed-off-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Kan Liang <kan.liang@linux.intel.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: bpf@vger.kernel.org
+Link: https://lore.kernel.org/r/20230627181030.95608-10-irogers@google.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Stable-dep-of: b30d4f0b6954 ("perf parse-events: Additional error reporting")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/parse-events.y | 134 ++++++++++++++++++++-------------
+ 1 file changed, 82 insertions(+), 52 deletions(-)
+
+diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
+index 78c1f49d8d7e4..24274c6cf85f1 100644
+--- a/tools/perf/util/parse-events.y
++++ b/tools/perf/util/parse-events.y
+@@ -28,6 +28,13 @@ do { \
+               YYABORT; \
+ } while (0)
++#define PE_ABORT(val) \
++do { \
++      if (val == -ENOMEM) \
++              YYNOMEM; \
++      YYABORT; \
++} while (0)
++
+ static struct list_head* alloc_list(void)
+ {
+       struct list_head *list;
+@@ -385,7 +392,7 @@ PE_NAME sep_dc
+       err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list);
+       free($1);
+       if (err < 0)
+-              YYABORT;
++              PE_ABORT(err);
+       $$ = list;
+ }
+ |
+@@ -461,7 +468,7 @@ value_sym '/' event_config '/'
+       parse_events_terms__delete($3);
+       if (err) {
+               free_list_evsel(list);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = list;
+ }
+@@ -472,23 +479,28 @@ value_sym sep_slash_slash_dc
+       int type = $1 >> 16;
+       int config = $1 & 255;
+       bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
++      int err;
+       list = alloc_list();
+       if (!list)
+               YYNOMEM;
+-      ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config,
+-                                        /*head_config=*/NULL, wildcard));
++      err = parse_events_add_numeric(_parse_state, list, type, config, /*head_config=*/NULL, wildcard);
++      if (err)
++              PE_ABORT(err);
+       $$ = list;
+ }
+ |
+ PE_VALUE_SYM_TOOL sep_slash_slash_dc
+ {
+       struct list_head *list;
++      int err;
+       list = alloc_list();
+       if (!list)
+               YYNOMEM;
+-      ABORT_ON(parse_events_add_tool(_parse_state, list, $1));
++      err = parse_events_add_tool(_parse_state, list, $1);
++      if (err)
++              YYNOMEM;
+       $$ = list;
+ }
+@@ -509,7 +521,7 @@ PE_LEGACY_CACHE opt_event_config
+       free($1);
+       if (err) {
+               free_list_evsel(list);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = list;
+ }
+@@ -530,7 +542,7 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event
+       free($6);
+       if (err) {
+               free(list);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = list;
+ }
+@@ -549,7 +561,7 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE opt_event_config
+       parse_events_terms__delete($5);
+       if (err) {
+               free(list);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = list;
+ }
+@@ -569,7 +581,7 @@ PE_PREFIX_MEM PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config
+       free($4);
+       if (err) {
+               free(list);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = list;
+ }
+@@ -587,7 +599,7 @@ PE_PREFIX_MEM PE_VALUE opt_event_config
+       parse_events_terms__delete($3);
+       if (err) {
+               free(list);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = list;
+ }
+@@ -614,7 +626,7 @@ tracepoint_name opt_event_config
+       free($1.event);
+       if (err) {
+               free(list);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = list;
+ }
+@@ -641,7 +653,7 @@ PE_VALUE ':' PE_VALUE opt_event_config
+       parse_events_terms__delete($4);
+       if (err) {
+               free(list);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = list;
+ }
+@@ -665,7 +677,7 @@ PE_RAW opt_event_config
+       parse_events_terms__delete($2);
+       if (err) {
+               free(list);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = list;
+ }
+@@ -685,7 +697,7 @@ PE_BPF_OBJECT opt_event_config
+       free($1);
+       if (err) {
+               free(list);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = list;
+ }
+@@ -702,7 +714,7 @@ PE_BPF_SOURCE opt_event_config
+       parse_events_terms__delete($2);
+       if (err) {
+               free(list);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = list;
+ }
+@@ -777,11 +789,12 @@ event_term:
+ PE_RAW
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW,
++                                       strdup("raw"), $1, &@1, &@1);
+-      if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW,
+-                                      strdup("raw"), $1, &@1, &@1)) {
++      if (err) {
+               free($1);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = term;
+ }
+@@ -789,12 +802,12 @@ PE_RAW
+ name_or_raw '=' name_or_legacy
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $3, &@1, &@3);
+-      if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+-                                      $1, $3, &@1, &@3)) {
++      if (err) {
+               free($1);
+               free($3);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = term;
+ }
+@@ -802,11 +815,12 @@ name_or_raw '=' name_or_legacy
+ name_or_raw '=' PE_VALUE
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
++                                       $1, $3, false, &@1, &@3);
+-      if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+-                                      $1, $3, false, &@1, &@3)) {
++      if (err) {
+               free($1);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = term;
+ }
+@@ -814,12 +828,13 @@ name_or_raw '=' PE_VALUE
+ name_or_raw '=' PE_TERM_HW
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
++                                       $1, $3.str, &@1, &@3);
+-      if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+-                                      $1, $3.str, &@1, &@3)) {
++      if (err) {
+               free($1);
+               free($3.str);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = term;
+ }
+@@ -827,11 +842,12 @@ name_or_raw '=' PE_TERM_HW
+ PE_LEGACY_CACHE
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
++                                       $1, 1, true, &@1, NULL);
+-      if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
+-                                      $1, 1, true, &@1, NULL)) {
++      if (err) {
+               free($1);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = term;
+ }
+@@ -839,11 +855,12 @@ PE_LEGACY_CACHE
+ PE_NAME
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
++                                       $1, 1, true, &@1, NULL);
+-      if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+-                                      $1, 1, true, &@1, NULL)) {
++      if (err) {
+               free($1);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = term;
+ }
+@@ -851,11 +868,12 @@ PE_NAME
+ PE_TERM_HW
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE,
++                                       $1.str, $1.num & 255, false, &@1, NULL);
+-      if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE,
+-                                 $1.str, $1.num & 255, false, &@1, NULL)) {
++      if (err) {
+               free($1.str);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = term;
+ }
+@@ -863,10 +881,11 @@ PE_TERM_HW
+ PE_TERM '=' name_or_legacy
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3);
+-      if (parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3)) {
++      if (err) {
+               free($3);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = term;
+ }
+@@ -874,10 +893,11 @@ PE_TERM '=' name_or_legacy
+ PE_TERM '=' PE_TERM_HW
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__str(&term, (int)$1, NULL, $3.str, &@1, &@3);
+-      if (parse_events_term__str(&term, (int)$1, NULL, $3.str, &@1, &@3)) {
++      if (err) {
+               free($3.str);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = term;
+ }
+@@ -885,37 +905,46 @@ PE_TERM '=' PE_TERM_HW
+ PE_TERM '=' PE_TERM
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__term(&term, (int)$1, (int)$3, &@1, &@3);
++
++      if (err)
++              PE_ABORT(err);
+-      ABORT_ON(parse_events_term__term(&term, (int)$1, (int)$3, &@1, &@3));
+       $$ = term;
+ }
+ |
+ PE_TERM '=' PE_VALUE
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3);
++
++      if (err)
++              PE_ABORT(err);
+-      ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3));
+       $$ = term;
+ }
+ |
+ PE_TERM
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL);
++
++      if (err)
++              PE_ABORT(err);
+-      ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL));
+       $$ = term;
+ }
+ |
+ name_or_raw array '=' name_or_legacy
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $4, &@1, &@4);
+-      if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+-                                      $1, $4, &@1, &@4)) {
++      if (err) {
+               free($1);
+               free($4);
+               free($2.ranges);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       term->array = $2;
+       $$ = term;
+@@ -924,12 +953,12 @@ name_or_raw array '=' name_or_legacy
+ name_or_raw array '=' PE_VALUE
+ {
+       struct parse_events_term *term;
++      int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $4, false, &@1, &@4);
+-      if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+-                                      $1, $4, false, &@1, &@4)) {
++      if (err) {
+               free($1);
+               free($2.ranges);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       term->array = $2;
+       $$ = term;
+@@ -939,14 +968,15 @@ PE_DRV_CFG_TERM
+ {
+       struct parse_events_term *term;
+       char *config = strdup($1);
++      int err;
+       if (!config)
+               YYNOMEM;
+-      if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG,
+-                                      config, $1, &@1, NULL)) {
++      err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, config, $1, &@1, NULL);
++      if (err) {
+               free($1);
+               free(config);
+-              YYABORT;
++              PE_ABORT(err);
+       }
+       $$ = term;
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-parse-events-separate-yyabort-and-yynomem-cases.patch b/queue-6.5/perf-parse-events-separate-yyabort-and-yynomem-cases.patch
new file mode 100644 (file)
index 0000000..c961ff0
--- /dev/null
@@ -0,0 +1,126 @@
+From 7c0a5b85371d2c0d55a3d5f8671cb8a6b0a3acb8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Jun 2023 11:10:24 -0700
+Subject: perf parse-events: Separate YYABORT and YYNOMEM cases
+
+From: Ian Rogers <irogers@google.com>
+
+[ Upstream commit a7a3252dad354a9e5c173156dab959e4019b9467 ]
+
+Split cases in event_pmu for greater accuracy.
+
+Signed-off-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Kan Liang <kan.liang@linux.intel.com>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: bpf@vger.kernel.org
+Link: https://lore.kernel.org/r/20230627181030.95608-8-irogers@google.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Stable-dep-of: b30d4f0b6954 ("perf parse-events: Additional error reporting")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/parse-events.y | 45 ++++++++++++++++++++--------------
+ 1 file changed, 26 insertions(+), 19 deletions(-)
+
+diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
+index 9f28d4b5502f1..6b996f22dee3a 100644
+--- a/tools/perf/util/parse-events.y
++++ b/tools/perf/util/parse-events.y
+@@ -285,37 +285,42 @@ event_pmu:
+ PE_NAME opt_pmu_config
+ {
+       struct parse_events_state *parse_state = _parse_state;
+-      struct parse_events_error *error = parse_state->error;
+       struct list_head *list = NULL, *orig_terms = NULL, *terms= NULL;
++      struct parse_events_error *error = parse_state->error;
+       char *pattern = NULL;
+-#define CLEANUP_YYABORT                                       \
++#define CLEANUP                                               \
+       do {                                            \
+               parse_events_terms__delete($2);         \
+               parse_events_terms__delete(orig_terms); \
+               free(list);                             \
+               free($1);                               \
+               free(pattern);                          \
+-              YYABORT;                                \
+       } while(0)
+-      if (parse_events_copy_term_list($2, &orig_terms))
+-              CLEANUP_YYABORT;
+-
+       if (error)
+               error->idx = @1.first_column;
++      if (parse_events_copy_term_list($2, &orig_terms)) {
++              CLEANUP;
++              YYNOMEM;
++      }
++
+       list = alloc_list();
+-      if (!list)
+-              CLEANUP_YYABORT;
++      if (!list) {
++              CLEANUP;
++              YYNOMEM;
++      }
+       /* Attempt to add to list assuming $1 is a PMU name. */
+       if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false)) {
+               struct perf_pmu *pmu = NULL;
+               int ok = 0;
+               /* Failure to add, try wildcard expansion of $1 as a PMU name. */
+-              if (asprintf(&pattern, "%s*", $1) < 0)
+-                      CLEANUP_YYABORT;
++              if (asprintf(&pattern, "%s*", $1) < 0) {
++                      CLEANUP;
++                      YYNOMEM;
++              }
+               while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+                       char *name = pmu->name;
+@@ -330,8 +335,10 @@ PE_NAME opt_pmu_config
+                           !perf_pmu__match(pattern, pmu->alias_name, $1)) {
+                               bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
+-                              if (parse_events_copy_term_list(orig_terms, &terms))
+-                                      CLEANUP_YYABORT;
++                              if (parse_events_copy_term_list(orig_terms, &terms)) {
++                                      CLEANUP;
++                                      YYNOMEM;
++                              }
+                               if (!parse_events_add_pmu(parse_state, list, pmu->name, terms,
+                                                         auto_merge_stats)) {
+                                       ok++;
+@@ -347,15 +354,15 @@ PE_NAME opt_pmu_config
+                       ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list);
+                       $2 = NULL;
+               }
+-              if (!ok)
+-                      CLEANUP_YYABORT;
++              if (!ok) {
++                      CLEANUP;
++                      YYABORT;
++              }
+       }
+-      parse_events_terms__delete($2);
+-      parse_events_terms__delete(orig_terms);
+-      free(pattern);
+-      free($1);
+       $$ = list;
+-#undef CLEANUP_YYABORT
++      list = NULL;
++      CLEANUP;
++#undef CLEANUP
+ }
+ |
+ PE_KERNEL_PMU_EVENT sep_dc
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-script-print-cgroup-field-on-the-same-line-as-c.patch b/queue-6.5/perf-script-print-cgroup-field-on-the-same-line-as-c.patch
new file mode 100644 (file)
index 0000000..9ccc491
--- /dev/null
@@ -0,0 +1,109 @@
+From 83a0be4ed36aefb4c05ed1696da45134d8f377f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 17:07:37 -0700
+Subject: perf script: Print "cgroup" field on the same line as "comm"
+
+From: Ivan Babrou <ivan@cloudflare.com>
+
+[ Upstream commit 8c49c6e1a7b790c4cb9f464c5485117451d91c60 ]
+
+Commit 3fd7a168bf51 ("perf script: Add 'cgroup' field for output")
+added support for printing cgroup path in perf script output.
+
+It was okay if you didn't want any stacks:
+
+    $ sudo perf script --comms jpegtran:23f4bf -F comm,tid,cpu,time,cgroup
+    jpegtran:23f4bf 3321915 [013] 404718.587488:  /idle.slice/polish.service
+    jpegtran:23f4bf 3321915 [031] 404718.592073:  /idle.slice/polish.service
+
+With stacks it gets messier as cgroup is printed after the stack:
+
+    $ perf script --comms jpegtran:23f4bf -F comm,tid,cpu,time,cgroup,ip,sym
+    jpegtran:23f4bf 3321915 [013] 404718.587488:
+                    5c554 compress_output
+                    570d9 jpeg_finish_compress
+                    3476e jpegtran_main
+                    330ee jpegtran::main
+                    326e2 core::ops::function::FnOnce::call_once (inlined)
+                    326e2 std::sys_common::backtrace::__rust_begin_short_backtrace
+    /idle.slice/polish.service
+    jpegtran:23f4bf 3321915 [031] 404718.592073:
+                    8474d jsimd_encode_mcu_AC_first_prepare_sse2.PADDING
+                55af68e62fff [unknown]
+    /idle.slice/polish.service
+
+Let's instead print cgroup on the same line as comm:
+
+    $ perf script --comms jpegtran:23f4bf -F comm,tid,cpu,time,cgroup,ip,sym
+    jpegtran:23f4bf 3321915 [013] 404718.587488:  /idle.slice/polish.service
+                    5c554 compress_output
+                    570d9 jpeg_finish_compress
+                    3476e jpegtran_main
+                    330ee jpegtran::main
+                    326e2 core::ops::function::FnOnce::call_once (inlined)
+                    326e2 std::sys_common::backtrace::__rust_begin_short_backtrace
+
+    jpegtran:23f4bf 3321915 [031] 404718.592073:  /idle.slice/polish.service
+                    8474d jsimd_encode_mcu_AC_first_prepare_sse2.PADDING
+                55af68e62fff [unknown]
+
+Fixes: 3fd7a168bf514979 ("perf script: Add 'cgroup' field for output")
+Signed-off-by: Ivan Babrou <ivan@cloudflare.com>
+Acked-by: Ian Rogers <irogers@google.com>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: kernel-team@cloudflare.com
+Link: https://lore.kernel.org/r/20230718000737.49077-1-ivan@cloudflare.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/builtin-script.c | 22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
+index 200b3e7ea8dad..517bf25750c8b 100644
+--- a/tools/perf/builtin-script.c
++++ b/tools/perf/builtin-script.c
+@@ -2199,6 +2199,17 @@ static void process_event(struct perf_script *script,
+       if (PRINT_FIELD(RETIRE_LAT))
+               fprintf(fp, "%16" PRIu16, sample->retire_lat);
++      if (PRINT_FIELD(CGROUP)) {
++              const char *cgrp_name;
++              struct cgroup *cgrp = cgroup__find(machine->env,
++                                                 sample->cgroup);
++              if (cgrp != NULL)
++                      cgrp_name = cgrp->name;
++              else
++                      cgrp_name = "unknown";
++              fprintf(fp, " %s", cgrp_name);
++      }
++
+       if (PRINT_FIELD(IP)) {
+               struct callchain_cursor *cursor = NULL;
+@@ -2243,17 +2254,6 @@ static void process_event(struct perf_script *script,
+       if (PRINT_FIELD(CODE_PAGE_SIZE))
+               fprintf(fp, " %s", get_page_size_name(sample->code_page_size, str));
+-      if (PRINT_FIELD(CGROUP)) {
+-              const char *cgrp_name;
+-              struct cgroup *cgrp = cgroup__find(machine->env,
+-                                                 sample->cgroup);
+-              if (cgrp != NULL)
+-                      cgrp_name = cgrp->name;
+-              else
+-                      cgrp_name = "unknown";
+-              fprintf(fp, " %s", cgrp_name);
+-      }
+-
+       perf_sample__fprintf_ipc(sample, attr, fp);
+       fprintf(fp, "\n");
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-test-stat_bpf_counters_cgrp-enhance-perf-stat-c.patch b/queue-6.5/perf-test-stat_bpf_counters_cgrp-enhance-perf-stat-c.patch
new file mode 100644 (file)
index 0000000..729f6b3
--- /dev/null
@@ -0,0 +1,51 @@
+From 8d405e9da61c9be1d5a5c549262a6a0bf0a0d42f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Aug 2023 09:41:52 -0700
+Subject: perf test stat_bpf_counters_cgrp: Enhance perf stat cgroup BPF
+ counter test
+
+From: Namhyung Kim <namhyung@kernel.org>
+
+[ Upstream commit a84260e314029e6dc9904fd6eabf8d9fd7965351 ]
+
+The test has a system-wide case and a cpu-list case, but the cpu-list case
+sometimes fails.  It runs the sleep command on CPU1 and measures both the
+user.slice and system.slice cgroups by default (on systemd-based systems).
+
+But if the system is idle enough, system.slice sometimes gets no count,
+which makes the test fail.  That may be because it only looks at CPU1,
+so add CPU0 to increase the chance that it finds some tasks.
+
+Fixes: 7901086014bbaa3a ("perf test: Add a new test for perf stat cgroup BPF counter")
+Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org>
+Signed-off-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: bpf@vger.kernel.org
+Link: https://lore.kernel.org/r/20230825164152.165610-3-namhyung@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/tests/shell/stat_bpf_counters_cgrp.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
+index a74440a00b6b6..e75d0780dc788 100755
+--- a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
++++ b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
+@@ -60,7 +60,7 @@ check_system_wide_counted()
+ check_cpu_list_counted()
+ {
+-      check_cpu_list_counted_output=$(perf stat -C 1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1  2>&1)
++      check_cpu_list_counted_output=$(perf stat -C 0,1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1  2>&1)
+       if echo ${check_cpu_list_counted_output} | grep -q -F "<not "; then
+               echo "Some CPU events are not counted"
+               if [ "${verbose}" = "1" ]; then
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-test-stat_bpf_counters_cgrp-fix-shellcheck-issu.patch b/queue-6.5/perf-test-stat_bpf_counters_cgrp-fix-shellcheck-issu.patch
new file mode 100644 (file)
index 0000000..b3a7efe
--- /dev/null
@@ -0,0 +1,121 @@
+From 5497f55181aca6db9a15b6e535cbfc00f10a0c52 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Jul 2023 23:57:39 +0530
+Subject: perf test stat_bpf_counters_cgrp: Fix shellcheck issue about logical
+ operators
+
+From: Kajol Jain <kjain@linux.ibm.com>
+
+[ Upstream commit 0dd1f815545d7210150642741c364521cc5cf116 ]
+
+Running shellcheck on stat_bpf_counters_cgrp.sh generates the warnings below:
+
+In stat_bpf_counters_cgrp.sh line 28:
+       if [ -d /sys/fs/cgroup/system.slice -a -d /sys/fs/cgroup/user.slice ]; then
+                                            ^-- SC2166 (warning): Prefer [ p ] && [ q ] as [ p -a q ] is not well defined.
+
+In stat_bpf_counters_cgrp.sh line 34:
+       local self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3)
+        ^-------------^ SC3043 (warning): In POSIX sh, 'local' is undefined.
+              ^-------^ SC2155 (warning): Declare and assign separately to avoid masking return values.
+                        ^-- SC2046 (warning): Quote this to prevent word splitting.
+
+In stat_bpf_counters_cgrp.sh line 51:
+       local output
+        ^----------^ SC3043 (warning): In POSIX sh, 'local' is undefined.
+
+In stat_bpf_counters_cgrp.sh line 65:
+       local output
+        ^----------^ SC3043 (warning): In POSIX sh, 'local' is undefined.
+
+Fix the above warnings by:
+- Changing the expression [ p -a q ] to [ p ] && [ q ].
+- Dropping 'local' and prefixing the variable names with the function
+  name instead.
+
+Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
+Acked-by: Ian Rogers <irogers@google.com>
+Cc: Disha Goel <disgoel@linux.vnet.ibm.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: linuxppc-dev@lists.ozlabs.org
+Link: https://lore.kernel.org/r/20230709182800.53002-6-atrajeev@linux.vnet.ibm.com
+Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Stable-dep-of: a84260e31402 ("perf test stat_bpf_counters_cgrp: Enhance perf stat cgroup BPF counter test")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../tests/shell/stat_bpf_counters_cgrp.sh     | 28 ++++++++-----------
+ 1 file changed, 12 insertions(+), 16 deletions(-)
+
+diff --git a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
+index d724855d097c2..a74440a00b6b6 100755
+--- a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
++++ b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
+@@ -25,22 +25,22 @@ check_bpf_counter()
+ find_cgroups()
+ {
+       # try usual systemd slices first
+-      if [ -d /sys/fs/cgroup/system.slice -a -d /sys/fs/cgroup/user.slice ]; then
++      if [ -d /sys/fs/cgroup/system.slice ] && [ -d /sys/fs/cgroup/user.slice ]; then
+               test_cgroups="system.slice,user.slice"
+               return
+       fi
+       # try root and self cgroups
+-      local self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3)
+-      if [ -z ${self_cgrp} ]; then
++      find_cgroups_self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3)
++      if [ -z ${find_cgroups_self_cgrp} ]; then
+               # cgroup v2 doesn't specify perf_event
+-              self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3)
++              find_cgroups_self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3)
+       fi
+-      if [ -z ${self_cgrp} ]; then
++      if [ -z ${find_cgroups_self_cgrp} ]; then
+               test_cgroups="/"
+       else
+-              test_cgroups="/,${self_cgrp}"
++              test_cgroups="/,${find_cgroups_self_cgrp}"
+       fi
+ }
+@@ -48,13 +48,11 @@ find_cgroups()
+ # Just check if it runs without failure and has non-zero results.
+ check_system_wide_counted()
+ {
+-      local output
+-
+-      output=$(perf stat -a --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, sleep 1  2>&1)
+-      if echo ${output} | grep -q -F "<not "; then
++      check_system_wide_counted_output=$(perf stat -a --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, sleep 1  2>&1)
++      if echo ${check_system_wide_counted_output} | grep -q -F "<not "; then
+               echo "Some system-wide events are not counted"
+               if [ "${verbose}" = "1" ]; then
+-                      echo ${output}
++                      echo ${check_system_wide_counted_output}
+               fi
+               exit 1
+       fi
+@@ -62,13 +60,11 @@ check_system_wide_counted()
+ check_cpu_list_counted()
+ {
+-      local output
+-
+-      output=$(perf stat -C 1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1  2>&1)
+-      if echo ${output} | grep -q -F "<not "; then
++      check_cpu_list_counted_output=$(perf stat -C 1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1  2>&1)
++      if echo ${check_cpu_list_counted_output} | grep -q -F "<not "; then
+               echo "Some CPU events are not counted"
+               if [ "${verbose}" = "1" ]; then
+-                      echo ${output}
++                      echo ${check_cpu_list_counted_output}
+               fi
+               exit 1
+       fi
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-top-don-t-pass-an-err_ptr-directly-to-perf_sess.patch b/queue-6.5/perf-top-don-t-pass-an-err_ptr-directly-to-perf_sess.patch
new file mode 100644 (file)
index 0000000..7b5a111
--- /dev/null
@@ -0,0 +1,85 @@
+From fcb15a593a27bad0dadeb567c0e72ecae3d9a997 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Aug 2023 09:11:21 -0300
+Subject: perf top: Don't pass an ERR_PTR() directly to perf_session__delete()
+
+From: Arnaldo Carvalho de Melo <acme@redhat.com>
+
+[ Upstream commit ef23cb593304bde0cc046fd4cc83ae7ea2e24f16 ]
+
+While debugging a segfault on 'perf lock contention' without an
+available perf.data file I noticed that it was basically calling:
+
+       perf_session__delete(ERR_PTR(-1))
+
+Resulting in:
+
+  (gdb) run lock contention
+  Starting program: /root/bin/perf lock contention
+  [Thread debugging using libthread_db enabled]
+  Using host libthread_db library "/lib64/libthread_db.so.1".
+  failed to open perf.data: No such file or directory  (try 'perf record' first)
+  Initializing perf session failed
+
+  Program received signal SIGSEGV, Segmentation fault.
+  0x00000000005e7515 in auxtrace__free (session=0xffffffffffffffff) at util/auxtrace.c:2858
+  2858         if (!session->auxtrace)
+  (gdb) p session
+  $1 = (struct perf_session *) 0xffffffffffffffff
+  (gdb) bt
+  #0  0x00000000005e7515 in auxtrace__free (session=0xffffffffffffffff) at util/auxtrace.c:2858
+  #1  0x000000000057bb4d in perf_session__delete (session=0xffffffffffffffff) at util/session.c:300
+  #2  0x000000000047c421 in __cmd_contention (argc=0, argv=0x7fffffffe200) at builtin-lock.c:2161
+  #3  0x000000000047dc95 in cmd_lock (argc=0, argv=0x7fffffffe200) at builtin-lock.c:2604
+  #4  0x0000000000501466 in run_builtin (p=0xe597a8 <commands+552>, argc=2, argv=0x7fffffffe200) at perf.c:322
+  #5  0x00000000005016d5 in handle_internal_command (argc=2, argv=0x7fffffffe200) at perf.c:375
+  #6  0x0000000000501824 in run_argv (argcp=0x7fffffffe02c, argv=0x7fffffffe020) at perf.c:419
+  #7  0x0000000000501b11 in main (argc=2, argv=0x7fffffffe200) at perf.c:535
+  (gdb)
+
+So just set it to NULL after using PTR_ERR(session) to decode the error
+as perf_session__delete(NULL) is supported.
+
+The same problem was found in 'perf top' after an audit of all
+perf_session__new() failure handling.
+
+Fixes: 6ef81c55a2b6584c ("perf session: Return error code for perf_session__new() function on failure")
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Kate Stewart <kstewart@linuxfoundation.org>
+Cc: Mamatha Inamdar <mamatha4@linux.vnet.ibm.com>
+Cc: Mukesh Ojha <mojha@codeaurora.org>
+Cc: Nageswara R Sastry <rnsastry@linux.vnet.ibm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
+Cc: Shawn Landden <shawn@git.icu>
+Cc: Song Liu <songliubraving@fb.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tzvetomir Stoyanov <tstoyanov@vmware.com>
+Link: https://lore.kernel.org/lkml/ZN4Q2rxxsL08A8rd@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/builtin-top.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
+index 1baa2acb3cedd..ea8c7eca5eeed 100644
+--- a/tools/perf/builtin-top.c
++++ b/tools/perf/builtin-top.c
+@@ -1805,6 +1805,7 @@ int cmd_top(int argc, const char **argv)
+       top.session = perf_session__new(NULL, NULL);
+       if (IS_ERR(top.session)) {
+               status = PTR_ERR(top.session);
++              top.session = NULL;
+               goto out_delete_evlist;
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-trace-really-free-the-evsel-priv-area.patch b/queue-6.5/perf-trace-really-free-the-evsel-priv-area.patch
new file mode 100644 (file)
index 0000000..342661b
--- /dev/null
@@ -0,0 +1,100 @@
+From 442a507b7c82f9eba3931a311da3a947e2cd91aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 15:37:14 -0300
+Subject: perf trace: Really free the evsel->priv area
+
+From: Arnaldo Carvalho de Melo <acme@redhat.com>
+
+[ Upstream commit 7962ef13651a9163f07b530607392ea123482e8a ]
+
+In 3cb4d5e00e037c70 ("perf trace: Free syscall tp fields in
+evsel->priv") it only was freeing if strcmp(evsel->tp_format->system,
+"syscalls") returned zero, while the corresponding initialization of
+evsel->priv was being performed if it was _not_ zero, i.e. if the tp
+system wasn't 'syscalls'.
+
+Just stop looking for that and free it if evsel->priv was set, which
+should be equivalent.
+
+Also use the pre-existing evsel_trace__delete() function.
+
+This resolves these leaks, detected with:
+
+  $ make EXTRA_CFLAGS="-fsanitize=address" BUILD_BPF_SKEL=1 CORESIGHT=1 O=/tmp/build/perf-tools-next -C tools/perf install-bin
+
+  =================================================================
+  ==481565==ERROR: LeakSanitizer: detected memory leaks
+
+  Direct leak of 40 byte(s) in 1 object(s) allocated from:
+      #0 0x7f7343cba097 in calloc (/lib64/libasan.so.8+0xba097)
+      #1 0x987966 in zalloc (/home/acme/bin/perf+0x987966)
+      #2 0x52f9b9 in evsel_trace__new /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:307
+      #3 0x52f9b9 in evsel__syscall_tp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:333
+      #4 0x52f9b9 in evsel__init_raw_syscall_tp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:458
+      #5 0x52f9b9 in perf_evsel__raw_syscall_newtp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:480
+      #6 0x540e8b in trace__add_syscall_newtp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3212
+      #7 0x540e8b in trace__run /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3891
+      #8 0x540e8b in cmd_trace /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:5156
+      #9 0x5ef262 in run_builtin /home/acme/git/perf-tools-next/tools/perf/perf.c:323
+      #10 0x4196da in handle_internal_command /home/acme/git/perf-tools-next/tools/perf/perf.c:377
+      #11 0x4196da in run_argv /home/acme/git/perf-tools-next/tools/perf/perf.c:421
+      #12 0x4196da in main /home/acme/git/perf-tools-next/tools/perf/perf.c:537
+      #13 0x7f7342c4a50f in __libc_start_call_main (/lib64/libc.so.6+0x2750f)
+
+  Direct leak of 40 byte(s) in 1 object(s) allocated from:
+      #0 0x7f7343cba097 in calloc (/lib64/libasan.so.8+0xba097)
+      #1 0x987966 in zalloc (/home/acme/bin/perf+0x987966)
+      #2 0x52f9b9 in evsel_trace__new /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:307
+      #3 0x52f9b9 in evsel__syscall_tp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:333
+      #4 0x52f9b9 in evsel__init_raw_syscall_tp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:458
+      #5 0x52f9b9 in perf_evsel__raw_syscall_newtp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:480
+      #6 0x540dd1 in trace__add_syscall_newtp /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3205
+      #7 0x540dd1 in trace__run /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:3891
+      #8 0x540dd1 in cmd_trace /home/acme/git/perf-tools-next/tools/perf/builtin-trace.c:5156
+      #9 0x5ef262 in run_builtin /home/acme/git/perf-tools-next/tools/perf/perf.c:323
+      #10 0x4196da in handle_internal_command /home/acme/git/perf-tools-next/tools/perf/perf.c:377
+      #11 0x4196da in run_argv /home/acme/git/perf-tools-next/tools/perf/perf.c:421
+      #12 0x4196da in main /home/acme/git/perf-tools-next/tools/perf/perf.c:537
+      #13 0x7f7342c4a50f in __libc_start_call_main (/lib64/libc.so.6+0x2750f)
+
+  SUMMARY: AddressSanitizer: 80 byte(s) leaked in 2 allocation(s).
+  [root@quaco ~]#
+
+With this we plug all leaks with "perf trace sleep 1".
+
+Fixes: 3cb4d5e00e037c70 ("perf trace: Free syscall tp fields in evsel->priv")
+Acked-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Riccardo Mancini <rickyman7@gmail.com>
+Link: https://lore.kernel.org/lkml/20230719202951.534582-5-acme@kernel.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/builtin-trace.c | 9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
+index 6e73d0e957152..4aba576512a15 100644
+--- a/tools/perf/builtin-trace.c
++++ b/tools/perf/builtin-trace.c
+@@ -3136,13 +3136,8 @@ static void evlist__free_syscall_tp_fields(struct evlist *evlist)
+       struct evsel *evsel;
+       evlist__for_each_entry(evlist, evsel) {
+-              struct evsel_trace *et = evsel->priv;
+-
+-              if (!et || !evsel->tp_format || strcmp(evsel->tp_format->system, "syscalls"))
+-                      continue;
+-
+-              zfree(&et->fmt);
+-              free(et);
++              evsel_trace__delete(evsel->priv);
++              evsel->priv = NULL;
+       }
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-vendor-events-arm64-remove-l1d_cache_lmiss-from.patch b/queue-6.5/perf-vendor-events-arm64-remove-l1d_cache_lmiss-from.patch
new file mode 100644 (file)
index 0000000..55e2c07
--- /dev/null
@@ -0,0 +1,57 @@
+From c8a5781f6f6f222ed3f03fabf94114951e6d5513 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Aug 2023 14:13:28 -0700
+Subject: perf vendor events arm64: Remove L1D_CACHE_LMISS from AmpereOne list
+
+From: Ilkka Koskinen <ilkka@os.amperecomputing.com>
+
+[ Upstream commit b8af10062df3c23fe002c3f187389bb263b3eb20 ]
+
+ampereone/cache.json file tried to include L1D_CACHE_LMISS while it
+doesn't exist in common-and-microarch.json. While this bug doesn't seem to
+cause issues in newer kernels with the jevents.py script, it prevents building
+older perf tools with the backported patch.
+
+Fixes: a9650b7f6fc09d16 ("perf vendor events arm64: Add AmpereOne core PMU events")
+Reported-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Reviewed-by: Ian Rogers <irogers@google.com>
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ilkka Koskinen <ilkka@os.amperecomputing.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: James Clark <james.clark@arm.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Leo Yan <leo.yan@linaro.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mike Leach <mike.leach@linaro.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will@kernel.org>
+Cc: linux-arm-kernel@lists.infradead.org
+Closes: https://lore.kernel.org/all/76bb2e47-ce44-76ae-838e-53279047084d@oracle.com/
+Link: https://lore.kernel.org/r/20230803211331.140553-2-ilkka@os.amperecomputing.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
+index fc06330542116..7a2b7b200f144 100644
+--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
++++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
+@@ -92,9 +92,6 @@
+     {
+         "ArchStdEvent": "L1D_CACHE_LMISS_RD"
+     },
+-    {
+-        "ArchStdEvent": "L1D_CACHE_LMISS"
+-    },
+     {
+         "ArchStdEvent": "L1I_CACHE_LMISS"
+     },
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-vendor-events-drop-some-of-the-json-events-for-.patch b/queue-6.5/perf-vendor-events-drop-some-of-the-json-events-for-.patch
new file mode 100644 (file)
index 0000000..3869799
--- /dev/null
@@ -0,0 +1,137 @@
+From 71f40157e1acdfc99d4993d64eff9612eabca517 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Aug 2023 16:57:58 +0530
+Subject: perf vendor events: Drop some of the JSON/events for power10 platform
+
+From: Kajol Jain <kjain@linux.ibm.com>
+
+[ Upstream commit e104df97b8dcfbab2e42de634b99bf03f0805d85 ]
+
+Drop some of the JSON/events for power10 platform due to counter
+data mismatch.
+
+Fixes: 32daa5d7899e0343 ("perf vendor events: Initial JSON/events list for power10 platform")
+Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
+Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Cc: Disha Goel <disgoel@linux.ibm.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Kajol Jain <kjain@linux.ibm.com>
+Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: linuxppc-dev@lists.ozlabs.org
+Link: https://lore.kernel.org/r/20230814112803.1508296-2-kjain@linux.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../arch/powerpc/power10/floating_point.json           |  7 -------
+ tools/perf/pmu-events/arch/powerpc/power10/marked.json | 10 ----------
+ tools/perf/pmu-events/arch/powerpc/power10/others.json |  5 -----
+ .../perf/pmu-events/arch/powerpc/power10/pipeline.json | 10 ----------
+ .../pmu-events/arch/powerpc/power10/translation.json   |  5 -----
+ 5 files changed, 37 deletions(-)
+ delete mode 100644 tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+deleted file mode 100644
+index 54acb55e2c8c6..0000000000000
+--- a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
++++ /dev/null
+@@ -1,7 +0,0 @@
+-[
+-  {
+-    "EventCode": "0x4016E",
+-    "EventName": "PM_THRESH_NOT_MET",
+-    "BriefDescription": "Threshold counter did not meet threshold."
+-  }
+-]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+index 131f8d0e88317..f2436fc5537ce 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/marked.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+@@ -19,11 +19,6 @@
+     "EventName": "PM_MRK_BR_TAKEN_CMPL",
+     "BriefDescription": "Marked Branch Taken instruction completed."
+   },
+-  {
+-    "EventCode": "0x20112",
+-    "EventName": "PM_MRK_NTF_FIN",
+-    "BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch."
+-  },
+   {
+     "EventCode": "0x2C01C",
+     "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
+@@ -64,11 +59,6 @@
+     "EventName": "PM_L1_ICACHE_MISS",
+     "BriefDescription": "Demand instruction cache miss."
+   },
+-  {
+-    "EventCode": "0x30130",
+-    "EventName": "PM_MRK_INST_FIN",
+-    "BriefDescription": "marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU."
+-  },
+   {
+     "EventCode": "0x34146",
+     "EventName": "PM_MRK_LD_CMPL",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json
+index a5319cdba89b3..17c5424ef1ac1 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/others.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json
+@@ -29,11 +29,6 @@
+     "EventName": "PM_DISP_SS0_2_INSTR_CYC",
+     "BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions."
+   },
+-  {
+-    "EventCode": "0x1F15C",
+-    "EventName": "PM_MRK_STCX_L2_CYC",
+-    "BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)."
+-  },
+   {
+     "EventCode": "0x10066",
+     "EventName": "PM_ADJUNCT_CYC",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+index 449f57e8ba6af..799893c56f32b 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+@@ -194,11 +194,6 @@
+     "EventName": "PM_TLBIE_FIN",
+     "BriefDescription": "TLBIE instruction finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
+   },
+-  {
+-    "EventCode": "0x3D058",
+-    "EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE",
+-    "BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)."
+-  },
+   {
+     "EventCode": "0x30066",
+     "EventName": "PM_LSU_FIN",
+@@ -269,11 +264,6 @@
+     "EventName": "PM_IC_MISS_CMPL",
+     "BriefDescription": "Non-speculative instruction cache miss, counted at completion."
+   },
+-  {
+-    "EventCode": "0x4D050",
+-    "EventName": "PM_VSU_NON_FLOP_CMPL",
+-    "BriefDescription": "Non-floating point VSU instructions completed."
+-  },
+   {
+     "EventCode": "0x4D052",
+     "EventName": "PM_2FLOP_CMPL",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+index 3e47b804a0a8f..961e2491e73f6 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/translation.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+@@ -4,11 +4,6 @@
+     "EventName": "PM_MRK_START_PROBE_NOP_CMPL",
+     "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
+   },
+-  {
+-    "EventCode": "0x20016",
+-    "EventName": "PM_ST_FIN",
+-    "BriefDescription": "Store finish count. Includes speculative activity."
+-  },
+   {
+     "EventCode": "0x20018",
+     "EventName": "PM_ST_FWD",
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-vendor-events-drop-stores_per_inst-metric-event.patch b/queue-6.5/perf-vendor-events-drop-stores_per_inst-metric-event.patch
new file mode 100644 (file)
index 0000000..bdef11c
--- /dev/null
@@ -0,0 +1,50 @@
+From c54a1b1ea28b6ec07ce4ec67325193f111ef343a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Aug 2023 16:57:59 +0530
+Subject: perf vendor events: Drop STORES_PER_INST metric event for power10
+ platform
+
+From: Kajol Jain <kjain@linux.ibm.com>
+
+[ Upstream commit 4836b9a85ef148c7c9779b66fab3f7279e488d90 ]
+
+Drop STORES_PER_INST metric event for the power10 platform, as the
+metric expression of the STORES_PER_INST metric event uses the dropped
+event PM_ST_FIN.
+
+Fixes: 3ca3af7d1f230d1f ("perf vendor events power10: Add metric events JSON file for power10 platform")
+Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
+Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Cc: Disha Goel <disgoel@linux.ibm.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Kajol Jain <kjain@linux.ibm.com>
+Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: linuxppc-dev@lists.ozlabs.org
+Link: https://lore.kernel.org/r/20230814112803.1508296-3-kjain@linux.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/pmu-events/arch/powerpc/power10/metrics.json | 6 ------
+ 1 file changed, 6 deletions(-)
+
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
+index 6f53583a0c62c..e3087eb1ccff8 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
+@@ -453,12 +453,6 @@
+         "MetricGroup": "General",
+         "MetricName": "LOADS_PER_INST"
+     },
+-    {
+-        "BriefDescription": "Average number of finished stores per completed instruction",
+-        "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL",
+-        "MetricGroup": "General",
+-        "MetricName": "STORES_PER_INST"
+-    },
+     {
+         "BriefDescription": "Percentage of demand loads that reloaded from beyond the L2 per completed instruction",
+         "MetricExpr": "PM_DATA_FROM_L2MISS / PM_RUN_INST_CMPL * 100",
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-vendor-events-move-json-events-to-appropriate-f.patch b/queue-6.5/perf-vendor-events-move-json-events-to-appropriate-f.patch
new file mode 100644 (file)
index 0000000..c7d6646
--- /dev/null
@@ -0,0 +1,1765 @@
+From 1ae45b6405f795e3da0039894bb7179c3559439c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Aug 2023 16:58:00 +0530
+Subject: perf vendor events: Move JSON/events to appropriate files for power10
+ platform
+
+From: Kajol Jain <kjain@linux.ibm.com>
+
+[ Upstream commit 7d473f475b2aff7e7c5d63b6f701c54590f84781 ]
+
+Move some of the power10 JSON/events to appropriate files.
+
+Fixes: 32daa5d7899e0343 ("perf vendor events: Initial JSON/events list for power10 platform")
+Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
+Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Cc: Disha Goel <disgoel@linux.ibm.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Kajol Jain <kjain@linux.ibm.com>
+Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: linuxppc-dev@lists.ozlabs.org
+Link: https://lore.kernel.org/r/20230814112803.1508296-4-kjain@linux.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../arch/powerpc/power10/cache.json           |  45 ----
+ .../arch/powerpc/power10/floating_point.json  |  67 +++++
+ .../arch/powerpc/power10/frontend.json        | 180 -------------
+ .../arch/powerpc/power10/marked.json          | 186 ++++++++++---
+ .../arch/powerpc/power10/memory.json          |  85 ------
+ .../arch/powerpc/power10/others.json          | 192 ++------------
+ .../arch/powerpc/power10/pipeline.json        | 247 ++++++++++++++----
+ .../pmu-events/arch/powerpc/power10/pmc.json  | 193 +++++++++++++-
+ .../arch/powerpc/power10/translation.json     |  35 ---
+ 9 files changed, 616 insertions(+), 614 deletions(-)
+ create mode 100644 tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+index 9cb929bb64afd..839ae26945fb2 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/cache.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+@@ -1,54 +1,9 @@
+ [
+-  {
+-    "EventCode": "0x1003C",
+-    "EventName": "PM_EXEC_STALL_DMISS_L2L3",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3."
+-  },
+-  {
+-    "EventCode": "0x1E054",
+-    "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
+-  },
+-  {
+-    "EventCode": "0x34054",
+-    "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
+-  },
+-  {
+-    "EventCode": "0x34056",
+-    "EventName": "PM_EXEC_STALL_LOAD_FINISH",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the next-to-finish (NTF) instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
+-  },
+-  {
+-    "EventCode": "0x3006C",
+-    "EventName": "PM_RUN_CYC_SMT2_MODE",
+-    "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
+-  },
+   {
+     "EventCode": "0x300F4",
+     "EventName": "PM_RUN_INST_CMPL_CONC",
+     "BriefDescription": "PowerPC instruction completed by this thread when all threads in the core had the run-latch set."
+   },
+-  {
+-    "EventCode": "0x4C016",
+-    "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict."
+-  },
+-  {
+-    "EventCode": "0x4D014",
+-    "EventName": "PM_EXEC_STALL_LOAD",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit."
+-  },
+-  {
+-    "EventCode": "0x4D016",
+-    "EventName": "PM_EXEC_STALL_PTESYNC",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit."
+-  },
+-  {
+-    "EventCode": "0x401EA",
+-    "EventName": "PM_THRESH_EXC_128",
+-    "BriefDescription": "Threshold counter exceeded a value of 128."
+-  },
+   {
+     "EventCode": "0x400F6",
+     "EventName": "PM_BR_MPRED_CMPL",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+new file mode 100644
+index 0000000000000..e816cd10c1293
+--- /dev/null
++++ b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+@@ -0,0 +1,67 @@
++[
++  {
++    "EventCode": "0x100F4",
++    "EventName": "PM_FLOP_CMPL",
++    "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops."
++  },
++  {
++    "EventCode": "0x45050",
++    "EventName": "PM_1FLOP_CMPL",
++    "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
++  },
++  {
++    "EventCode": "0x45052",
++    "EventName": "PM_4FLOP_CMPL",
++    "BriefDescription": "Four floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
++  },
++  {
++    "EventCode": "0x45054",
++    "EventName": "PM_FMA_CMPL",
++    "BriefDescription": "Two floating point instruction completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
++  },
++  {
++    "EventCode": "0x45056",
++    "EventName": "PM_SCALAR_FLOP_CMPL",
++    "BriefDescription": "Scalar floating point instruction completed."
++  },
++  {
++    "EventCode": "0x4505A",
++    "EventName": "PM_SP_FLOP_CMPL",
++    "BriefDescription": "Single Precision floating point instruction completed."
++  },
++  {
++    "EventCode": "0x4505C",
++    "EventName": "PM_MATH_FLOP_CMPL",
++    "BriefDescription": "Math floating point instruction completed."
++  },
++  {
++    "EventCode": "0x4D052",
++    "EventName": "PM_2FLOP_CMPL",
++    "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed."
++  },
++  {
++    "EventCode": "0x4D054",
++    "EventName": "PM_8FLOP_CMPL",
++    "BriefDescription": "Four Double Precision vector instruction completed."
++  },
++  {
++    "EventCode": "0x4D056",
++    "EventName": "PM_NON_FMA_FLOP_CMPL",
++    "BriefDescription": "Non FMA instruction completed."
++  },
++  {
++    "EventCode": "0x4D058",
++    "EventName": "PM_VECTOR_FLOP_CMPL",
++    "BriefDescription": "Vector floating point instruction completed."
++  },
++  {
++    "EventCode": "0x4D05A",
++    "EventName": "PM_NON_MATH_FLOP_CMPL",
++    "BriefDescription": "Non Math instruction completed."
++  },
++  {
++    "EventCode": "0x4D05C",
++    "EventName": "PM_DPP_FLOP_CMPL",
++    "BriefDescription": "Double-Precision or Quad-Precision instruction completed."
++  }
++]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+index 61e9e0222c873..dc0bb6c6338bf 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+@@ -1,64 +1,9 @@
+ [
+-  {
+-    "EventCode": "0x10004",
+-    "EventName": "PM_EXEC_STALL_TRANSLATION",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve."
+-  },
+-  {
+-    "EventCode": "0x10006",
+-    "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
+-    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any other reason."
+-  },
+-  {
+-    "EventCode": "0x10010",
+-    "EventName": "PM_PMC4_OVERFLOW",
+-    "BriefDescription": "The event selected for PMC4 caused the event counter to overflow."
+-  },
+-  {
+-    "EventCode": "0x10020",
+-    "EventName": "PM_PMC4_REWIND",
+-    "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged."
+-  },
+-  {
+-    "EventCode": "0x10038",
+-    "EventName": "PM_DISP_STALL_TRANSLATION",
+-    "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss."
+-  },
+-  {
+-    "EventCode": "0x1003A",
+-    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2",
+-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict."
+-  },
+-  {
+-    "EventCode": "0x1D05E",
+-    "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
+-    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of power management."
+-  },
+-  {
+-    "EventCode": "0x1E050",
+-    "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
+-    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
+-  },
+   {
+     "EventCode": "0x1F054",
+     "EventName": "PM_DTLB_HIT",
+     "BriefDescription": "The PTE required by the instruction was resident in the TLB (data TLB access). When MMCR1[16]=0 this event counts only demand hits. When MMCR1[16]=1 this event includes demand and prefetch. Applies to both HPT and RPT."
+   },
+-  {
+-    "EventCode": "0x10064",
+-    "EventName": "PM_DISP_STALL_IC_L2",
+-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
+-  },
+-  {
+-    "EventCode": "0x101E8",
+-    "EventName": "PM_THRESH_EXC_256",
+-    "BriefDescription": "Threshold counter exceeded a count of 256."
+-  },
+-  {
+-    "EventCode": "0x101EC",
+-    "EventName": "PM_THRESH_MET",
+-    "BriefDescription": "Threshold exceeded."
+-  },
+   {
+     "EventCode": "0x100F2",
+     "EventName": "PM_1PLUS_PPC_CMPL",
+@@ -69,56 +14,6 @@
+     "EventName": "PM_IERAT_MISS",
+     "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event. This event only counts instruction demand access."
+   },
+-  {
+-    "EventCode": "0x100F8",
+-    "EventName": "PM_DISP_STALL_CYC",
+-    "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)."
+-  },
+-  {
+-    "EventCode": "0x20006",
+-    "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
+-    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
+-  },
+-  {
+-    "EventCode": "0x20114",
+-    "EventName": "PM_MRK_L2_RC_DISP",
+-    "BriefDescription": "Marked instruction RC dispatched in L2."
+-  },
+-  {
+-    "EventCode": "0x2C010",
+-    "EventName": "PM_EXEC_STALL_LSU",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions."
+-  },
+-  {
+-    "EventCode": "0x2C016",
+-    "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS",
+-    "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss."
+-  },
+-  {
+-    "EventCode": "0x2C01E",
+-    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3",
+-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict."
+-  },
+-  {
+-    "EventCode": "0x2D01A",
+-    "EventName": "PM_DISP_STALL_IC_MISS",
+-    "BriefDescription": "Cycles when dispatch was stalled for this thread due to an instruction cache miss."
+-  },
+-  {
+-    "EventCode": "0x2E018",
+-    "EventName": "PM_DISP_STALL_FETCH",
+-    "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held."
+-  },
+-  {
+-    "EventCode": "0x2E01A",
+-    "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
+-    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the XVFC mapper/SRB was full."
+-  },
+-  {
+-    "EventCode": "0x2C142",
+-    "EventName": "PM_MRK_XFER_FROM_SRC_PMC2",
+-    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+-  },
+   {
+     "EventCode": "0x24050",
+     "EventName": "PM_IOPS_DISP",
+@@ -134,11 +29,6 @@
+     "EventName": "PM_BR_TAKEN_CMPL",
+     "BriefDescription": "Branch Taken instruction completed."
+   },
+-  {
+-    "EventCode": "0x30004",
+-    "EventName": "PM_DISP_STALL_FLUSH",
+-    "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet next-to-complete (NTC). PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
+-  },
+   {
+     "EventCode": "0x3000A",
+     "EventName": "PM_DISP_STALL_ITLB_MISS",
+@@ -149,56 +39,16 @@
+     "EventName": "PM_FLUSH_COMPLETION",
+     "BriefDescription": "The instruction that was next to complete (oldest in the pipeline) did not complete because it suffered a flush."
+   },
+-  {
+-    "EventCode": "0x30014",
+-    "EventName": "PM_EXEC_STALL_STORE",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit."
+-  },
+-  {
+-    "EventCode": "0x30018",
+-    "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
+-    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
+-  },
+-  {
+-    "EventCode": "0x30026",
+-    "EventName": "PM_EXEC_STALL_STORE_MISS",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1."
+-  },
+-  {
+-    "EventCode": "0x3012A",
+-    "EventName": "PM_MRK_L2_RC_DONE",
+-    "BriefDescription": "L2 RC machine completed the transaction for the marked instruction."
+-  },
+   {
+     "EventCode": "0x3F046",
+     "EventName": "PM_ITLB_HIT_1G",
+     "BriefDescription": "Instruction TLB hit (IERAT reload) page size 1G, which implies Radix Page Table translation is in use. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
+   },
+-  {
+-    "EventCode": "0x34058",
+-    "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS",
+-    "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss."
+-  },
+-  {
+-    "EventCode": "0x3D05C",
+-    "EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
+-    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
+-  },
+-  {
+-    "EventCode": "0x3E052",
+-    "EventName": "PM_DISP_STALL_IC_L3",
+-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
+-  },
+   {
+     "EventCode": "0x3E054",
+     "EventName": "PM_LD_MISS_L1",
+     "BriefDescription": "Load missed L1, counted at finish time. LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
+   },
+-  {
+-    "EventCode": "0x301EA",
+-    "EventName": "PM_THRESH_EXC_1024",
+-    "BriefDescription": "Threshold counter exceeded a value of 1024."
+-  },
+   {
+     "EventCode": "0x300FA",
+     "EventName": "PM_INST_FROM_L3MISS",
+@@ -209,36 +59,6 @@
+     "EventName": "PM_ISSUE_KILL",
+     "BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group."
+   },
+-  {
+-    "EventCode": "0x40116",
+-    "EventName": "PM_MRK_LARX_FIN",
+-    "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
+-  },
+-  {
+-    "EventCode": "0x4C010",
+-    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS",
+-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch."
+-  },
+-  {
+-    "EventCode": "0x4D01E",
+-    "EventName": "PM_DISP_STALL_BR_MPRED",
+-    "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch."
+-  },
+-  {
+-    "EventCode": "0x4E010",
+-    "EventName": "PM_DISP_STALL_IC_L3MISS",
+-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3."
+-  },
+-  {
+-    "EventCode": "0x4E01A",
+-    "EventName": "PM_DISP_STALL_HELD_CYC",
+-    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any reason."
+-  },
+-  {
+-    "EventCode": "0x4003C",
+-    "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
+-    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
+-  },
+   {
+     "EventCode": "0x44056",
+     "EventName": "PM_VECTOR_ST_CMPL",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+index f2436fc5537ce..913b6515b8701 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/marked.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+@@ -1,14 +1,29 @@
+ [
+-  {
+-    "EventCode": "0x1002C",
+-    "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
+-    "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
+-  },
+   {
+     "EventCode": "0x10132",
+     "EventName": "PM_MRK_INST_ISSUED",
+     "BriefDescription": "Marked instruction issued. Note that stores always get issued twice, the address gets issued to the LSU and the data gets issued to the VSU. Also, issues can sometimes get killed/cancelled and cause multiple sequential issues for the same instruction."
+   },
++  {
++    "EventCode": "0x10134",
++    "EventName": "PM_MRK_ST_DONE_L2",
++    "BriefDescription": "Marked store completed in L2."
++  },
++  {
++    "EventCode": "0x1C142",
++    "EventName": "PM_MRK_XFER_FROM_SRC_PMC1",
++    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
++  },
++  {
++    "EventCode": "0x1C144",
++    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1",
++    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]."
++  },
++  {
++    "EventCode": "0x1F150",
++    "EventName": "PM_MRK_ST_L2_CYC",
++    "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion."
++  },
+   {
+     "EventCode": "0x101E0",
+     "EventName": "PM_MRK_INST_DISP",
+@@ -20,9 +35,39 @@
+     "BriefDescription": "Marked Branch Taken instruction completed."
+   },
+   {
+-    "EventCode": "0x2C01C",
+-    "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip."
++    "EventCode": "0x101E4",
++    "EventName": "PM_MRK_L1_ICACHE_MISS",
++    "BriefDescription": "Marked instruction suffered an instruction cache miss."
++  },
++  {
++    "EventCode": "0x101EA",
++    "EventName": "PM_MRK_L1_RELOAD_VALID",
++    "BriefDescription": "Marked demand reload."
++  },
++  {
++    "EventCode": "0x20114",
++    "EventName": "PM_MRK_L2_RC_DISP",
++    "BriefDescription": "Marked instruction RC dispatched in L2."
++  },
++  {
++    "EventCode": "0x2011C",
++    "EventName": "PM_MRK_NTF_CYC",
++    "BriefDescription": "Cycles in which the marked instruction is the oldest in the pipeline (next-to-finish or next-to-complete)."
++  },
++  {
++    "EventCode": "0x20130",
++    "EventName": "PM_MRK_INST_DECODED",
++    "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only."
++  },
++  {
++    "EventCode": "0x20132",
++    "EventName": "PM_MRK_DFU_ISSUE",
++    "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time."
++  },
++  {
++    "EventCode": "0x20134",
++    "EventName": "PM_MRK_FXU_ISSUE",
++    "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time."
+   },
+   {
+     "EventCode": "0x20138",
+@@ -34,6 +79,16 @@
+     "EventName": "PM_MRK_BRU_FIN",
+     "BriefDescription": "Marked Branch instruction finished."
+   },
++  {
++    "EventCode": "0x2013C",
++    "EventName": "PM_MRK_FX_LSU_FIN",
++    "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time."
++  },
++  {
++    "EventCode": "0x2C142",
++    "EventName": "PM_MRK_XFER_FROM_SRC_PMC2",
++    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
++  },
+   {
+     "EventCode": "0x2C144",
+     "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC2",
+@@ -55,15 +110,50 @@
+     "BriefDescription": "A marked branch completed. All branches are included."
+   },
+   {
+-    "EventCode": "0x200FD",
+-    "EventName": "PM_L1_ICACHE_MISS",
+-    "BriefDescription": "Demand instruction cache miss."
++    "EventCode": "0x2D154",
++    "EventName": "PM_MRK_DERAT_MISS_64K",
++    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
++  },
++  {
++    "EventCode": "0x201E0",
++    "EventName": "PM_MRK_DATA_FROM_MEMORY",
++    "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load."
++  },
++  {
++    "EventCode": "0x201E2",
++    "EventName": "PM_MRK_LD_MISS_L1",
++    "BriefDescription": "Marked demand data load miss counted at finish time."
++  },
++  {
++    "EventCode": "0x201E4",
++    "EventName": "PM_MRK_DATA_FROM_L3MISS",
++    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
++  },
++  {
++    "EventCode": "0x3012A",
++    "EventName": "PM_MRK_L2_RC_DONE",
++    "BriefDescription": "L2 RC machine completed the transaction for the marked instruction."
++  },
++  {
++    "EventCode": "0x30132",
++    "EventName": "PM_MRK_VSU_FIN",
++    "BriefDescription": "VSU marked instruction finished. Excludes simple FX instructions issued to the Store Unit."
+   },
+   {
+     "EventCode": "0x34146",
+     "EventName": "PM_MRK_LD_CMPL",
+     "BriefDescription": "Marked load instruction completed."
+   },
++  {
++    "EventCode": "0x3C142",
++    "EventName": "PM_MRK_XFER_FROM_SRC_PMC3",
++    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
++  },
++  {
++    "EventCode": "0x3C144",
++    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3",
++    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]."
++  },
+   {
+     "EventCode": "0x3E158",
+     "EventName": "PM_MRK_STCX_FAIL",
+@@ -75,9 +165,19 @@
+     "BriefDescription": "Marked store instruction finished."
+   },
+   {
+-    "EventCode": "0x30068",
+-    "EventName": "PM_L1_ICACHE_RELOADED_PREF",
+-    "BriefDescription": "Counts all instruction cache prefetch reloads (includes demand turned into prefetch)."
++    "EventCode": "0x3F150",
++    "EventName": "PM_MRK_ST_DRAIN_CYC",
++    "BriefDescription": "Cycles in which the marked store drained from the core to the L2."
++  },
++  {
++    "EventCode": "0x30162",
++    "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD",
++    "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill."
++  },
++  {
++    "EventCode": "0x301E2",
++    "EventName": "PM_MRK_ST_CMPL",
++    "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores."
+   },
+   {
+     "EventCode": "0x301E4",
+@@ -85,39 +185,44 @@
+     "BriefDescription": "Marked Branch Mispredicted. Includes direction and target."
+   },
+   {
+-    "EventCode": "0x300F6",
+-    "EventName": "PM_LD_DEMAND_MISS_L1",
+-    "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
++    "EventCode": "0x40116",
++    "EventName": "PM_MRK_LARX_FIN",
++    "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
++  },
++  {
++    "EventCode": "0x40132",
++    "EventName": "PM_MRK_LSU_FIN",
++    "BriefDescription": "LSU marked instruction finish."
+   },
+   {
+-    "EventCode": "0x300FE",
+-    "EventName": "PM_DATA_FROM_L3MISS",
+-    "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss."
++    "EventCode": "0x44146",
++    "EventName": "PM_MRK_STCX_CORE_CYC",
++    "BriefDescription": "Cycles spent in the core portion of a marked STCX instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
+   },
+   {
+-    "EventCode": "0x40012",
+-    "EventName": "PM_L1_ICACHE_RELOADED_ALL",
+-    "BriefDescription": "Counts all instruction cache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
++    "EventCode": "0x4C142",
++    "EventName": "PM_MRK_XFER_FROM_SRC_PMC4",
++    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+   },
+   {
+-    "EventCode": "0x40134",
+-    "EventName": "PM_MRK_INST_TIMEO",
+-    "BriefDescription": "Marked instruction finish timeout (instruction was lost)."
++    "EventCode": "0x4C144",
++    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4",
++    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]."
+   },
+   {
+-    "EventCode": "0x4505A",
+-    "EventName": "PM_SP_FLOP_CMPL",
+-    "BriefDescription": "Single Precision floating point instruction completed."
++    "EventCode": "0x4C15E",
++    "EventName": "PM_MRK_DTLB_MISS_64K",
++    "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+   },
+   {
+-    "EventCode": "0x4D058",
+-    "EventName": "PM_VECTOR_FLOP_CMPL",
+-    "BriefDescription": "Vector floating point instruction completed."
++    "EventCode": "0x4E15E",
++    "EventName": "PM_MRK_INST_FLUSHED",
++    "BriefDescription": "The marked instruction was flushed."
+   },
+   {
+-    "EventCode": "0x4D05A",
+-    "EventName": "PM_NON_MATH_FLOP_CMPL",
+-    "BriefDescription": "Non Math instruction completed."
++    "EventCode": "0x40164",
++    "EventName": "PM_MRK_DERAT_MISS_2M",
++    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+   },
+   {
+     "EventCode": "0x401E0",
+@@ -125,8 +230,13 @@
+     "BriefDescription": "Marked instruction completed."
+   },
+   {
+-    "EventCode": "0x400FE",
+-    "EventName": "PM_DATA_FROM_MEMORY",
+-    "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
++    "EventCode": "0x401E6",
++    "EventName": "PM_MRK_INST_FROM_L3MISS",
++    "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
++  },
++  {
++    "EventCode": "0x401E8",
++    "EventName": "PM_MRK_DATA_FROM_L2MISS",
++    "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction."
+   }
+ ]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+index c4c10ca98cad7..b95a547a704b3 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/memory.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+@@ -1,24 +1,9 @@
+ [
+-  {
+-    "EventCode": "0x1000A",
+-    "EventName": "PM_PMC3_REWIND",
+-    "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged."
+-  },
+   {
+     "EventCode": "0x1C040",
+     "EventName": "PM_XFER_FROM_SRC_PMC1",
+     "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+   },
+-  {
+-    "EventCode": "0x1C142",
+-    "EventName": "PM_MRK_XFER_FROM_SRC_PMC1",
+-    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+-  },
+-  {
+-    "EventCode": "0x1C144",
+-    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1",
+-    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]."
+-  },
+   {
+     "EventCode": "0x1C056",
+     "EventName": "PM_DERAT_MISS_4K",
+@@ -34,26 +19,11 @@
+     "EventName": "PM_DTLB_MISS_2M",
+     "BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+   },
+-  {
+-    "EventCode": "0x1E056",
+-    "EventName": "PM_EXEC_STALL_STORE_PIPE",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions."
+-  },
+-  {
+-    "EventCode": "0x1F150",
+-    "EventName": "PM_MRK_ST_L2_CYC",
+-    "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion."
+-  },
+   {
+     "EventCode": "0x10062",
+     "EventName": "PM_LD_L3MISS_PEND_CYC",
+     "BriefDescription": "Cycles in which an L3 miss was pending for this thread."
+   },
+-  {
+-    "EventCode": "0x20010",
+-    "EventName": "PM_PMC1_OVERFLOW",
+-    "BriefDescription": "The event selected for PMC1 caused the event counter to overflow."
+-  },
+   {
+     "EventCode": "0x2001A",
+     "EventName": "PM_ITLB_HIT",
+@@ -79,36 +49,16 @@
+     "EventName": "PM_DTLB_MISS_4K",
+     "BriefDescription": "Data TLB reload (after a miss) page size 4K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+   },
+-  {
+-    "EventCode": "0x2D154",
+-    "EventName": "PM_MRK_DERAT_MISS_64K",
+-    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+-  },
+   {
+     "EventCode": "0x200F6",
+     "EventName": "PM_DERAT_MISS",
+     "BriefDescription": "DERAT Reloaded to satisfy a DERAT miss. All page sizes are counted by this event. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+   },
+-  {
+-    "EventCode": "0x30016",
+-    "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve."
+-  },
+   {
+     "EventCode": "0x3C040",
+     "EventName": "PM_XFER_FROM_SRC_PMC3",
+     "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+   },
+-  {
+-    "EventCode": "0x3C142",
+-    "EventName": "PM_MRK_XFER_FROM_SRC_PMC3",
+-    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+-  },
+-  {
+-    "EventCode": "0x3C144",
+-    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3",
+-    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]."
+-  },
+   {
+     "EventCode": "0x3C054",
+     "EventName": "PM_DERAT_MISS_16M",
+@@ -124,21 +74,11 @@
+     "EventName": "PM_LARX_FIN",
+     "BriefDescription": "Load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
+   },
+-  {
+-    "EventCode": "0x301E2",
+-    "EventName": "PM_MRK_ST_CMPL",
+-    "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores."
+-  },
+   {
+     "EventCode": "0x300FC",
+     "EventName": "PM_DTLB_MISS",
+     "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. This event only counts for demand misses."
+   },
+-  {
+-    "EventCode": "0x4D02C",
+-    "EventName": "PM_PMC1_REWIND",
+-    "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged."
+-  },
+   {
+     "EventCode": "0x4003E",
+     "EventName": "PM_LD_CMPL",
+@@ -149,16 +89,6 @@
+     "EventName": "PM_XFER_FROM_SRC_PMC4",
+     "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+   },
+-  {
+-    "EventCode": "0x4C142",
+-    "EventName": "PM_MRK_XFER_FROM_SRC_PMC4",
+-    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+-  },
+-  {
+-    "EventCode": "0x4C144",
+-    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4",
+-    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]."
+-  },
+   {
+     "EventCode": "0x4C056",
+     "EventName": "PM_DTLB_MISS_16M",
+@@ -168,20 +98,5 @@
+     "EventCode": "0x4C05A",
+     "EventName": "PM_DTLB_MISS_1G",
+     "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+-  },
+-  {
+-    "EventCode": "0x4C15E",
+-    "EventName": "PM_MRK_DTLB_MISS_64K",
+-    "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+-  },
+-  {
+-    "EventCode": "0x4D056",
+-    "EventName": "PM_NON_FMA_FLOP_CMPL",
+-    "BriefDescription": "Non FMA instruction completed."
+-  },
+-  {
+-    "EventCode": "0x40164",
+-    "EventName": "PM_MRK_DERAT_MISS_2M",
+-    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+   }
+ ]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json
+index 17c5424ef1ac1..f09c00c89322e 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/others.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json
+@@ -1,23 +1,8 @@
+ [
+   {
+-    "EventCode": "0x10016",
+-    "EventName": "PM_VSU0_ISSUE",
+-    "BriefDescription": "VSU instruction issued to VSU pipe 0."
+-  },
+-  {
+-    "EventCode": "0x1001C",
+-    "EventName": "PM_ULTRAVISOR_INST_CMPL",
+-    "BriefDescription": "PowerPC instruction completed while the thread was in ultravisor state."
+-  },
+-  {
+-    "EventCode": "0x100F0",
+-    "EventName": "PM_CYC",
+-    "BriefDescription": "Processor cycles."
+-  },
+-  {
+-    "EventCode": "0x10134",
+-    "EventName": "PM_MRK_ST_DONE_L2",
+-    "BriefDescription": "Marked store completed in L2."
++    "EventCode": "0x1002C",
++    "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
++    "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
+   },
+   {
+     "EventCode": "0x1505E",
+@@ -34,36 +19,11 @@
+     "EventName": "PM_ADJUNCT_CYC",
+     "BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011."
+   },
+-  {
+-    "EventCode": "0x101E4",
+-    "EventName": "PM_MRK_L1_ICACHE_MISS",
+-    "BriefDescription": "Marked instruction suffered an instruction cache miss."
+-  },
+-  {
+-    "EventCode": "0x101EA",
+-    "EventName": "PM_MRK_L1_RELOAD_VALID",
+-    "BriefDescription": "Marked demand reload."
+-  },
+-  {
+-    "EventCode": "0x100F4",
+-    "EventName": "PM_FLOP_CMPL",
+-    "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops."
+-  },
+-  {
+-    "EventCode": "0x100FA",
+-    "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC",
+-    "BriefDescription": "Cycles when at least one thread has the run latch set."
+-  },
+   {
+     "EventCode": "0x100FC",
+     "EventName": "PM_LD_REF_L1",
+     "BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included."
+   },
+-  {
+-    "EventCode": "0x2000C",
+-    "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC",
+-    "BriefDescription": "Cycles when the run latch is set for all threads."
+-  },
+   {
+     "EventCode": "0x2E010",
+     "EventName": "PM_ADJUNCT_INST_CMPL",
+@@ -74,26 +34,6 @@
+     "EventName": "PM_STCX_FIN",
+     "BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
+   },
+-  {
+-    "EventCode": "0x20130",
+-    "EventName": "PM_MRK_INST_DECODED",
+-    "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only."
+-  },
+-  {
+-    "EventCode": "0x20132",
+-    "EventName": "PM_MRK_DFU_ISSUE",
+-    "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time."
+-  },
+-  {
+-    "EventCode": "0x20134",
+-    "EventName": "PM_MRK_FXU_ISSUE",
+-    "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time."
+-  },
+-  {
+-    "EventCode": "0x2505C",
+-    "EventName": "PM_VSU_ISSUE",
+-    "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations."
+-  },
+   {
+     "EventCode": "0x2F054",
+     "EventName": "PM_DISP_SS1_2_INSTR_CYC",
+@@ -104,40 +44,15 @@
+     "EventName": "PM_DISP_SS1_4_INSTR_CYC",
+     "BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions."
+   },
+-  {
+-    "EventCode": "0x2006C",
+-    "EventName": "PM_RUN_CYC_SMT4_MODE",
+-    "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode."
+-  },
+-  {
+-    "EventCode": "0x201E0",
+-    "EventName": "PM_MRK_DATA_FROM_MEMORY",
+-    "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load."
+-  },
+-  {
+-    "EventCode": "0x201E4",
+-    "EventName": "PM_MRK_DATA_FROM_L3MISS",
+-    "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
+-  },
+-  {
+-    "EventCode": "0x201E8",
+-    "EventName": "PM_THRESH_EXC_512",
+-    "BriefDescription": "Threshold counter exceeded a value of 512."
+-  },
+   {
+     "EventCode": "0x200F2",
+     "EventName": "PM_INST_DISP",
+     "BriefDescription": "PowerPC instruction dispatched."
+   },
+   {
+-    "EventCode": "0x30132",
+-    "EventName": "PM_MRK_VSU_FIN",
+-    "BriefDescription": "VSU marked instruction finished. Excludes simple FX instructions issued to the Store Unit."
+-  },
+-  {
+-    "EventCode": "0x30038",
+-    "EventName": "PM_EXEC_STALL_DMISS_LMEM",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCAPI cache, or local OpenCAPI memory."
++    "EventCode": "0x200FD",
++    "EventName": "PM_L1_ICACHE_MISS",
++    "BriefDescription": "Demand instruction cache miss."
+   },
+   {
+     "EventCode": "0x3F04A",
+@@ -149,11 +64,6 @@
+     "EventName": "PM_PRIVILEGED_INST_CMPL",
+     "BriefDescription": "PowerPC instruction completed while the thread was in Privileged state."
+   },
+-  {
+-    "EventCode": "0x3F150",
+-    "EventName": "PM_MRK_ST_DRAIN_CYC",
+-    "BriefDescription": "Cycles in which the marked store drained from the core to the L2."
+-  },
+   {
+     "EventCode": "0x3F054",
+     "EventName": "PM_DISP_SS0_4_INSTR_CYC",
+@@ -165,103 +75,43 @@
+     "BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions."
+   },
+   {
+-    "EventCode": "0x30162",
+-    "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD",
+-    "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill."
+-  },
+-  {
+-    "EventCode": "0x40114",
+-    "EventName": "PM_MRK_START_PROBE_NOP_DISP",
+-    "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0."
+-  },
+-  {
+-    "EventCode": "0x4001C",
+-    "EventName": "PM_VSU_FIN",
+-    "BriefDescription": "VSU instruction finished."
+-  },
+-  {
+-    "EventCode": "0x4C01A",
+-    "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip."
+-  },
+-  {
+-    "EventCode": "0x4D012",
+-    "EventName": "PM_PMC3_SAVED",
+-    "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged."
+-  },
+-  {
+-    "EventCode": "0x4D022",
+-    "EventName": "PM_HYPERVISOR_INST_CMPL",
+-    "BriefDescription": "PowerPC instruction completed while the thread was in hypervisor state."
+-  },
+-  {
+-    "EventCode": "0x4D026",
+-    "EventName": "PM_ULTRAVISOR_CYC",
+-    "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110."
++    "EventCode": "0x30068",
++    "EventName": "PM_L1_ICACHE_RELOADED_PREF",
++    "BriefDescription": "Counts all instruction cache prefetch reloads (includes demand turned into prefetch)."
+   },
+   {
+-    "EventCode": "0x4D028",
+-    "EventName": "PM_PRIVILEGED_CYC",
+-    "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00."
++    "EventCode": "0x300F6",
++    "EventName": "PM_LD_DEMAND_MISS_L1",
++    "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
+   },
+   {
+-    "EventCode": "0x40030",
+-    "EventName": "PM_INST_FIN",
+-    "BriefDescription": "Instruction finished."
++    "EventCode": "0x300FE",
++    "EventName": "PM_DATA_FROM_L3MISS",
++    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
+   },
+   {
+-    "EventCode": "0x44146",
+-    "EventName": "PM_MRK_STCX_CORE_CYC",
+-    "BriefDescription": "Cycles spent in the core portion of a marked STCX instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
++    "EventCode": "0x40012",
++    "EventName": "PM_L1_ICACHE_RELOADED_ALL",
++    "BriefDescription": "Counts all instruction cache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
+   },
+   {
+     "EventCode": "0x44054",
+     "EventName": "PM_VECTOR_LD_CMPL",
+     "BriefDescription": "Vector load instruction completed."
+   },
+-  {
+-    "EventCode": "0x45054",
+-    "EventName": "PM_FMA_CMPL",
+-    "BriefDescription": "Two floating point instruction completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
+-  },
+-  {
+-    "EventCode": "0x45056",
+-    "EventName": "PM_SCALAR_FLOP_CMPL",
+-    "BriefDescription": "Scalar floating point instruction completed."
+-  },
+-  {
+-    "EventCode": "0x4505C",
+-    "EventName": "PM_MATH_FLOP_CMPL",
+-    "BriefDescription": "Math floating point instruction completed."
+-  },
+   {
+     "EventCode": "0x4D05E",
+     "EventName": "PM_BR_CMPL",
+     "BriefDescription": "A branch completed. All branches are included."
+   },
+-  {
+-    "EventCode": "0x4E15E",
+-    "EventName": "PM_MRK_INST_FLUSHED",
+-    "BriefDescription": "The marked instruction was flushed."
+-  },
+-  {
+-    "EventCode": "0x401E6",
+-    "EventName": "PM_MRK_INST_FROM_L3MISS",
+-    "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
+-  },
+-  {
+-    "EventCode": "0x401E8",
+-    "EventName": "PM_MRK_DATA_FROM_L2MISS",
+-    "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction."
+-  },
+   {
+     "EventCode": "0x400F0",
+     "EventName": "PM_LD_DEMAND_MISS_L1_FIN",
+     "BriefDescription": "Load missed L1, counted at finish time."
+   },
+   {
+-    "EventCode": "0x500FA",
+-    "EventName": "PM_RUN_INST_CMPL",
+-    "BriefDescription": "PowerPC instruction completed while the run latch is set."
++    "EventCode": "0x400FE",
++    "EventName": "PM_DATA_FROM_MEMORY",
++    "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
+   }
+ ]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+index 799893c56f32b..a8272a2f05174 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+@@ -1,8 +1,13 @@
+ [
+   {
+-    "EventCode": "0x100FE",
+-    "EventName": "PM_INST_CMPL",
+-    "BriefDescription": "PowerPC instruction completed."
++    "EventCode": "0x10004",
++    "EventName": "PM_EXEC_STALL_TRANSLATION",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve."
++  },
++  {
++    "EventCode": "0x10006",
++    "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any other reason."
+   },
+   {
+     "EventCode": "0x1000C",
+@@ -30,14 +35,19 @@
+     "BriefDescription": "Cycles in which an instruction reload is pending to satisfy a demand miss."
+   },
+   {
+-    "EventCode": "0x10022",
+-    "EventName": "PM_PMC2_SAVED",
+-    "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged."
++    "EventCode": "0x10038",
++    "EventName": "PM_DISP_STALL_TRANSLATION",
++    "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss."
+   },
+   {
+-    "EventCode": "0x10024",
+-    "EventName": "PM_PMC5_OVERFLOW",
+-    "BriefDescription": "The event selected for PMC5 caused the event counter to overflow."
++    "EventCode": "0x1003A",
++    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2",
++    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict."
++  },
++  {
++    "EventCode": "0x1003C",
++    "EventName": "PM_EXEC_STALL_DMISS_L2L3",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3."
+   },
+   {
+     "EventCode": "0x10058",
+@@ -54,11 +64,36 @@
+     "EventName": "PM_DERAT_MISS_2M",
+     "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+   },
++  {
++    "EventCode": "0x1D05E",
++    "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of power management."
++  },
++  {
++    "EventCode": "0x1E050",
++    "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
++  },
++  {
++    "EventCode": "0x1E054",
++    "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
++  },
++  {
++    "EventCode": "0x1E056",
++    "EventName": "PM_EXEC_STALL_STORE_PIPE",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions."
++  },
+   {
+     "EventCode": "0x1E05A",
+     "EventName": "PM_CMPL_STALL_LWSYNC",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete."
+   },
++  {
++    "EventCode": "0x10064",
++    "EventName": "PM_DISP_STALL_IC_L2",
++    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
++  },
+   {
+     "EventCode": "0x10068",
+     "EventName": "PM_BR_FIN",
+@@ -70,9 +105,9 @@
+     "BriefDescription": "Simple fixed point instruction issued to the store unit. Measured at finish time."
+   },
+   {
+-    "EventCode": "0x1006C",
+-    "EventName": "PM_RUN_CYC_ST_MODE",
+-    "BriefDescription": "Cycles when the run latch is set and the core is in ST mode."
++    "EventCode": "0x100F8",
++    "EventName": "PM_DISP_STALL_CYC",
++    "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)."
+   },
+   {
+     "EventCode": "0x20004",
+@@ -80,69 +115,114 @@
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was dispatched but not issued yet."
+   },
+   {
+-    "EventCode": "0x2000A",
+-    "EventName": "PM_HYPERVISOR_CYC",
+-    "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010."
++    "EventCode": "0x20006",
++    "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
+   },
+   {
+     "EventCode": "0x2000E",
+     "EventName": "PM_LSU_LD1_FIN",
+     "BriefDescription": "LSU Finished an internal operation in LD1 port."
+   },
++  {
++    "EventCode": "0x2C010",
++    "EventName": "PM_EXEC_STALL_LSU",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions."
++  },
+   {
+     "EventCode": "0x2C014",
+     "EventName": "PM_CMPL_STALL_SPECIAL",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline required special handling before completing."
+   },
++  {
++    "EventCode": "0x2C016",
++    "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS",
++    "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss."
++  },
+   {
+     "EventCode": "0x2C018",
+     "EventName": "PM_EXEC_STALL_DMISS_L3MISS",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a source beyond the local L2 or local L3."
+   },
++  {
++    "EventCode": "0x2C01C",
++    "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip."
++  },
++  {
++    "EventCode": "0x2C01E",
++    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3",
++    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict."
++  },
+   {
+     "EventCode": "0x2D010",
+     "EventName": "PM_LSU_ST1_FIN",
+     "BriefDescription": "LSU Finished an internal operation in ST1 port."
+   },
++  {
++    "EventCode": "0x10016",
++    "EventName": "PM_VSU0_ISSUE",
++    "BriefDescription": "VSU instruction issued to VSU pipe 0."
++  },
+   {
+     "EventCode": "0x2D012",
+     "EventName": "PM_VSU1_ISSUE",
+     "BriefDescription": "VSU instruction issued to VSU pipe 1."
+   },
++  {
++    "EventCode": "0x2505C",
++    "EventName": "PM_VSU_ISSUE",
++    "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations."
++  },
++  {
++    "EventCode": "0x4001C",
++    "EventName": "PM_VSU_FIN",
++    "BriefDescription": "VSU instruction finished."
++  },
+   {
+     "EventCode": "0x2D018",
+     "EventName": "PM_EXEC_STALL_VSU",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU)."
+   },
++  {
++    "EventCode": "0x2D01A",
++    "EventName": "PM_DISP_STALL_IC_MISS",
++    "BriefDescription": "Cycles when dispatch was stalled for this thread due to an instruction cache miss."
++  },
+   {
+     "EventCode": "0x2D01C",
+     "EventName": "PM_CMPL_STALL_STCX",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing."
+   },
++  {
++    "EventCode": "0x2E018",
++    "EventName": "PM_DISP_STALL_FETCH",
++    "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held."
++  },
++  {
++    "EventCode": "0x2E01A",
++    "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the XVFC mapper/SRB was full."
++  },
++  {
++    "EventCode": "0x2E01C",
++    "EventName": "PM_EXEC_STALL_TLBIE",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit."
++  },
+   {
+     "EventCode": "0x2E01E",
+     "EventName": "PM_EXEC_STALL_NTC_FLUSH",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous next-to-finish (NTF) instruction is still completing and the new NTF instruction is stalled at dispatch."
+   },
+-  {
+-    "EventCode": "0x2013C",
+-    "EventName": "PM_MRK_FX_LSU_FIN",
+-    "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time."
+-  },
+   {
+     "EventCode": "0x2405A",
+     "EventName": "PM_NTC_FIN",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status."
+   },
+   {
+-    "EventCode": "0x201E2",
+-    "EventName": "PM_MRK_LD_MISS_L1",
+-    "BriefDescription": "Marked demand data load miss counted at finish time."
+-  },
+-  {
+-    "EventCode": "0x200F4",
+-    "EventName": "PM_RUN_CYC",
+-    "BriefDescription": "Processor cycles gated by the run latch."
++    "EventCode": "0x30004",
++    "EventName": "PM_DISP_STALL_FLUSH",
++    "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet next-to-complete (NTC). PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
+   },
+   {
+     "EventCode": "0x30008",
+@@ -150,24 +230,29 @@
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting to finish in one of the execution units (BRU, LSU, VSU). Only cycles between issue and finish are counted in this category."
+   },
+   {
+-    "EventCode": "0x3001A",
+-    "EventName": "PM_LSU_ST2_FIN",
+-    "BriefDescription": "LSU Finished an internal operation in ST2 port."
++    "EventCode": "0x30014",
++    "EventName": "PM_EXEC_STALL_STORE",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit."
+   },
+   {
+-    "EventCode": "0x30020",
+-    "EventName": "PM_PMC2_REWIND",
+-    "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged."
++    "EventCode": "0x30016",
++    "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it to resolve."
+   },
+   {
+-    "EventCode": "0x30022",
+-    "EventName": "PM_PMC4_SAVED",
+-    "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged."
++    "EventCode": "0x30018",
++    "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
++  },
++  {
++    "EventCode": "0x3001A",
++    "EventName": "PM_LSU_ST2_FIN",
++    "BriefDescription": "LSU Finished an internal operation in ST2 port."
+   },
+   {
+-    "EventCode": "0x30024",
+-    "EventName": "PM_PMC6_OVERFLOW",
+-    "BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
++    "EventCode": "0x30026",
++    "EventName": "PM_EXEC_STALL_STORE_MISS",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1."
+   },
+   {
+     "EventCode": "0x30028",
+@@ -179,6 +264,11 @@
+     "EventName": "PM_EXEC_STALL_SIMPLE_FX",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a simple fixed point instruction executing in the Load Store Unit."
+   },
++  {
++    "EventCode": "0x30038",
++    "EventName": "PM_EXEC_STALL_DMISS_LMEM",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCAPI cache, or local OpenCAPI memory."
++  },
+   {
+     "EventCode": "0x3003A",
+     "EventName": "PM_CMPL_STALL_EXCEPTION",
+@@ -194,6 +284,31 @@
+     "EventName": "PM_TLBIE_FIN",
+     "BriefDescription": "TLBIE instruction finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
+   },
++  {
++    "EventCode": "0x34054",
++    "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
++  },
++  {
++    "EventCode": "0x34056",
++    "EventName": "PM_EXEC_STALL_LOAD_FINISH",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the next-to-finish (NTF) instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
++  },
++  {
++    "EventCode": "0x34058",
++    "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS",
++    "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss."
++  },
++  {
++    "EventCode": "0x3D05C",
++    "EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
++  },
++  {
++    "EventCode": "0x3E052",
++    "EventName": "PM_DISP_STALL_IC_L3",
++    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
++  },
+   {
+     "EventCode": "0x30066",
+     "EventName": "PM_LSU_FIN",
+@@ -210,25 +325,45 @@
+     "BriefDescription": "Cycles in which both instructions in the ICT entry pair show as finished. These are the cycles between finish and completion for the oldest pair of instructions in the pipeline."
+   },
+   {
+-    "EventCode": "0x40010",
+-    "EventName": "PM_PMC3_OVERFLOW",
+-    "BriefDescription": "The event selected for PMC3 caused the event counter to overflow."
++    "EventCode": "0x4C010",
++    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS",
++    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch."
+   },
+   {
+     "EventCode": "0x4C012",
+     "EventName": "PM_EXEC_STALL_DERAT_ONLY_MISS",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered an ERAT miss and waited for it resolve."
+   },
++  {
++    "EventCode": "0x4C016",
++    "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict."
++  },
+   {
+     "EventCode": "0x4C018",
+     "EventName": "PM_CMPL_STALL",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline cannot complete because the thread was blocked for any reason."
+   },
++  {
++    "EventCode": "0x4C01A",
++    "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip."
++  },
+   {
+     "EventCode": "0x4C01E",
+     "EventName": "PM_LSU_ST3_FIN",
+     "BriefDescription": "LSU Finished an internal operation in ST3 port."
+   },
++  {
++    "EventCode": "0x4D014",
++    "EventName": "PM_EXEC_STALL_LOAD",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit."
++  },
++  {
++    "EventCode": "0x4D016",
++    "EventName": "PM_EXEC_STALL_PTESYNC",
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit."
++  },
+   {
+     "EventCode": "0x4D018",
+     "EventName": "PM_EXEC_STALL_BRU",
+@@ -244,31 +379,41 @@
+     "EventName": "PM_EXEC_STALL_TLBIEL",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIEL instruction executing in the Load Store Unit. TLBIEL instructions have lower overhead than TLBIE instructions because they don't get set to the nest."
+   },
++  {
++    "EventCode": "0x4D01E",
++    "EventName": "PM_DISP_STALL_BR_MPRED",
++    "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch."
++  },
++  {
++    "EventCode": "0x4E010",
++    "EventName": "PM_DISP_STALL_IC_L3MISS",
++    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3."
++  },
+   {
+     "EventCode": "0x4E012",
+     "EventName": "PM_EXEC_STALL_UNKNOWN",
+     "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the next-to-finish (NTF) instruction finishes and completions came too close together."
+   },
++  {
++    "EventCode": "0x4E01A",
++    "EventName": "PM_DISP_STALL_HELD_CYC",
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any reason."
++  },
+   {
+     "EventCode": "0x4D020",
+     "EventName": "PM_VSU3_ISSUE",
+     "BriefDescription": "VSU instruction was issued to VSU pipe 3."
+   },
+   {
+-    "EventCode": "0x40132",
+-    "EventName": "PM_MRK_LSU_FIN",
+-    "BriefDescription": "LSU marked instruction finish."
++    "EventCode": "0x4003C",
++    "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
+   },
+   {
+     "EventCode": "0x45058",
+     "EventName": "PM_IC_MISS_CMPL",
+     "BriefDescription": "Non-speculative instruction cache miss, counted at completion."
+   },
+-  {
+-    "EventCode": "0x4D052",
+-    "EventName": "PM_2FLOP_CMPL",
+-    "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed."
+-  },
+   {
+     "EventCode": "0x400F2",
+     "EventName": "PM_1PLUS_PPC_DISP",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+index 364fedbfb490b..0a2bf56ee7c10 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+@@ -1,22 +1,197 @@
+ [
++  {
++    "EventCode": "0x100FE",
++    "EventName": "PM_INST_CMPL",
++    "BriefDescription": "PowerPC instruction completed."
++  },
++  {
++    "EventCode": "0x1000A",
++    "EventName": "PM_PMC3_REWIND",
++    "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged."
++  },
++  {
++    "EventCode": "0x10010",
++    "EventName": "PM_PMC4_OVERFLOW",
++    "BriefDescription": "The event selected for PMC4 caused the event counter to overflow."
++  },
++  {
++    "EventCode": "0x1001C",
++    "EventName": "PM_ULTRAVISOR_INST_CMPL",
++    "BriefDescription": "PowerPC instruction completed while the thread was in ultravisor state."
++  },
++  {
++    "EventCode": "0x100F0",
++    "EventName": "PM_CYC",
++    "BriefDescription": "Processor cycles."
++  },
++  {
++    "EventCode": "0x10020",
++    "EventName": "PM_PMC4_REWIND",
++    "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged."
++  },
++  {
++    "EventCode": "0x10022",
++    "EventName": "PM_PMC2_SAVED",
++    "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged."
++  },
++  {
++    "EventCode": "0x10024",
++    "EventName": "PM_PMC5_OVERFLOW",
++    "BriefDescription": "The event selected for PMC5 caused the event counter to overflow."
++  },
++  {
++    "EventCode": "0x1F15E",
++    "EventName": "PM_MRK_START_PROBE_NOP_CMPL",
++    "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
++  },
++  {
++    "EventCode": "0x1006C",
++    "EventName": "PM_RUN_CYC_ST_MODE",
++    "BriefDescription": "Cycles when the run latch is set and the core is in ST mode."
++  },
++  {
++    "EventCode": "0x101E8",
++    "EventName": "PM_THRESH_EXC_256",
++    "BriefDescription": "Threshold counter exceeded a count of 256."
++  },
++  {
++    "EventCode": "0x101EC",
++    "EventName": "PM_THRESH_MET",
++    "BriefDescription": "Threshold exceeded."
++  },
++  {
++    "EventCode": "0x100FA",
++    "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC",
++    "BriefDescription": "Cycles when at least one thread has the run latch set."
++  },
++  {
++    "EventCode": "0x2000A",
++    "EventName": "PM_HYPERVISOR_CYC",
++    "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010."
++  },
++  {
++    "EventCode": "0x2000C",
++    "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC",
++    "BriefDescription": "Cycles when the run latch is set for all threads."
++  },
++  {
++    "EventCode": "0x20010",
++    "EventName": "PM_PMC1_OVERFLOW",
++    "BriefDescription": "The event selected for PMC1 caused the event counter to overflow."
++  },
++  {
++    "EventCode": "0x2006C",
++    "EventName": "PM_RUN_CYC_SMT4_MODE",
++    "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode."
++  },
++  {
++    "EventCode": "0x201E6",
++    "EventName": "PM_THRESH_EXC_32",
++    "BriefDescription": "Threshold counter exceeded a value of 32."
++  },
++  {
++    "EventCode": "0x201E8",
++    "EventName": "PM_THRESH_EXC_512",
++    "BriefDescription": "Threshold counter exceeded a value of 512."
++  },
++  {
++    "EventCode": "0x200F4",
++    "EventName": "PM_RUN_CYC",
++    "BriefDescription": "Processor cycles gated by the run latch."
++  },
++  {
++    "EventCode": "0x30010",
++    "EventName": "PM_PMC2_OVERFLOW",
++    "BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
++  },
++  {
++    "EventCode": "0x30020",
++    "EventName": "PM_PMC2_REWIND",
++    "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged."
++  },
++  {
++    "EventCode": "0x30022",
++    "EventName": "PM_PMC4_SAVED",
++    "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged."
++  },
++  {
++    "EventCode": "0x30024",
++    "EventName": "PM_PMC6_OVERFLOW",
++    "BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
++  },
++  {
++    "EventCode": "0x3006C",
++    "EventName": "PM_RUN_CYC_SMT2_MODE",
++    "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
++  },
+   {
+     "EventCode": "0x301E8",
+     "EventName": "PM_THRESH_EXC_64",
+     "BriefDescription": "Threshold counter exceeded a value of 64."
+   },
+   {
+-    "EventCode": "0x45050",
+-    "EventName": "PM_1FLOP_CMPL",
+-    "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
++    "EventCode": "0x301EA",
++    "EventName": "PM_THRESH_EXC_1024",
++    "BriefDescription": "Threshold counter exceeded a value of 1024."
++  },
++  {
++    "EventCode": "0x40010",
++    "EventName": "PM_PMC3_OVERFLOW",
++    "BriefDescription": "The event selected for PMC3 caused the event counter to overflow."
++  },
++  {
++    "EventCode": "0x40114",
++    "EventName": "PM_MRK_START_PROBE_NOP_DISP",
++    "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0."
++  },
++  {
++    "EventCode": "0x4D010",
++    "EventName": "PM_PMC1_SAVED",
++    "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged."
++  },
++  {
++    "EventCode": "0x4D012",
++    "EventName": "PM_PMC3_SAVED",
++    "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged."
++  },
++  {
++    "EventCode": "0x4D022",
++    "EventName": "PM_HYPERVISOR_INST_CMPL",
++    "BriefDescription": "PowerPC instruction completed while the thread was in hypervisor state."
++  },
++  {
++    "EventCode": "0x4D026",
++    "EventName": "PM_ULTRAVISOR_CYC",
++    "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110."
++  },
++  {
++    "EventCode": "0x4D028",
++    "EventName": "PM_PRIVILEGED_CYC",
++    "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00."
++  },
++  {
++    "EventCode": "0x4D02C",
++    "EventName": "PM_PMC1_REWIND",
++    "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged."
++  },
++  {
++    "EventCode": "0x40030",
++    "EventName": "PM_INST_FIN",
++    "BriefDescription": "Instruction finished."
++  },
++  {
++    "EventCode": "0x40134",
++    "EventName": "PM_MRK_INST_TIMEO",
++    "BriefDescription": "Marked instruction finish timeout (instruction was lost)."
+   },
+   {
+-    "EventCode": "0x45052",
+-    "EventName": "PM_4FLOP_CMPL",
+-    "BriefDescription": "Four floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
++    "EventCode": "0x401EA",
++    "EventName": "PM_THRESH_EXC_128",
++    "BriefDescription": "Threshold counter exceeded a value of 128."
+   },
+   {
+-    "EventCode": "0x4D054",
+-    "EventName": "PM_8FLOP_CMPL",
+-    "BriefDescription": "Four Double Precision vector instruction completed."
++    "EventCode": "0x400FA",
++    "EventName": "PM_RUN_INST_CMPL",
++    "BriefDescription": "PowerPC instruction completed while the run latch is set."
+   }
+ ]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+index 961e2491e73f6..170c9aeb30d83 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/translation.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+@@ -1,29 +1,9 @@
+ [
+-  {
+-    "EventCode": "0x1F15E",
+-    "EventName": "PM_MRK_START_PROBE_NOP_CMPL",
+-    "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
+-  },
+   {
+     "EventCode": "0x20018",
+     "EventName": "PM_ST_FWD",
+     "BriefDescription": "Store forwards that finished."
+   },
+-  {
+-    "EventCode": "0x2011C",
+-    "EventName": "PM_MRK_NTF_CYC",
+-    "BriefDescription": "Cycles in which the marked instruction is the oldest in the pipeline (next-to-finish or next-to-complete)."
+-  },
+-  {
+-    "EventCode": "0x2E01C",
+-    "EventName": "PM_EXEC_STALL_TLBIE",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit."
+-  },
+-  {
+-    "EventCode": "0x201E6",
+-    "EventName": "PM_THRESH_EXC_32",
+-    "BriefDescription": "Threshold counter exceeded a value of 32."
+-  },
+   {
+     "EventCode": "0x200F0",
+     "EventName": "PM_ST_CMPL",
+@@ -33,20 +13,5 @@
+     "EventCode": "0x200FE",
+     "EventName": "PM_DATA_FROM_L2MISS",
+     "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss."
+-  },
+-  {
+-    "EventCode": "0x30010",
+-    "EventName": "PM_PMC2_OVERFLOW",
+-    "BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
+-  },
+-  {
+-    "EventCode": "0x4D010",
+-    "EventName": "PM_PMC1_SAVED",
+-    "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged."
+-  },
+-  {
+-    "EventCode": "0x4D05C",
+-    "EventName": "PM_DPP_FLOP_CMPL",
+-    "BriefDescription": "Double-Precision or Quad-Precision instruction completed."
+   }
+ ]
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-vendor-events-update-metric-event-names-for-pow.patch b/queue-6.5/perf-vendor-events-update-metric-event-names-for-pow.patch
new file mode 100644 (file)
index 0000000..ccf89ef
--- /dev/null
@@ -0,0 +1,216 @@
+From a0d12b5361d8097f73ad8dc4f49306149beb603a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Aug 2023 16:58:02 +0530
+Subject: perf vendor events: Update metric event names for power10 platform
+
+From: Kajol Jain <kjain@linux.ibm.com>
+
+[ Upstream commit edd65d2bc55fb84d7b80c2ffe3b74d9b11ac4e2f ]
+
+Update metric event name for some of the JSON/metric events for
+power10 platform.
+
+Fixes: 3ca3af7d1f230d1f ("perf vendor events power10: Add metric events JSON file for power10 platform")
+Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
+Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Cc: Disha Goel <disgoel@linux.ibm.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Kajol Jain <kjain@linux.ibm.com>
+Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: linuxppc-dev@lists.ozlabs.org
+Link: https://lore.kernel.org/r/20230814112803.1508296-6-kjain@linux.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../arch/powerpc/power10/metrics.json         | 50 +++++++++----------
+ 1 file changed, 25 insertions(+), 25 deletions(-)
+
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
+index e3087eb1ccff8..182369076d956 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
+@@ -16,133 +16,133 @@
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled for any reason",
+         "MetricExpr": "PM_DISP_STALL_CYC / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI;CPI_STALL_RATIO",
+-        "MetricName": "DISPATCHED_CPI"
++        "MetricName": "DISPATCH_STALL_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because there was a flush",
+         "MetricExpr": "PM_DISP_STALL_FLUSH / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_FLUSH_CPI"
++        "MetricName": "DISPATCH_STALL_FLUSH_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because the MMU was handling a translation miss",
+         "MetricExpr": "PM_DISP_STALL_TRANSLATION / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_TRANSLATION_CPI"
++        "MetricName": "DISPATCH_STALL_TRANSLATION_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction ERAT miss",
+         "MetricExpr": "PM_DISP_STALL_IERAT_ONLY_MISS / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_IERAT_ONLY_MISS_CPI"
++        "MetricName": "DISPATCH_STALL_IERAT_ONLY_MISS_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction TLB miss",
+         "MetricExpr": "PM_DISP_STALL_ITLB_MISS / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_ITLB_MISS_CPI"
++        "MetricName": "DISPATCH_STALL_ITLB_MISS_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss",
+         "MetricExpr": "PM_DISP_STALL_IC_MISS / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_IC_MISS_CPI"
++        "MetricName": "DISPATCH_STALL_IC_MISS_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L2",
+         "MetricExpr": "PM_DISP_STALL_IC_L2 / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_IC_L2_CPI"
++        "MetricName": "DISPATCH_STALL_IC_L2_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L3",
+         "MetricExpr": "PM_DISP_STALL_IC_L3 / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_IC_L3_CPI"
++        "MetricName": "DISPATCH_STALL_IC_L3_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from any source beyond the local L3",
+         "MetricExpr": "PM_DISP_STALL_IC_L3MISS / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_IC_L3MISS_CPI"
++        "MetricName": "DISPATCH_STALL_IC_L3MISS_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss after a branch mispredict",
+         "MetricExpr": "PM_DISP_STALL_BR_MPRED_ICMISS / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_BR_MPRED_ICMISS_CPI"
++        "MetricName": "DISPATCH_STALL_BR_MPRED_ICMISS_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L2 after suffering a branch mispredict",
+         "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L2 / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_BR_MPRED_IC_L2_CPI"
++        "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L2_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L3 after suffering a branch mispredict",
+         "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3 / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_BR_MPRED_IC_L3_CPI"
++        "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L3_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from any source beyond the local L3 after suffering a branch mispredict",
+         "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3MISS / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_BR_MPRED_IC_L3MISS_CPI"
++        "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L3MISS_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to a branch mispredict",
+         "MetricExpr": "PM_DISP_STALL_BR_MPRED / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_BR_MPRED_CPI"
++        "MetricName": "DISPATCH_STALL_BR_MPRED_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any reason",
+         "MetricExpr": "PM_DISP_STALL_HELD_CYC / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_HELD_CPI"
++        "MetricName": "DISPATCH_STALL_HELD_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch",
+         "MetricExpr": "PM_DISP_STALL_HELD_SYNC_CYC / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISP_HELD_STALL_SYNC_CPI"
++        "MetricName": "DISPATCH_STALL_HELD_SYNC_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch while waiting on the scoreboard",
+         "MetricExpr": "PM_DISP_STALL_HELD_SCOREBOARD_CYC / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISP_HELD_STALL_SCOREBOARD_CPI"
++        "MetricName": "DISPATCH_STALL_HELD_SCOREBOARD_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch due to issue queue full",
+         "MetricExpr": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISP_HELD_STALL_ISSQ_FULL_CPI"
++        "MetricName": "DISPATCH_STALL_HELD_ISSQ_FULL_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the mapper/SRB was full",
+         "MetricExpr": "PM_DISP_STALL_HELD_RENAME_CYC / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_HELD_RENAME_CPI"
++        "MetricName": "DISPATCH_STALL_HELD_RENAME_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the STF mapper/SRB was full",
+         "MetricExpr": "PM_DISP_STALL_HELD_STF_MAPPER_CYC / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_HELD_STF_MAPPER_CPI"
++        "MetricName": "DISPATCH_STALL_HELD_STF_MAPPER_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the XVFC mapper/SRB was full",
+         "MetricExpr": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_HELD_XVFC_MAPPER_CPI"
++        "MetricName": "DISPATCH_STALL_HELD_XVFC_MAPPER_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any other reason",
+         "MetricExpr": "PM_DISP_STALL_HELD_OTHER_CYC / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_HELD_OTHER_CPI"
++        "MetricName": "DISPATCH_STALL_HELD_OTHER_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when the NTC instruction has been dispatched but not issued for any reason",
+@@ -352,13 +352,13 @@
+         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because fetch was being held, so there was nothing in the pipeline for this thread",
+         "MetricExpr": "PM_DISP_STALL_FETCH / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_FETCH_CPI"
++        "MetricName": "DISPATCH_STALL_FETCH_CPI"
+     },
+     {
+         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of power management",
+         "MetricExpr": "PM_DISP_STALL_HELD_HALT_CYC / PM_RUN_INST_CMPL",
+         "MetricGroup": "CPI",
+-        "MetricName": "DISPATCHED_HELD_HALT_CPI"
++        "MetricName": "DISPATCH_STALL_HELD_HALT_CPI"
+     },
+     {
+         "BriefDescription": "Percentage of flushes per completed instruction",
+@@ -560,7 +560,7 @@
+         "BriefDescription": "Average number of STCX instructions finshed per completed instruction",
+         "MetricExpr": "PM_STCX_FIN / PM_RUN_INST_CMPL",
+         "MetricGroup": "General",
+-        "MetricName": "STXC_PER_INST"
++        "MetricName": "STCX_PER_INST"
+     },
+     {
+         "BriefDescription": "Average number of LARX instructions finshed per completed instruction",
+-- 
+2.40.1
+
diff --git a/queue-6.5/perf-vendor-events-update-the-json-events-descriptio.patch b/queue-6.5/perf-vendor-events-update-the-json-events-descriptio.patch
new file mode 100644 (file)
index 0000000..80e8611
--- /dev/null
@@ -0,0 +1,618 @@
+From b0d2deb0530455f20e8e642ad522b2c046f2baa0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Aug 2023 16:57:57 +0530
+Subject: perf vendor events: Update the JSON/events descriptions for power10
+ platform
+
+From: Kajol Jain <kjain@linux.ibm.com>
+
+[ Upstream commit 3286f88f31da060ac2789cee247153961ba57e49 ]
+
+Update the description for some of the JSON/events for power10 platform.
+
+Fixes: 32daa5d7899e0343 ("perf vendor events: Initial JSON/events list for power10 platform")
+Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
+Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
+Cc: Disha Goel <disgoel@linux.ibm.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Kajol Jain <kjain@linux.ibm.com>
+Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: linuxppc-dev@lists.ozlabs.org
+Link: https://lore.kernel.org/r/20230814112803.1508296-1-kjain@linux.ibm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../arch/powerpc/power10/cache.json           |  4 +-
+ .../arch/powerpc/power10/frontend.json        | 30 ++++++------
+ .../arch/powerpc/power10/marked.json          | 20 ++++----
+ .../arch/powerpc/power10/memory.json          |  6 +--
+ .../arch/powerpc/power10/others.json          | 48 +++++++++----------
+ .../arch/powerpc/power10/pipeline.json        | 20 ++++----
+ .../pmu-events/arch/powerpc/power10/pmc.json  |  4 +-
+ .../arch/powerpc/power10/translation.json     |  6 +--
+ 8 files changed, 69 insertions(+), 69 deletions(-)
+
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+index 605be14f441c8..9cb929bb64afd 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/cache.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+@@ -17,7 +17,7 @@
+   {
+     "EventCode": "0x34056",
+     "EventName": "PM_EXEC_STALL_LOAD_FINISH",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the next-to-finish (NTF) instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
+   },
+   {
+     "EventCode": "0x3006C",
+@@ -27,7 +27,7 @@
+   {
+     "EventCode": "0x300F4",
+     "EventName": "PM_RUN_INST_CMPL_CONC",
+-    "BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set."
++    "BriefDescription": "PowerPC instruction completed by this thread when all threads in the core had the run-latch set."
+   },
+   {
+     "EventCode": "0x4C016",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+index 558f9530f54ec..61e9e0222c873 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+@@ -7,7 +7,7 @@
+   {
+     "EventCode": "0x10006",
+     "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
+-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason."
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any other reason."
+   },
+   {
+     "EventCode": "0x10010",
+@@ -32,12 +32,12 @@
+   {
+     "EventCode": "0x1D05E",
+     "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
+-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management."
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of power management."
+   },
+   {
+     "EventCode": "0x1E050",
+     "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
+-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
+   },
+   {
+     "EventCode": "0x1F054",
+@@ -67,7 +67,7 @@
+   {
+     "EventCode": "0x100F6",
+     "EventName": "PM_IERAT_MISS",
+-    "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event."
++    "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event. This event only counts instruction demand access."
+   },
+   {
+     "EventCode": "0x100F8",
+@@ -77,7 +77,7 @@
+   {
+     "EventCode": "0x20006",
+     "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
+-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
+   },
+   {
+     "EventCode": "0x20114",
+@@ -102,7 +102,7 @@
+   {
+     "EventCode": "0x2D01A",
+     "EventName": "PM_DISP_STALL_IC_MISS",
+-    "BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss."
++    "BriefDescription": "Cycles when dispatch was stalled for this thread due to an instruction cache miss."
+   },
+   {
+     "EventCode": "0x2E018",
+@@ -112,7 +112,7 @@
+   {
+     "EventCode": "0x2E01A",
+     "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
+-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full."
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the XVFC mapper/SRB was full."
+   },
+   {
+     "EventCode": "0x2C142",
+@@ -137,7 +137,7 @@
+   {
+     "EventCode": "0x30004",
+     "EventName": "PM_DISP_STALL_FLUSH",
+-    "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
++    "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet next-to-complete (NTC). PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
+   },
+   {
+     "EventCode": "0x3000A",
+@@ -157,7 +157,7 @@
+   {
+     "EventCode": "0x30018",
+     "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
+-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
+   },
+   {
+     "EventCode": "0x30026",
+@@ -182,7 +182,7 @@
+   {
+     "EventCode": "0x3D05C",
+     "EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
+-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
+   },
+   {
+     "EventCode": "0x3E052",
+@@ -192,7 +192,7 @@
+   {
+     "EventCode": "0x3E054",
+     "EventName": "PM_LD_MISS_L1",
+-    "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
++    "BriefDescription": "Load missed L1, counted at finish time. LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
+   },
+   {
+     "EventCode": "0x301EA",
+@@ -202,7 +202,7 @@
+   {
+     "EventCode": "0x300FA",
+     "EventName": "PM_INST_FROM_L3MISS",
+-    "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
++    "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss."
+   },
+   {
+     "EventCode": "0x40006",
+@@ -232,16 +232,16 @@
+   {
+     "EventCode": "0x4E01A",
+     "EventName": "PM_DISP_STALL_HELD_CYC",
+-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason."
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any reason."
+   },
+   {
+     "EventCode": "0x4003C",
+     "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
+-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
++    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
+   },
+   {
+     "EventCode": "0x44056",
+     "EventName": "PM_VECTOR_ST_CMPL",
+-    "BriefDescription": "Vector store instructions completed."
++    "BriefDescription": "Vector store instruction completed."
+   }
+ ]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+index 58b5dfe3a2731..131f8d0e88317 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/marked.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+@@ -62,7 +62,7 @@
+   {
+     "EventCode": "0x200FD",
+     "EventName": "PM_L1_ICACHE_MISS",
+-    "BriefDescription": "Demand iCache Miss."
++    "BriefDescription": "Demand instruction cache miss."
+   },
+   {
+     "EventCode": "0x30130",
+@@ -72,7 +72,7 @@
+   {
+     "EventCode": "0x34146",
+     "EventName": "PM_MRK_LD_CMPL",
+-    "BriefDescription": "Marked loads completed."
++    "BriefDescription": "Marked load instruction completed."
+   },
+   {
+     "EventCode": "0x3E158",
+@@ -82,12 +82,12 @@
+   {
+     "EventCode": "0x3E15A",
+     "EventName": "PM_MRK_ST_FIN",
+-    "BriefDescription": "The marked instruction was a store of any kind."
++    "BriefDescription": "Marked store instruction finished."
+   },
+   {
+     "EventCode": "0x30068",
+     "EventName": "PM_L1_ICACHE_RELOADED_PREF",
+-    "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)."
++    "BriefDescription": "Counts all instruction cache prefetch reloads (includes demand turned into prefetch)."
+   },
+   {
+     "EventCode": "0x301E4",
+@@ -102,12 +102,12 @@
+   {
+     "EventCode": "0x300FE",
+     "EventName": "PM_DATA_FROM_L3MISS",
+-    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
++    "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss."
+   },
+   {
+     "EventCode": "0x40012",
+     "EventName": "PM_L1_ICACHE_RELOADED_ALL",
+-    "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
++    "BriefDescription": "Counts all instruction cache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
+   },
+   {
+     "EventCode": "0x40134",
+@@ -117,22 +117,22 @@
+   {
+     "EventCode": "0x4505A",
+     "EventName": "PM_SP_FLOP_CMPL",
+-    "BriefDescription": "Single Precision floating point instructions completed."
++    "BriefDescription": "Single Precision floating point instruction completed."
+   },
+   {
+     "EventCode": "0x4D058",
+     "EventName": "PM_VECTOR_FLOP_CMPL",
+-    "BriefDescription": "Vector floating point instructions completed."
++    "BriefDescription": "Vector floating point instruction completed."
+   },
+   {
+     "EventCode": "0x4D05A",
+     "EventName": "PM_NON_MATH_FLOP_CMPL",
+-    "BriefDescription": "Non Math instructions completed."
++    "BriefDescription": "Non Math instruction completed."
+   },
+   {
+     "EventCode": "0x401E0",
+     "EventName": "PM_MRK_INST_CMPL",
+-    "BriefDescription": "marked instruction completed."
++    "BriefDescription": "Marked instruction completed."
+   },
+   {
+     "EventCode": "0x400FE",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+index 843b51f531e95..c4c10ca98cad7 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/memory.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+@@ -47,7 +47,7 @@
+   {
+     "EventCode": "0x10062",
+     "EventName": "PM_LD_L3MISS_PEND_CYC",
+-    "BriefDescription": "Cycles L3 miss was pending for this thread."
++    "BriefDescription": "Cycles in which an L3 miss was pending for this thread."
+   },
+   {
+     "EventCode": "0x20010",
+@@ -132,7 +132,7 @@
+   {
+     "EventCode": "0x300FC",
+     "EventName": "PM_DTLB_MISS",
+-    "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity."
++    "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. This event only counts for demand misses."
+   },
+   {
+     "EventCode": "0x4D02C",
+@@ -142,7 +142,7 @@
+   {
+     "EventCode": "0x4003E",
+     "EventName": "PM_LD_CMPL",
+-    "BriefDescription": "Loads completed."
++    "BriefDescription": "Load instruction completed."
+   },
+   {
+     "EventCode": "0x4C040",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json
+index a771e4b6bec58..a5319cdba89b3 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/others.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json
+@@ -2,12 +2,12 @@
+   {
+     "EventCode": "0x10016",
+     "EventName": "PM_VSU0_ISSUE",
+-    "BriefDescription": "VSU instructions issued to VSU pipe 0."
++    "BriefDescription": "VSU instruction issued to VSU pipe 0."
+   },
+   {
+     "EventCode": "0x1001C",
+     "EventName": "PM_ULTRAVISOR_INST_CMPL",
+-    "BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state."
++    "BriefDescription": "PowerPC instruction completed while the thread was in ultravisor state."
+   },
+   {
+     "EventCode": "0x100F0",
+@@ -17,12 +17,12 @@
+   {
+     "EventCode": "0x10134",
+     "EventName": "PM_MRK_ST_DONE_L2",
+-    "BriefDescription": "Marked stores completed in L2 (RC machine done)."
++    "BriefDescription": "Marked store completed in L2."
+   },
+   {
+     "EventCode": "0x1505E",
+     "EventName": "PM_LD_HIT_L1",
+-    "BriefDescription": "Loads that finished without experiencing an L1 miss."
++    "BriefDescription": "Load finished without experiencing an L1 miss."
+   },
+   {
+     "EventCode": "0x1F056",
+@@ -42,7 +42,7 @@
+   {
+     "EventCode": "0x101E4",
+     "EventName": "PM_MRK_L1_ICACHE_MISS",
+-    "BriefDescription": "Marked Instruction suffered an icache Miss."
++    "BriefDescription": "Marked instruction suffered an instruction cache miss."
+   },
+   {
+     "EventCode": "0x101EA",
+@@ -72,7 +72,7 @@
+   {
+     "EventCode": "0x2E010",
+     "EventName": "PM_ADJUNCT_INST_CMPL",
+-    "BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state."
++    "BriefDescription": "PowerPC instruction completed while the thread was in Adjunct state."
+   },
+   {
+     "EventCode": "0x2E014",
+@@ -122,7 +122,7 @@
+   {
+     "EventCode": "0x201E4",
+     "EventName": "PM_MRK_DATA_FROM_L3MISS",
+-    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
++    "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
+   },
+   {
+     "EventCode": "0x201E8",
+@@ -132,17 +132,17 @@
+   {
+     "EventCode": "0x200F2",
+     "EventName": "PM_INST_DISP",
+-    "BriefDescription": "PowerPC instructions dispatched."
++    "BriefDescription": "PowerPC instruction dispatched."
+   },
+   {
+     "EventCode": "0x30132",
+     "EventName": "PM_MRK_VSU_FIN",
+-    "BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit."
++    "BriefDescription": "VSU marked instruction finished. Excludes simple FX instructions issued to the Store Unit."
+   },
+   {
+     "EventCode": "0x30038",
+     "EventName": "PM_EXEC_STALL_DMISS_LMEM",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory."
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCAPI cache, or local OpenCAPI memory."
+   },
+   {
+     "EventCode": "0x3F04A",
+@@ -152,12 +152,12 @@
+   {
+     "EventCode": "0x3405A",
+     "EventName": "PM_PRIVILEGED_INST_CMPL",
+-    "BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state."
++    "BriefDescription": "PowerPC instruction completed while the thread was in Privileged state."
+   },
+   {
+     "EventCode": "0x3F150",
+     "EventName": "PM_MRK_ST_DRAIN_CYC",
+-    "BriefDescription": "cycles to drain st from core to L2."
++    "BriefDescription": "Cycles in which the marked store drained from the core to the L2."
+   },
+   {
+     "EventCode": "0x3F054",
+@@ -182,7 +182,7 @@
+   {
+     "EventCode": "0x4001C",
+     "EventName": "PM_VSU_FIN",
+-    "BriefDescription": "VSU instructions finished."
++    "BriefDescription": "VSU instruction finished."
+   },
+   {
+     "EventCode": "0x4C01A",
+@@ -197,7 +197,7 @@
+   {
+     "EventCode": "0x4D022",
+     "EventName": "PM_HYPERVISOR_INST_CMPL",
+-    "BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state."
++    "BriefDescription": "PowerPC instruction completed while the thread was in hypervisor state."
+   },
+   {
+     "EventCode": "0x4D026",
+@@ -212,32 +212,32 @@
+   {
+     "EventCode": "0x40030",
+     "EventName": "PM_INST_FIN",
+-    "BriefDescription": "Instructions finished."
++    "BriefDescription": "Instruction finished."
+   },
+   {
+     "EventCode": "0x44146",
+     "EventName": "PM_MRK_STCX_CORE_CYC",
+-    "BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
++    "BriefDescription": "Cycles spent in the core portion of a marked STCX instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
+   },
+   {
+     "EventCode": "0x44054",
+     "EventName": "PM_VECTOR_LD_CMPL",
+-    "BriefDescription": "Vector load instructions completed."
++    "BriefDescription": "Vector load instruction completed."
+   },
+   {
+     "EventCode": "0x45054",
+     "EventName": "PM_FMA_CMPL",
+-    "BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
++    "BriefDescription": "Two floating point instruction completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
+   },
+   {
+     "EventCode": "0x45056",
+     "EventName": "PM_SCALAR_FLOP_CMPL",
+-    "BriefDescription": "Scalar floating point instructions completed."
++    "BriefDescription": "Scalar floating point instruction completed."
+   },
+   {
+     "EventCode": "0x4505C",
+     "EventName": "PM_MATH_FLOP_CMPL",
+-    "BriefDescription": "Math floating point instructions completed."
++    "BriefDescription": "Math floating point instruction completed."
+   },
+   {
+     "EventCode": "0x4D05E",
+@@ -252,21 +252,21 @@
+   {
+     "EventCode": "0x401E6",
+     "EventName": "PM_MRK_INST_FROM_L3MISS",
+-    "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction."
++    "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
+   },
+   {
+     "EventCode": "0x401E8",
+     "EventName": "PM_MRK_DATA_FROM_L2MISS",
+-    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load."
++    "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction."
+   },
+   {
+     "EventCode": "0x400F0",
+     "EventName": "PM_LD_DEMAND_MISS_L1_FIN",
+-    "BriefDescription": "Load Missed L1, counted at finish time."
++    "BriefDescription": "Load missed L1, counted at finish time."
+   },
+   {
+     "EventCode": "0x500FA",
+     "EventName": "PM_RUN_INST_CMPL",
+-    "BriefDescription": "Completed PowerPC instructions gated by the run latch."
++    "BriefDescription": "PowerPC instruction completed while the run latch is set."
+   }
+ ]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+index b8aded6045faa..449f57e8ba6af 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+@@ -2,7 +2,7 @@
+   {
+     "EventCode": "0x100FE",
+     "EventName": "PM_INST_CMPL",
+-    "BriefDescription": "PowerPC instructions completed."
++    "BriefDescription": "PowerPC instruction completed."
+   },
+   {
+     "EventCode": "0x1000C",
+@@ -12,7 +12,7 @@
+   {
+     "EventCode": "0x1000E",
+     "EventName": "PM_MMA_ISSUED",
+-    "BriefDescription": "MMA instructions issued."
++    "BriefDescription": "MMA instruction issued."
+   },
+   {
+     "EventCode": "0x10012",
+@@ -107,7 +107,7 @@
+   {
+     "EventCode": "0x2D012",
+     "EventName": "PM_VSU1_ISSUE",
+-    "BriefDescription": "VSU instructions issued to VSU pipe 1."
++    "BriefDescription": "VSU instruction issued to VSU pipe 1."
+   },
+   {
+     "EventCode": "0x2D018",
+@@ -122,7 +122,7 @@
+   {
+     "EventCode": "0x2E01E",
+     "EventName": "PM_EXEC_STALL_NTC_FLUSH",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous NTF instruction is still completing and the new NTF instruction is stalled at dispatch."
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous next-to-finish (NTF) instruction is still completing and the new NTF instruction is stalled at dispatch."
+   },
+   {
+     "EventCode": "0x2013C",
+@@ -137,7 +137,7 @@
+   {
+     "EventCode": "0x201E2",
+     "EventName": "PM_MRK_LD_MISS_L1",
+-    "BriefDescription": "Marked DL1 Demand Miss counted at finish time."
++    "BriefDescription": "Marked demand data load miss counted at finish time."
+   },
+   {
+     "EventCode": "0x200F4",
+@@ -172,7 +172,7 @@
+   {
+     "EventCode": "0x30028",
+     "EventName": "PM_CMPL_STALL_MEM_ECC",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC."
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a STCX waiting for its result or a load waiting for non-critical sectors of data and ECC."
+   },
+   {
+     "EventCode": "0x30036",
+@@ -187,12 +187,12 @@
+   {
+     "EventCode": "0x3F044",
+     "EventName": "PM_VSU2_ISSUE",
+-    "BriefDescription": "VSU instructions issued to VSU pipe 2."
++    "BriefDescription": "VSU instruction issued to VSU pipe 2."
+   },
+   {
+     "EventCode": "0x30058",
+     "EventName": "PM_TLBIE_FIN",
+-    "BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
++    "BriefDescription": "TLBIE instruction finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
+   },
+   {
+     "EventCode": "0x3D058",
+@@ -252,7 +252,7 @@
+   {
+     "EventCode": "0x4E012",
+     "EventName": "PM_EXEC_STALL_UNKNOWN",
+-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together."
++    "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the next-to-finish (NTF) instruction finishes and completions came too close together."
+   },
+   {
+     "EventCode": "0x4D020",
+@@ -267,7 +267,7 @@
+   {
+     "EventCode": "0x45058",
+     "EventName": "PM_IC_MISS_CMPL",
+-    "BriefDescription": "Non-speculative icache miss, counted at completion."
++    "BriefDescription": "Non-speculative instruction cache miss, counted at completion."
+   },
+   {
+     "EventCode": "0x4D050",
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+index b5d1bd39cfb22..364fedbfb490b 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+@@ -12,11 +12,11 @@
+   {
+     "EventCode": "0x45052",
+     "EventName": "PM_4FLOP_CMPL",
+-    "BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
++    "BriefDescription": "Four floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+   },
+   {
+     "EventCode": "0x4D054",
+     "EventName": "PM_8FLOP_CMPL",
+-    "BriefDescription": "Four Double Precision vector instructions completed."
++    "BriefDescription": "Four Double Precision vector instruction completed."
+   }
+ ]
+diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+index db3766dca07c5..3e47b804a0a8f 100644
+--- a/tools/perf/pmu-events/arch/powerpc/power10/translation.json
++++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+@@ -17,7 +17,7 @@
+   {
+     "EventCode": "0x2011C",
+     "EventName": "PM_MRK_NTF_CYC",
+-    "BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)."
++    "BriefDescription": "Cycles in which the marked instruction is the oldest in the pipeline (next-to-finish or next-to-complete)."
+   },
+   {
+     "EventCode": "0x2E01C",
+@@ -37,7 +37,7 @@
+   {
+     "EventCode": "0x200FE",
+     "EventName": "PM_DATA_FROM_L2MISS",
+-    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss."
++    "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss."
+   },
+   {
+     "EventCode": "0x30010",
+@@ -52,6 +52,6 @@
+   {
+     "EventCode": "0x4D05C",
+     "EventName": "PM_DPP_FLOP_CMPL",
+-    "BriefDescription": "Double-Precision or Quad-Precision instructions completed."
++    "BriefDescription": "Double-Precision or Quad-Precision instruction completed."
+   }
+ ]
+-- 
+2.40.1
+
diff --git a/queue-6.5/pwm-atmel-tcb-fix-resource-freeing-in-error-path-and.patch b/queue-6.5/pwm-atmel-tcb-fix-resource-freeing-in-error-path-and.patch
new file mode 100644 (file)
index 0000000..485c469
--- /dev/null
@@ -0,0 +1,90 @@
+From 43eb77d3d2b0edb1591801ce856af79a282cf129 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:20:10 +0200
+Subject: pwm: atmel-tcb: Fix resource freeing in error path and remove
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+
+[ Upstream commit c11622324c023415fb69196c5fc3782d2b8cced0 ]
+
+Several resources were not freed in the error path and the remove
+function. Add the forgotten items.
+
+Fixes: 34cbcd72588f ("pwm: atmel-tcb: Add sama5d2 support")
+Fixes: 061f8572a31c ("pwm: atmel-tcb: Switch to new binding")
+Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Reviewed-by: Claudiu Beznea <claudiu.beznea@tuxon.dev>
+Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pwm/pwm-atmel-tcb.c | 23 +++++++++++++++++------
+ 1 file changed, 17 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c
+index 613dd1810fb53..2826fc216d291 100644
+--- a/drivers/pwm/pwm-atmel-tcb.c
++++ b/drivers/pwm/pwm-atmel-tcb.c
+@@ -450,16 +450,20 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
+       tcbpwm->clk = of_clk_get_by_name(np->parent, clk_name);
+       if (IS_ERR(tcbpwm->clk))
+               tcbpwm->clk = of_clk_get_by_name(np->parent, "t0_clk");
+-      if (IS_ERR(tcbpwm->clk))
+-              return PTR_ERR(tcbpwm->clk);
++      if (IS_ERR(tcbpwm->clk)) {
++              err = PTR_ERR(tcbpwm->clk);
++              goto err_slow_clk;
++      }
+       match = of_match_node(atmel_tcb_of_match, np->parent);
+       config = match->data;
+       if (config->has_gclk) {
+               tcbpwm->gclk = of_clk_get_by_name(np->parent, "gclk");
+-              if (IS_ERR(tcbpwm->gclk))
+-                      return PTR_ERR(tcbpwm->gclk);
++              if (IS_ERR(tcbpwm->gclk)) {
++                      err = PTR_ERR(tcbpwm->gclk);
++                      goto err_clk;
++              }
+       }
+       tcbpwm->chip.dev = &pdev->dev;
+@@ -470,7 +474,7 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
+       err = clk_prepare_enable(tcbpwm->slow_clk);
+       if (err)
+-              goto err_slow_clk;
++              goto err_gclk;
+       spin_lock_init(&tcbpwm->lock);
+@@ -485,6 +489,12 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
+ err_disable_clk:
+       clk_disable_unprepare(tcbpwm->slow_clk);
++err_gclk:
++      clk_put(tcbpwm->gclk);
++
++err_clk:
++      clk_put(tcbpwm->clk);
++
+ err_slow_clk:
+       clk_put(tcbpwm->slow_clk);
+@@ -498,8 +508,9 @@ static void atmel_tcb_pwm_remove(struct platform_device *pdev)
+       pwmchip_remove(&tcbpwm->chip);
+       clk_disable_unprepare(tcbpwm->slow_clk);
+-      clk_put(tcbpwm->slow_clk);
++      clk_put(tcbpwm->gclk);
+       clk_put(tcbpwm->clk);
++      clk_put(tcbpwm->slow_clk);
+ }
+ static const struct of_device_id atmel_tcb_pwm_dt_ids[] = {
+-- 
+2.40.1
+
diff --git a/queue-6.5/pwm-atmel-tcb-harmonize-resource-allocation-order.patch b/queue-6.5/pwm-atmel-tcb-harmonize-resource-allocation-order.patch
new file mode 100644 (file)
index 0000000..ca53d68
--- /dev/null
@@ -0,0 +1,124 @@
+From 8426d9bed6b28925a172a687bf2b39a02bb7a6ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:20:09 +0200
+Subject: pwm: atmel-tcb: Harmonize resource allocation order
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+
+[ Upstream commit 0323e8fedd1ef25342cf7abf3a2024f5670362b8 ]
+
+Allocate driver data as first resource in the probe function. This way it
+can be used during allocation of the other resources (instead of assigning
+these to local variables first and update driver data only when it's
+allocated). Also as driver data is allocated using a devm function this
+should happen first to have the order of freeing resources in the error
+path and the remove function in reverse.
+
+Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
+Stable-dep-of: c11622324c02 ("pwm: atmel-tcb: Fix resource freeing in error path and remove")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pwm/pwm-atmel-tcb.c | 49 +++++++++++++++----------------------
+ 1 file changed, 20 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c
+index 4a116dc44f6e7..613dd1810fb53 100644
+--- a/drivers/pwm/pwm-atmel-tcb.c
++++ b/drivers/pwm/pwm-atmel-tcb.c
+@@ -422,13 +422,14 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
+       struct atmel_tcb_pwm_chip *tcbpwm;
+       const struct atmel_tcb_config *config;
+       struct device_node *np = pdev->dev.of_node;
+-      struct regmap *regmap;
+-      struct clk *clk, *gclk = NULL;
+-      struct clk *slow_clk;
+       char clk_name[] = "t0_clk";
+       int err;
+       int channel;
++      tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
++      if (tcbpwm == NULL)
++              return -ENOMEM;
++
+       err = of_property_read_u32(np, "reg", &channel);
+       if (err < 0) {
+               dev_err(&pdev->dev,
+@@ -437,47 +438,37 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
+               return err;
+       }
+-      regmap = syscon_node_to_regmap(np->parent);
+-      if (IS_ERR(regmap))
+-              return PTR_ERR(regmap);
++      tcbpwm->regmap = syscon_node_to_regmap(np->parent);
++      if (IS_ERR(tcbpwm->regmap))
++              return PTR_ERR(tcbpwm->regmap);
+-      slow_clk = of_clk_get_by_name(np->parent, "slow_clk");
+-      if (IS_ERR(slow_clk))
+-              return PTR_ERR(slow_clk);
++      tcbpwm->slow_clk = of_clk_get_by_name(np->parent, "slow_clk");
++      if (IS_ERR(tcbpwm->slow_clk))
++              return PTR_ERR(tcbpwm->slow_clk);
+       clk_name[1] += channel;
+-      clk = of_clk_get_by_name(np->parent, clk_name);
+-      if (IS_ERR(clk))
+-              clk = of_clk_get_by_name(np->parent, "t0_clk");
+-      if (IS_ERR(clk))
+-              return PTR_ERR(clk);
++      tcbpwm->clk = of_clk_get_by_name(np->parent, clk_name);
++      if (IS_ERR(tcbpwm->clk))
++              tcbpwm->clk = of_clk_get_by_name(np->parent, "t0_clk");
++      if (IS_ERR(tcbpwm->clk))
++              return PTR_ERR(tcbpwm->clk);
+       match = of_match_node(atmel_tcb_of_match, np->parent);
+       config = match->data;
+       if (config->has_gclk) {
+-              gclk = of_clk_get_by_name(np->parent, "gclk");
+-              if (IS_ERR(gclk))
+-                      return PTR_ERR(gclk);
+-      }
+-
+-      tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
+-      if (tcbpwm == NULL) {
+-              err = -ENOMEM;
+-              goto err_slow_clk;
++              tcbpwm->gclk = of_clk_get_by_name(np->parent, "gclk");
++              if (IS_ERR(tcbpwm->gclk))
++                      return PTR_ERR(tcbpwm->gclk);
+       }
+       tcbpwm->chip.dev = &pdev->dev;
+       tcbpwm->chip.ops = &atmel_tcb_pwm_ops;
+       tcbpwm->chip.npwm = NPWM;
+       tcbpwm->channel = channel;
+-      tcbpwm->regmap = regmap;
+-      tcbpwm->clk = clk;
+-      tcbpwm->gclk = gclk;
+-      tcbpwm->slow_clk = slow_clk;
+       tcbpwm->width = config->counter_width;
+-      err = clk_prepare_enable(slow_clk);
++      err = clk_prepare_enable(tcbpwm->slow_clk);
+       if (err)
+               goto err_slow_clk;
+@@ -495,7 +486,7 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
+       clk_disable_unprepare(tcbpwm->slow_clk);
+ err_slow_clk:
+-      clk_put(slow_clk);
++      clk_put(tcbpwm->slow_clk);
+       return err;
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.5/pwm-lpc32xx-remove-handling-of-pwm-channels.patch b/queue-6.5/pwm-lpc32xx-remove-handling-of-pwm-channels.patch
new file mode 100644 (file)
index 0000000..3e3b92c
--- /dev/null
@@ -0,0 +1,88 @@
+From 3bfdabed9a69f159ffad19cfcc13e26f4cc6fd78 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 17:52:57 +0200
+Subject: pwm: lpc32xx: Remove handling of PWM channels
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Vladimir Zapolskiy <vz@mleia.com>
+
+[ Upstream commit 4aae44f65827f0213a7361cf9c32cfe06114473f ]
+
+Because LPC32xx PWM controllers have only a single output which is
+registered as the only PWM device/channel per controller, it is known in
+advance that pwm->hwpwm value is always 0. On basis of this fact
+simplify the code by removing operations with pwm->hwpwm; there are no
+controls which require the channel number as input.
+
+Even though I wasn't aware at the time when I forward ported that patch,
+this fixes a null pointer dereference as lpc32xx->chip.pwms is NULL
+before devm_pwmchip_add() is called.
+
+Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
+Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Fixes: 3d2813fb17e5 ("pwm: lpc32xx: Don't modify HW state in .probe() after the PWM chip was registered")
+Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pwm/pwm-lpc32xx.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c
+index 86a0ea0f6955c..806f0bb3ad6d8 100644
+--- a/drivers/pwm/pwm-lpc32xx.c
++++ b/drivers/pwm/pwm-lpc32xx.c
+@@ -51,10 +51,10 @@ static int lpc32xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
+       if (duty_cycles > 255)
+               duty_cycles = 255;
+-      val = readl(lpc32xx->base + (pwm->hwpwm << 2));
++      val = readl(lpc32xx->base);
+       val &= ~0xFFFF;
+       val |= (period_cycles << 8) | duty_cycles;
+-      writel(val, lpc32xx->base + (pwm->hwpwm << 2));
++      writel(val, lpc32xx->base);
+       return 0;
+ }
+@@ -69,9 +69,9 @@ static int lpc32xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+       if (ret)
+               return ret;
+-      val = readl(lpc32xx->base + (pwm->hwpwm << 2));
++      val = readl(lpc32xx->base);
+       val |= PWM_ENABLE;
+-      writel(val, lpc32xx->base + (pwm->hwpwm << 2));
++      writel(val, lpc32xx->base);
+       return 0;
+ }
+@@ -81,9 +81,9 @@ static void lpc32xx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+       struct lpc32xx_pwm_chip *lpc32xx = to_lpc32xx_pwm_chip(chip);
+       u32 val;
+-      val = readl(lpc32xx->base + (pwm->hwpwm << 2));
++      val = readl(lpc32xx->base);
+       val &= ~PWM_ENABLE;
+-      writel(val, lpc32xx->base + (pwm->hwpwm << 2));
++      writel(val, lpc32xx->base);
+       clk_disable_unprepare(lpc32xx->clk);
+ }
+@@ -141,9 +141,9 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev)
+       lpc32xx->chip.npwm = 1;
+       /* If PWM is disabled, configure the output to the default value */
+-      val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
++      val = readl(lpc32xx->base);
+       val &= ~PWM_PIN_LEVEL;
+-      writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
++      writel(val, lpc32xx->base);
+       ret = devm_pwmchip_add(&pdev->dev, &lpc32xx->chip);
+       if (ret < 0) {
+-- 
+2.40.1
+
diff --git a/queue-6.5/regulator-tps6287x-fix-n_voltages.patch b/queue-6.5/regulator-tps6287x-fix-n_voltages.patch
new file mode 100644 (file)
index 0000000..eded950
--- /dev/null
@@ -0,0 +1,37 @@
+From e8f100a570ee8c57795d4248acc486c77fe95e93 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Aug 2023 16:04:12 +0200
+Subject: regulator: tps6287x: Fix n_voltages
+
+From: Vincent Whitchurch <vincent.whitchurch@axis.com>
+
+[ Upstream commit c69290557c7571dff3d995fa27619b965915e8a1 ]
+
+There are 256 possible voltage settings for each range, not 256 possible
+voltage settings in total.
+
+Fixes: 15a1cd245d5b ("regulator: tps6287x: Fix missing .n_voltages setting")
+Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
+Link: https://lore.kernel.org/r/20230829-tps-voltages-v1-1-7ba4f958a194@axis.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/regulator/tps6287x-regulator.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/regulator/tps6287x-regulator.c b/drivers/regulator/tps6287x-regulator.c
+index b1c0963586ace..e45579a4498c6 100644
+--- a/drivers/regulator/tps6287x-regulator.c
++++ b/drivers/regulator/tps6287x-regulator.c
+@@ -119,7 +119,7 @@ static struct regulator_desc tps6287x_reg = {
+       .ramp_mask = TPS6287X_CTRL1_VRAMP,
+       .ramp_delay_table = tps6287x_ramp_table,
+       .n_ramp_values = ARRAY_SIZE(tps6287x_ramp_table),
+-      .n_voltages = 256,
++      .n_voltages = 256 * ARRAY_SIZE(tps6287x_voltage_ranges),
+       .linear_ranges = tps6287x_voltage_ranges,
+       .n_linear_ranges = ARRAY_SIZE(tps6287x_voltage_ranges),
+       .linear_range_selectors = tps6287x_voltage_range_sel,
+-- 
+2.40.1
+
diff --git a/queue-6.5/regulator-tps6594-regulator-fix-random-kernel-crash.patch b/queue-6.5/regulator-tps6594-regulator-fix-random-kernel-crash.patch
new file mode 100644 (file)
index 0000000..97c20aa
--- /dev/null
@@ -0,0 +1,97 @@
+From 8d0aaaa1b2d86db710e2b206ac5667226476b70d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Sep 2023 16:07:34 +0200
+Subject: regulator: tps6594-regulator: Fix random kernel crash
+
+From: Jerome Neanne <jneanne@baylibre.com>
+
+[ Upstream commit ca0e36e3e39a4e8b5a4b647dff8c5938ca6ccbec ]
+
+A random kernel crash was detected in TI CI/CD when the regulator driver
+is added. The root cause is the irq index being incremented twice, which
+leads to irq_data being accessed outside of its allocated range.
+
+- Rework tps6594_request_reg_irqs with correct index increment
+- Adjust irq_data kmalloc size to the exact size needed for the device
+
+This has been reported on TI mainline. No public bug report associated.
+
+Reported-by: Udit Kumar <u-kumar1@ti.com>
+Fixes: f17ccc5deb4d ("regulator: tps6594-regulator: Add driver for TI TPS6594 regulators")
+Signed-off-by: Jerome Neanne <jneanne@baylibre.com>
+Link: https://lore.kernel.org/r/20230828-tps6594_random_boot_crash_fix-v1-1-f29cbf9ddb37@baylibre.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/regulator/tps6594-regulator.c | 31 +++++++++++++--------------
+ 1 file changed, 15 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/regulator/tps6594-regulator.c b/drivers/regulator/tps6594-regulator.c
+index d5a574ec6d12f..47c3b7efe145e 100644
+--- a/drivers/regulator/tps6594-regulator.c
++++ b/drivers/regulator/tps6594-regulator.c
+@@ -384,21 +384,19 @@ static int tps6594_request_reg_irqs(struct platform_device *pdev,
+               if (irq < 0)
+                       return -EINVAL;
+-              irq_data[*irq_idx + j].dev = tps->dev;
+-              irq_data[*irq_idx + j].type = irq_type;
+-              irq_data[*irq_idx + j].rdev = rdev;
++              irq_data[*irq_idx].dev = tps->dev;
++              irq_data[*irq_idx].type = irq_type;
++              irq_data[*irq_idx].rdev = rdev;
+               error = devm_request_threaded_irq(tps->dev, irq, NULL,
+-                                                tps6594_regulator_irq_handler,
+-                                                IRQF_ONESHOT,
+-                                                irq_type->irq_name,
+-                                                &irq_data[*irq_idx]);
+-              (*irq_idx)++;
++                                                tps6594_regulator_irq_handler, IRQF_ONESHOT,
++                                                irq_type->irq_name, &irq_data[*irq_idx]);
+               if (error) {
+                       dev_err(tps->dev, "tps6594 failed to request %s IRQ %d: %d\n",
+                               irq_type->irq_name, irq, error);
+                       return error;
+               }
++              (*irq_idx)++;
+       }
+       return 0;
+ }
+@@ -420,8 +418,8 @@ static int tps6594_regulator_probe(struct platform_device *pdev)
+       int error, i, irq, multi, delta;
+       int irq_idx = 0;
+       int buck_idx = 0;
+-      int ext_reg_irq_nb = 2;
+-
++      size_t ext_reg_irq_nb = 2;
++      size_t reg_irq_nb;
+       enum {
+               MULTI_BUCK12,
+               MULTI_BUCK123,
+@@ -484,15 +482,16 @@ static int tps6594_regulator_probe(struct platform_device *pdev)
+               }
+       }
+-      if (tps->chip_id == LP8764)
++      if (tps->chip_id == LP8764) {
+               /* There is only 4 buck on LP8764 */
+               buck_configured[4] = 1;
++              reg_irq_nb = size_mul(REGS_INT_NB, (BUCK_NB - 1));
++      } else {
++              reg_irq_nb = size_mul(REGS_INT_NB, (size_add(BUCK_NB, LDO_NB)));
++      }
+-      irq_data = devm_kmalloc_array(tps->dev,
+-                              REGS_INT_NB * sizeof(struct tps6594_regulator_irq_data),
+-                              ARRAY_SIZE(tps6594_bucks_irq_types) +
+-                              ARRAY_SIZE(tps6594_ldos_irq_types),
+-                              GFP_KERNEL);
++      irq_data = devm_kmalloc_array(tps->dev, reg_irq_nb,
++                                    sizeof(struct tps6594_regulator_irq_data), GFP_KERNEL);
+       if (!irq_data)
+               return -ENOMEM;
+-- 
+2.40.1
+
diff --git a/queue-6.5/s390-bpf-pass-through-tail-call-counter-in-trampolin.patch b/queue-6.5/s390-bpf-pass-through-tail-call-counter-in-trampolin.patch
new file mode 100644 (file)
index 0000000..5265d52
--- /dev/null
@@ -0,0 +1,82 @@
+From d220683361bc21db4f5f90d4457d7b27fee18a73 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 02:44:19 +0200
+Subject: s390/bpf: Pass through tail call counter in trampolines
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+[ Upstream commit a192103a11465e9d517975c50f9944dc80e44d61 ]
+
+s390x eBPF programs use the following extension to the s390x calling
+convention: tail call counter is passed on stack at offset
+STK_OFF_TCCNT, which callees otherwise use as scratch space.
+
+Currently trampoline does not respect this and clobbers tail call
+counter. This breaks enforcing tail call limits in eBPF programs, which
+have trampolines attached to them.
+
+Fix by forwarding a copy of the tail call counter to the original eBPF
+program in the trampoline (for fexit), and by restoring it at the end
+of the trampoline (for fentry).
+
+Fixes: 528eb2cb87bc ("s390/bpf: Implement arch_prepare_bpf_trampoline()")
+Reported-by: Leon Hwang <hffilwlqm@gmail.com>
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20230906004448.111674-1-iii@linux.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/net/bpf_jit_comp.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
+index 5e9371fbf3d5f..de2fb12120d2e 100644
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -2088,6 +2088,7 @@ struct bpf_tramp_jit {
+                                */
+       int r14_off;            /* Offset of saved %r14 */
+       int run_ctx_off;        /* Offset of struct bpf_tramp_run_ctx */
++      int tccnt_off;          /* Offset of saved tailcall counter */
+       int do_fexit;           /* do_fexit: label */
+ };
+@@ -2258,12 +2259,16 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
+       tjit->r14_off = alloc_stack(tjit, sizeof(u64));
+       tjit->run_ctx_off = alloc_stack(tjit,
+                                       sizeof(struct bpf_tramp_run_ctx));
++      tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
+       /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
+       tjit->stack_size -= STACK_FRAME_OVERHEAD;
+       tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
+       /* aghi %r15,-stack_size */
+       EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
++      /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
++      _EMIT6(0xd203f000 | tjit->tccnt_off,
++             0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
+       /* stmg %r2,%rN,fwd_reg_args_off(%r15) */
+       if (nr_reg_args)
+               EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
+@@ -2400,6 +2405,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
+                                      (nr_stack_args * sizeof(u64) - 1) << 16 |
+                                      tjit->stack_args_off,
+                              0xf000 | tjit->orig_stack_args_off);
++              /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
++              _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
+               /* lgr %r1,%r8 */
+               EMIT4(0xb9040000, REG_1, REG_8);
+               /* %r1() */
+@@ -2456,6 +2463,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
+       if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
+               EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
+                             tjit->retval_off);
++      /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
++      _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
++             0xf000 | tjit->tccnt_off);
+       /* aghi %r15,stack_size */
+       EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
+       /* Emit an expoline for the following indirect jump. */
+-- 
+2.40.1
+
diff --git a/queue-6.5/s390-zcrypt-don-t-leak-memory-if-dev_set_name-fails.patch b/queue-6.5/s390-zcrypt-don-t-leak-memory-if-dev_set_name-fails.patch
new file mode 100644 (file)
index 0000000..84f119a
--- /dev/null
@@ -0,0 +1,37 @@
+From f747742a5d3c3342441bfe474e4fd3315577c835 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 13:59:59 +0300
+Subject: s390/zcrypt: don't leak memory if dev_set_name() fails
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit 6252f47b78031979ad919f971dc8468b893488bd ]
+
+When dev_set_name() fails, zcdn_create() doesn't free the newly
+allocated resources. Do it.
+
+Fixes: 00fab2350e6b ("s390/zcrypt: multiple zcrypt device nodes support")
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20230831110000.24279-1-andriy.shevchenko@linux.intel.com
+Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/s390/crypto/zcrypt_api.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
+index 4b23c9f7f3e54..6b99f7dd06433 100644
+--- a/drivers/s390/crypto/zcrypt_api.c
++++ b/drivers/s390/crypto/zcrypt_api.c
+@@ -413,6 +413,7 @@ static int zcdn_create(const char *name)
+                        ZCRYPT_NAME "_%d", (int)MINOR(devt));
+       nodename[sizeof(nodename) - 1] = '\0';
+       if (dev_set_name(&zcdndev->device, nodename)) {
++              kfree(zcdndev);
+               rc = -EINVAL;
+               goto unlockout;
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.5/sctp-annotate-data-races-around-sk-sk_wmem_queued.patch b/queue-6.5/sctp-annotate-data-races-around-sk-sk_wmem_queued.patch
new file mode 100644 (file)
index 0000000..f669395
--- /dev/null
@@ -0,0 +1,152 @@
+From 5bd33ca573acb58e6c5956972c27287cd60267e4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Aug 2023 09:45:19 +0000
+Subject: sctp: annotate data-races around sk->sk_wmem_queued
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit dc9511dd6f37fe803f6b15b61b030728d7057417 ]
+
+sk->sk_wmem_queued can be read locklessly from sctp_poll()
+
+Use sk_wmem_queued_add() when the field is changed,
+and add READ_ONCE() annotations in sctp_writeable()
+and sctp_assocs_seq_show()
+
+syzbot reported:
+
+BUG: KCSAN: data-race in sctp_poll / sctp_wfree
+
+read-write to 0xffff888149d77810 of 4 bytes by interrupt on cpu 0:
+sctp_wfree+0x170/0x4a0 net/sctp/socket.c:9147
+skb_release_head_state+0xb7/0x1a0 net/core/skbuff.c:988
+skb_release_all net/core/skbuff.c:1000 [inline]
+__kfree_skb+0x16/0x140 net/core/skbuff.c:1016
+consume_skb+0x57/0x180 net/core/skbuff.c:1232
+sctp_chunk_destroy net/sctp/sm_make_chunk.c:1503 [inline]
+sctp_chunk_put+0xcd/0x130 net/sctp/sm_make_chunk.c:1530
+sctp_datamsg_put+0x29a/0x300 net/sctp/chunk.c:128
+sctp_chunk_free+0x34/0x50 net/sctp/sm_make_chunk.c:1515
+sctp_outq_sack+0xafa/0xd70 net/sctp/outqueue.c:1381
+sctp_cmd_process_sack net/sctp/sm_sideeffect.c:834 [inline]
+sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1366 [inline]
+sctp_side_effects net/sctp/sm_sideeffect.c:1198 [inline]
+sctp_do_sm+0x12c7/0x31b0 net/sctp/sm_sideeffect.c:1169
+sctp_assoc_bh_rcv+0x2b2/0x430 net/sctp/associola.c:1051
+sctp_inq_push+0x108/0x120 net/sctp/inqueue.c:80
+sctp_rcv+0x116e/0x1340 net/sctp/input.c:243
+sctp6_rcv+0x25/0x40 net/sctp/ipv6.c:1120
+ip6_protocol_deliver_rcu+0x92f/0xf30 net/ipv6/ip6_input.c:437
+ip6_input_finish net/ipv6/ip6_input.c:482 [inline]
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip6_input+0xbd/0x1b0 net/ipv6/ip6_input.c:491
+dst_input include/net/dst.h:468 [inline]
+ip6_rcv_finish+0x1e2/0x2e0 net/ipv6/ip6_input.c:79
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ipv6_rcv+0x74/0x150 net/ipv6/ip6_input.c:309
+__netif_receive_skb_one_core net/core/dev.c:5452 [inline]
+__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5566
+process_backlog+0x21f/0x380 net/core/dev.c:5894
+__napi_poll+0x60/0x3b0 net/core/dev.c:6460
+napi_poll net/core/dev.c:6527 [inline]
+net_rx_action+0x32b/0x750 net/core/dev.c:6660
+__do_softirq+0xc1/0x265 kernel/softirq.c:553
+run_ksoftirqd+0x17/0x20 kernel/softirq.c:921
+smpboot_thread_fn+0x30a/0x4a0 kernel/smpboot.c:164
+kthread+0x1d7/0x210 kernel/kthread.c:389
+ret_from_fork+0x2e/0x40 arch/x86/kernel/process.c:145
+ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
+
+read to 0xffff888149d77810 of 4 bytes by task 17828 on cpu 1:
+sctp_writeable net/sctp/socket.c:9304 [inline]
+sctp_poll+0x265/0x410 net/sctp/socket.c:8671
+sock_poll+0x253/0x270 net/socket.c:1374
+vfs_poll include/linux/poll.h:88 [inline]
+do_pollfd fs/select.c:873 [inline]
+do_poll fs/select.c:921 [inline]
+do_sys_poll+0x636/0xc00 fs/select.c:1015
+__do_sys_ppoll fs/select.c:1121 [inline]
+__se_sys_ppoll+0x1af/0x1f0 fs/select.c:1101
+__x64_sys_ppoll+0x67/0x80 fs/select.c:1101
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x00019e80 -> 0x0000cc80
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 17828 Comm: syz-executor.1 Not tainted 6.5.0-rc7-syzkaller-00185-g28f20a19294d #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Xin Long <lucien.xin@gmail.com>
+Link: https://lore.kernel.org/r/20230830094519.950007-1-edumazet@google.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/proc.c   |  2 +-
+ net/sctp/socket.c | 10 +++++-----
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/net/sctp/proc.c b/net/sctp/proc.c
+index f13d6a34f32f2..ec00ee75d59a6 100644
+--- a/net/sctp/proc.c
++++ b/net/sctp/proc.c
+@@ -282,7 +282,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
+               assoc->init_retries, assoc->shutdown_retries,
+               assoc->rtx_data_chunks,
+               refcount_read(&sk->sk_wmem_alloc),
+-              sk->sk_wmem_queued,
++              READ_ONCE(sk->sk_wmem_queued),
+               sk->sk_sndbuf,
+               sk->sk_rcvbuf);
+       seq_printf(seq, "\n");
+diff --git a/net/sctp/socket.c b/net/sctp/socket.c
+index 423dc400992ba..7cf207706eb66 100644
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -69,7 +69,7 @@
+ #include <net/sctp/stream_sched.h>
+ /* Forward declarations for internal helper functions. */
+-static bool sctp_writeable(struct sock *sk);
++static bool sctp_writeable(const struct sock *sk);
+ static void sctp_wfree(struct sk_buff *skb);
+ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+                               size_t msg_len);
+@@ -140,7 +140,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
+       refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+       asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk);
+-      sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk);
++      sk_wmem_queued_add(sk, chunk->skb->truesize + sizeof(struct sctp_chunk));
+       sk_mem_charge(sk, chunk->skb->truesize);
+ }
+@@ -9144,7 +9144,7 @@ static void sctp_wfree(struct sk_buff *skb)
+       struct sock *sk = asoc->base.sk;
+       sk_mem_uncharge(sk, skb->truesize);
+-      sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk);
++      sk_wmem_queued_add(sk, -(skb->truesize + sizeof(struct sctp_chunk)));
+       asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk);
+       WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk),
+                                     &sk->sk_wmem_alloc));
+@@ -9299,9 +9299,9 @@ void sctp_write_space(struct sock *sk)
+  * UDP-style sockets or TCP-style sockets, this code should work.
+  *  - Daisy
+  */
+-static bool sctp_writeable(struct sock *sk)
++static bool sctp_writeable(const struct sock *sk)
+ {
+-      return sk->sk_sndbuf > sk->sk_wmem_queued;
++      return READ_ONCE(sk->sk_sndbuf) > READ_ONCE(sk->sk_wmem_queued);
+ }
+ /* Wait for an association to go into ESTABLISHED state. If timeout is 0,
+-- 
+2.40.1
+
diff --git a/queue-6.5/selftests-bpf-fix-a-ci-failure-caused-by-vsock-write.patch b/queue-6.5/selftests-bpf-fix-a-ci-failure-caused-by-vsock-write.patch
new file mode 100644 (file)
index 0000000..3f23fa7
--- /dev/null
@@ -0,0 +1,101 @@
+From 341b41264999a57e94288ff6194829f1bc65b554 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 11:10:37 +0800
+Subject: selftests/bpf: Fix a CI failure caused by vsock write
+
+From: Xu Kuohai <xukuohai@huawei.com>
+
+[ Upstream commit c1970e26bdc1209974bb5cf31cc23f2b7ad6ce50 ]
+
+While commit 90f0074cd9f9 ("selftests/bpf: fix a CI failure caused by vsock sockmap test")
+fixes a receive failure of vsock sockmap test, there is still a write failure:
+
+Error: #211/79 sockmap_listen/sockmap VSOCK test_vsock_redir
+Error: #211/79 sockmap_listen/sockmap VSOCK test_vsock_redir
+  ./test_progs:vsock_unix_redir_connectible:1501: egress: write: Transport endpoint is not connected
+  vsock_unix_redir_connectible:FAIL:1501
+  ./test_progs:vsock_unix_redir_connectible:1501: ingress: write: Transport endpoint is not connected
+  vsock_unix_redir_connectible:FAIL:1501
+  ./test_progs:vsock_unix_redir_connectible:1501: egress: write: Transport endpoint is not connected
+  vsock_unix_redir_connectible:FAIL:1501
+
+The reason is that the vsock connection in the test is set to ESTABLISHED state
+by function virtio_transport_recv_pkt, which is executed in a workqueue thread,
+so when the user space test thread runs before the workqueue thread, this
+problem occurs.
+
+To fix it, wait for the connection to be established before writing to it.
+
+Fixes: d61bd8c1fd02 ("selftests/bpf: add a test case for vsock sockmap")
+Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20230901031037.3314007-1-xukuohai@huaweicloud.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../bpf/prog_tests/sockmap_helpers.h          | 26 +++++++++++++++++++
+ .../selftests/bpf/prog_tests/sockmap_listen.c |  7 +++++
+ 2 files changed, 33 insertions(+)
+
+diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+index d12665490a905..36d829a65aa44 100644
+--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
++++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+@@ -179,6 +179,32 @@
+               __ret;                                                         \
+       })
++static inline int poll_connect(int fd, unsigned int timeout_sec)
++{
++      struct timeval timeout = { .tv_sec = timeout_sec };
++      fd_set wfds;
++      int r, eval;
++      socklen_t esize = sizeof(eval);
++
++      FD_ZERO(&wfds);
++      FD_SET(fd, &wfds);
++
++      r = select(fd + 1, NULL, &wfds, NULL, &timeout);
++      if (r == 0)
++              errno = ETIME;
++      if (r != 1)
++              return -1;
++
++      if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
++              return -1;
++      if (eval != 0) {
++              errno = eval;
++              return -1;
++      }
++
++      return 0;
++}
++
+ static inline int poll_read(int fd, unsigned int timeout_sec)
+ {
+       struct timeval timeout = { .tv_sec = timeout_sec };
+diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+index 5674a9d0cacf0..8df8cbb447f10 100644
+--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
++++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+@@ -1452,11 +1452,18 @@ static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
+       if (p < 0)
+               goto close_cli;
++      if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
++              FAIL_ERRNO("poll_connect");
++              goto close_acc;
++      }
++
+       *v0 = p;
+       *v1 = c;
+       return 0;
++close_acc:
++      close(p);
+ close_cli:
+       close(c);
+ close_srv:
+-- 
+2.40.1
+
diff --git a/queue-6.5/selftests-bpf-fix-flaky-cgroup_iter_sleepable-subtes.patch b/queue-6.5/selftests-bpf-fix-flaky-cgroup_iter_sleepable-subtes.patch
new file mode 100644 (file)
index 0000000..e18f4bb
--- /dev/null
@@ -0,0 +1,83 @@
+From c820a048112793b96dde618ef0f80a2d5ba874ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 27 Aug 2023 08:05:51 -0700
+Subject: selftests/bpf: Fix flaky cgroup_iter_sleepable subtest
+
+From: Yonghong Song <yonghong.song@linux.dev>
+
+[ Upstream commit 5439cfa7fe612e7d02d5a1234feda3fa6e483ba7 ]
+
+Occasionally, with './test_progs -j' on my vm, I will hit the
+following failure:
+
+  test_cgrp_local_storage:PASS:join_cgroup /cgrp_local_storage 0 nsec
+  test_cgroup_iter_sleepable:PASS:skel_open 0 nsec
+  test_cgroup_iter_sleepable:PASS:skel_load 0 nsec
+  test_cgroup_iter_sleepable:PASS:attach_iter 0 nsec
+  test_cgroup_iter_sleepable:PASS:iter_create 0 nsec
+  test_cgroup_iter_sleepable:FAIL:cgroup_id unexpected cgroup_id: actual 1 != expected 2812
+  #48/5    cgrp_local_storage/cgroup_iter_sleepable:FAIL
+  #48      cgrp_local_storage:FAIL
+
+Finally, I decided to investigate, since I introduced the test myself.
+It turns out the cause is a cgroup_fd with value 0.
+In cgroup_iter, a cgroup_fd of value 0 means the root cgroup.
+
+       /* from cgroup_iter.c */
+        if (fd)
+                cgrp = cgroup_v1v2_get_from_fd(fd);
+        else if (id)
+                cgrp = cgroup_get_from_id(id);
+        else /* walk the entire hierarchy by default. */
+                cgrp = cgroup_get_from_path("/");
+
+That is why we got cgroup_id 1 instead of expected 2812.
+
+Why did we get a cgroup_fd of 0? Nobody should really touch 'stdin' (fd 0) in
+test_progs. I traced 'close' syscall with stack trace and found the root
+cause, which is a bug in bpf_obj_pinning.c. Basically, the code closed
+fd 0 although it should not. Fixing the bug in bpf_obj_pinning.c also
+resolved the above cgroup_iter_sleepable subtest failure.
+
+Fixes: 3b22f98e5a05 ("selftests/bpf: Add path_fd-based BPF_OBJ_PIN and BPF_OBJ_GET tests")
+Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20230827150551.1743497-1-yonghong.song@linux.dev
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
+index 31f1e815f6719..ee0458a5ce789 100644
+--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
++++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
+@@ -8,6 +8,7 @@
+ #include <linux/unistd.h>
+ #include <linux/mount.h>
+ #include <sys/syscall.h>
++#include "bpf/libbpf_internal.h"
+ static inline int sys_fsopen(const char *fsname, unsigned flags)
+ {
+@@ -155,7 +156,7 @@ static void validate_pin(int map_fd, const char *map_name, int src_value,
+       ASSERT_OK(err, "obj_pin");
+       /* cleanup */
+-      if (pin_opts.path_fd >= 0)
++      if (path_kind == PATH_FD_REL && pin_opts.path_fd >= 0)
+               close(pin_opts.path_fd);
+       if (old_cwd[0])
+               ASSERT_OK(chdir(old_cwd), "restore_cwd");
+@@ -220,7 +221,7 @@ static void validate_get(int map_fd, const char *map_name, int src_value,
+               goto cleanup;
+       /* cleanup */
+-      if (get_opts.path_fd >= 0)
++      if (path_kind == PATH_FD_REL && get_opts.path_fd >= 0)
+               close(get_opts.path_fd);
+       if (old_cwd[0])
+               ASSERT_OK(chdir(old_cwd), "restore_cwd");
+-- 
+2.40.1
+
index bb2fb2738c3f5868128e0466788835f4a095363a..9917992ad42e8279d1400c44be81df94cbb06006 100644 (file)
@@ -53,3 +53,124 @@ clk-qcom-mss-sc7180-fix-missing-resume-during-probe.patch
 nfs-fix-a-potential-data-corruption.patch
 nfsv4-pnfs-minor-fix-for-cleanup-path-in-nfs4_get_device_info.patch
 bus-mhi-host-skip-mhi-reset-if-device-is-in-rddm.patch
+kbuild-rpm-pkg-define-_arch-conditionally.patch
+kbuild-do-not-run-depmod-for-make-modules_sign.patch
+kbuild-dummy-tools-make-mprofile_kernel-checks-work-.patch
+tpm_crb-fix-an-error-handling-path-in-crb_acpi_add.patch
+gfs2-switch-to-wait_event-in-gfs2_logd.patch
+gfs2-low-memory-forced-flush-fixes.patch
+mailbox-qcom-ipcc-fix-incorrect-num_chans-counting.patch
+kconfig-fix-possible-buffer-overflow.patch
+tools-mm-fix-undefined-reference-to-pthread_once.patch
+input-iqs7222-configure-power-mode-before-triggering.patch
+perf-trace-really-free-the-evsel-priv-area.patch
+pwm-atmel-tcb-harmonize-resource-allocation-order.patch
+pwm-atmel-tcb-fix-resource-freeing-in-error-path-and.patch
+backlight-lp855x-initialize-pwm-state-on-first-brigh.patch
+backlight-gpio_backlight-drop-output-gpio-direction-.patch
+perf-parse-events-separate-yyabort-and-yynomem-cases.patch
+perf-parse-events-move-instances-of-yyabort-to-yynom.patch
+perf-parse-events-separate-enomem-memory-handling.patch
+perf-parse-events-additional-error-reporting.patch
+kvm-svm-don-t-defer-nmi-unblocking-until-next-exit-f.patch
+input-tca6416-keypad-always-expect-proper-irq-number.patch
+input-tca6416-keypad-fix-interrupt-enable-disbalance.patch
+perf-annotate-bpf-don-t-enclose-non-debug-code-with-.patch
+x86-virt-drop-unnecessary-check-on-extended-cpuid-le.patch
+perf-script-print-cgroup-field-on-the-same-line-as-c.patch
+perf-bpf-filter-fix-sample-flag-check-with.patch
+perf-dlfilter-initialize-addr_location-before-passin.patch
+perf-dlfilter-add-al_cleanup.patch
+perf-vendor-events-update-the-json-events-descriptio.patch
+perf-vendor-events-drop-some-of-the-json-events-for-.patch
+perf-vendor-events-drop-stores_per_inst-metric-event.patch
+perf-vendor-events-move-json-events-to-appropriate-f.patch
+perf-vendor-events-update-metric-event-names-for-pow.patch
+perf-top-don-t-pass-an-err_ptr-directly-to-perf_sess.patch
+perf-lock-don-t-pass-an-err_ptr-directly-to-perf_ses.patch
+kvm-svm-correct-the-size-of-spec_ctrl-field-in-vmcb-.patch
+watchdog-intel-mid_wdt-add-module_alias-to-allow-aut.patch
+perf-vendor-events-arm64-remove-l1d_cache_lmiss-from.patch
+pwm-lpc32xx-remove-handling-of-pwm-channels.patch
+accel-ivpu-refactor-deprecated-strncpy.patch
+perf-header-fix-missing-pmu-caps.patch
+i3c-master-svc-describe-member-saved_regs.patch
+perf-test-stat_bpf_counters_cgrp-fix-shellcheck-issu.patch
+perf-test-stat_bpf_counters_cgrp-enhance-perf-stat-c.patch
+regulator-tps6287x-fix-n_voltages.patch
+selftests-bpf-fix-flaky-cgroup_iter_sleepable-subtes.patch
+drm-i915-mark-requests-for-guc-virtual-engines-to-av.patch
+blk-throttle-use-calculate_io-bytes_allowed-for-thro.patch
+blk-throttle-consider-carryover_ios-bytes-in-throtl_.patch
+netfilter-nf_tables-audit-log-setelem-reset.patch
+netfilter-nf_tables-audit-log-rule-reset.patch
+smb-propagate-error-code-of-extract_sharename.patch
+net-sched-fq_pie-avoid-stalls-in-fq_pie_timer.patch
+sctp-annotate-data-races-around-sk-sk_wmem_queued.patch
+ipv4-annotate-data-races-around-fi-fib_dead.patch
+net-read-sk-sk_family-once-in-sk_mc_loop.patch
+net-fib-avoid-warn-splat-in-flow-dissector.patch
+xsk-fix-xsk_diag-use-after-free-error-during-socket-.patch
+ceph-make-members-in-struct-ceph_mds_request_args_ex.patch
+drm-i915-gvt-verify-pfn-is-valid-before-dereferencin.patch
+drm-i915-gvt-put-the-page-reference-obtained-by-kvm-.patch
+drm-i915-gvt-drop-unused-helper-intel_vgpu_reset_gtt.patch
+drm-amd-display-fix-mode-scaling-rmx_.patch
+net-handshake-fix-null-ptr-deref-in-handshake_nl_don.patch
+net-use-sk_forward_alloc_get-in-sk_get_meminfo.patch
+net-annotate-data-races-around-sk-sk_forward_alloc.patch
+mptcp-annotate-data-races-around-msk-rmem_fwd_alloc.patch
+net-annotate-data-races-around-sk-sk_tsflags.patch
+net-annotate-data-races-around-sk-sk_bind_phc.patch
+ipv4-ignore-dst-hint-for-multipath-routes.patch
+ipv6-ignore-dst-hint-for-multipath-routes.patch
+selftests-bpf-fix-a-ci-failure-caused-by-vsock-write.patch
+igb-disable-virtualization-features-on-82580.patch
+gve-fix-frag_list-chaining.patch
+veth-fixing-transmit-return-status-for-dropped-packe.patch
+net-ipv6-addrconf-avoid-integer-underflow-in-ipv6_cr.patch
+net-phy-micrel-correct-bit-assignments-for-phy_devic.patch
+bpf-sockmap-fix-skb-refcnt-race-after-locking-change.patch
+af_unix-fix-msg_controllen-test-in-scm_pidfd_recv-fo.patch
+af_unix-fix-data-races-around-user-unix_inflight.patch
+af_unix-fix-data-race-around-unix_tot_inflight.patch
+af_unix-fix-data-races-around-sk-sk_shutdown.patch
+af_unix-fix-data-race-around-sk-sk_err.patch
+net-sched-sch_qfq-fix-uaf-in-qfq_dequeue.patch
+kcm-destroy-mutex-in-kcm_exit_net.patch
+octeontx2-af-fix-truncation-of-smq-in-cn10k-nix-aq-e.patch
+igc-change-igc_min-to-allow-set-rx-tx-value-between-.patch
+igbvf-change-igbvf_min-to-allow-set-rx-tx-value-betw.patch
+igb-change-igb_min-to-allow-set-rx-tx-value-between-.patch
+s390-zcrypt-don-t-leak-memory-if-dev_set_name-fails.patch
+regulator-tps6594-regulator-fix-random-kernel-crash.patch
+idr-fix-param-name-in-idr_alloc_cyclic-doc.patch
+ip_tunnels-use-dev_stats_inc.patch
+net-mlx5e-clear-mirred-devices-array-if-the-rule-is-.patch
+net-mlx5-give-esw_offloads_load-unload_rep-mlx5_-pre.patch
+net-mlx5-rework-devlink-port-alloc-free-into-init-cl.patch
+net-mlx5-push-devlink-port-pf-vf-init-cleanup-calls-.patch
+mlx5-core-e-switch-create-acl-ft-for-eswitch-manager.patch
+net-dsa-sja1105-fix-bandwidth-discrepancy-between-tc.patch
+net-dsa-sja1105-fix-enospc-when-replacing-the-same-t.patch
+net-dsa-sja1105-complete-tc-cbs-offload-support-on-s.patch
+net-phylink-fix-sphinx-complaint-about-invalid-liter.patch
+bpf-invoke-__bpf_prog_exit_sleepable_recur-on-recurs.patch
+bpf-assign-bpf_tramp_run_ctx-saved_run_ctx-before-re.patch
+s390-bpf-pass-through-tail-call-counter-in-trampolin.patch
+bpf-bpf_sk_storage-fix-invalid-wait-context-lockdep-.patch
+bpf-bpf_sk_storage-fix-the-missing-uncharge-in-sk_om.patch
+netfilter-nftables-exthdr-fix-4-byte-stack-oob-write.patch
+netfilter-nfnetlink_osf-avoid-oob-read.patch
+netfilter-nft_set_rbtree-skip-sync-gc-for-new-elemen.patch
+netfilter-nf_tables-unbreak-audit-log-reset.patch
+net-phy-provide-module-4-ksz9477-errata-ds80000754c.patch
+net-hns3-fix-tx-timeout-issue.patch
+net-hns3-fix-byte-order-conversion-issue-in-hclge_db.patch
+net-hns3-fix-debugfs-concurrency-issue-between-kfree.patch
+net-hns3-fix-invalid-mutex-between-tc-qdisc-and-dcb-.patch
+net-hns3-fix-the-port-information-display-when-sfp-i.patch
+net-hns3-remove-gso-partial-feature-bit.patch
+net-enetc-distinguish-error-from-valid-pointers-in-e.patch
+sh-boards-fix-ceu-buffer-size-passed-to-dma_declare_.patch
+sh-push-switch-reorder-cleanup-operations-to-avoid-u.patch
diff --git a/queue-6.5/sh-boards-fix-ceu-buffer-size-passed-to-dma_declare_.patch b/queue-6.5/sh-boards-fix-ceu-buffer-size-passed-to-dma_declare_.patch
new file mode 100644 (file)
index 0000000..49bbe7f
--- /dev/null
@@ -0,0 +1,121 @@
+From 2c2b2eaada446a486c798d18daa02b4906b42f87 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Jul 2023 14:07:42 +0200
+Subject: sh: boards: Fix CEU buffer size passed to
+ dma_declare_coherent_memory()
+
+From: Petr Tesarik <petr.tesarik.ext@huawei.com>
+
+[ Upstream commit fb60211f377b69acffead3147578f86d0092a7a5 ]
+
+In all these cases, the last argument to dma_declare_coherent_memory() is
+the buffer end address, but the expected value should be the size of the
+reserved region.
+
+Fixes: 39fb993038e1 ("media: arch: sh: ap325rxa: Use new renesas-ceu camera driver")
+Fixes: c2f9b05fd5c1 ("media: arch: sh: ecovec: Use new renesas-ceu camera driver")
+Fixes: f3590dc32974 ("media: arch: sh: kfr2r09: Use new renesas-ceu camera driver")
+Fixes: 186c446f4b84 ("media: arch: sh: migor: Use new renesas-ceu camera driver")
+Fixes: 1a3c230b4151 ("media: arch: sh: ms7724se: Use new renesas-ceu camera driver")
+Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
+Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
+Reviewed-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
+Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
+Link: https://lore.kernel.org/r/20230724120742.2187-1-petrtesarik@huaweicloud.com
+Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/sh/boards/mach-ap325rxa/setup.c | 2 +-
+ arch/sh/boards/mach-ecovec24/setup.c | 6 ++----
+ arch/sh/boards/mach-kfr2r09/setup.c  | 2 +-
+ arch/sh/boards/mach-migor/setup.c    | 2 +-
+ arch/sh/boards/mach-se/7724/setup.c  | 6 ++----
+ 5 files changed, 7 insertions(+), 11 deletions(-)
+
+diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c
+index 151792162152c..645cccf3da88e 100644
+--- a/arch/sh/boards/mach-ap325rxa/setup.c
++++ b/arch/sh/boards/mach-ap325rxa/setup.c
+@@ -531,7 +531,7 @@ static int __init ap325rxa_devices_setup(void)
+       device_initialize(&ap325rxa_ceu_device.dev);
+       dma_declare_coherent_memory(&ap325rxa_ceu_device.dev,
+                       ceu_dma_membase, ceu_dma_membase,
+-                      ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
++                      CEU_BUFFER_MEMORY_SIZE);
+       platform_device_add(&ap325rxa_ceu_device);
+diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
+index 674da7ebd8b7f..7ec03d4a4edf0 100644
+--- a/arch/sh/boards/mach-ecovec24/setup.c
++++ b/arch/sh/boards/mach-ecovec24/setup.c
+@@ -1454,15 +1454,13 @@ static int __init arch_setup(void)
+       device_initialize(&ecovec_ceu_devices[0]->dev);
+       dma_declare_coherent_memory(&ecovec_ceu_devices[0]->dev,
+                                   ceu0_dma_membase, ceu0_dma_membase,
+-                                  ceu0_dma_membase +
+-                                  CEU_BUFFER_MEMORY_SIZE - 1);
++                                  CEU_BUFFER_MEMORY_SIZE);
+       platform_device_add(ecovec_ceu_devices[0]);
+       device_initialize(&ecovec_ceu_devices[1]->dev);
+       dma_declare_coherent_memory(&ecovec_ceu_devices[1]->dev,
+                                   ceu1_dma_membase, ceu1_dma_membase,
+-                                  ceu1_dma_membase +
+-                                  CEU_BUFFER_MEMORY_SIZE - 1);
++                                  CEU_BUFFER_MEMORY_SIZE);
+       platform_device_add(ecovec_ceu_devices[1]);
+       gpiod_add_lookup_table(&cn12_power_gpiod_table);
+diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
+index 20f4db778ed6a..c6d556dfbbbe6 100644
+--- a/arch/sh/boards/mach-kfr2r09/setup.c
++++ b/arch/sh/boards/mach-kfr2r09/setup.c
+@@ -603,7 +603,7 @@ static int __init kfr2r09_devices_setup(void)
+       device_initialize(&kfr2r09_ceu_device.dev);
+       dma_declare_coherent_memory(&kfr2r09_ceu_device.dev,
+                       ceu_dma_membase, ceu_dma_membase,
+-                      ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
++                      CEU_BUFFER_MEMORY_SIZE);
+       platform_device_add(&kfr2r09_ceu_device);
+diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
+index f60061283c482..773ee767d0c4e 100644
+--- a/arch/sh/boards/mach-migor/setup.c
++++ b/arch/sh/boards/mach-migor/setup.c
+@@ -604,7 +604,7 @@ static int __init migor_devices_setup(void)
+       device_initialize(&migor_ceu_device.dev);
+       dma_declare_coherent_memory(&migor_ceu_device.dev,
+                       ceu_dma_membase, ceu_dma_membase,
+-                      ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
++                      CEU_BUFFER_MEMORY_SIZE);
+       platform_device_add(&migor_ceu_device);
+diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
+index b60a2626e18b2..6495f93540654 100644
+--- a/arch/sh/boards/mach-se/7724/setup.c
++++ b/arch/sh/boards/mach-se/7724/setup.c
+@@ -940,15 +940,13 @@ static int __init devices_setup(void)
+       device_initialize(&ms7724se_ceu_devices[0]->dev);
+       dma_declare_coherent_memory(&ms7724se_ceu_devices[0]->dev,
+                                   ceu0_dma_membase, ceu0_dma_membase,
+-                                  ceu0_dma_membase +
+-                                  CEU_BUFFER_MEMORY_SIZE - 1);
++                                  CEU_BUFFER_MEMORY_SIZE);
+       platform_device_add(ms7724se_ceu_devices[0]);
+       device_initialize(&ms7724se_ceu_devices[1]->dev);
+       dma_declare_coherent_memory(&ms7724se_ceu_devices[1]->dev,
+                                   ceu1_dma_membase, ceu1_dma_membase,
+-                                  ceu1_dma_membase +
+-                                  CEU_BUFFER_MEMORY_SIZE - 1);
++                                  CEU_BUFFER_MEMORY_SIZE);
+       platform_device_add(ms7724se_ceu_devices[1]);
+       return platform_add_devices(ms7724se_devices,
+-- 
+2.40.1
+
diff --git a/queue-6.5/sh-push-switch-reorder-cleanup-operations-to-avoid-u.patch b/queue-6.5/sh-push-switch-reorder-cleanup-operations-to-avoid-u.patch
new file mode 100644 (file)
index 0000000..846d3aa
--- /dev/null
@@ -0,0 +1,57 @@
+From e272121a7e99c8609a1ab93af553333485a1948a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 11:37:37 +0800
+Subject: sh: push-switch: Reorder cleanup operations to avoid use-after-free
+ bug
+
+From: Duoming Zhou <duoming@zju.edu.cn>
+
+[ Upstream commit 246f80a0b17f8f582b2c0996db02998239057c65 ]
+
+The original code puts flush_work() before timer_shutdown_sync()
+in switch_drv_remove(). Although we use flush_work() to stop
+the worker, it could be rescheduled in switch_timer(). As a result,
+a use-after-free bug can occur. The details are shown below:
+
+      (cpu 0)                    |      (cpu 1)
+switch_drv_remove()              |
+ flush_work()                    |
+  ...                            |  switch_timer // timer
+                                 |   schedule_work(&psw->work)
+ timer_shutdown_sync()           |
+ ...                             |  switch_work_handler // worker
+ kfree(psw) // free              |
+                                 |   psw->state = 0 // use
+
+This patch puts timer_shutdown_sync() before flush_work() to
+avoid the bug. As a result, the timer and the worker are both
+stopped safely before the memory is freed.
+
+Fixes: 9f5e8eee5cfe ("sh: generic push-switch framework.")
+Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
+Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
+Link: https://lore.kernel.org/r/20230802033737.9738-1-duoming@zju.edu.cn
+Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/sh/drivers/push-switch.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/sh/drivers/push-switch.c b/arch/sh/drivers/push-switch.c
+index c95f48ff3f6fb..6ecba5f521eb6 100644
+--- a/arch/sh/drivers/push-switch.c
++++ b/arch/sh/drivers/push-switch.c
+@@ -101,8 +101,8 @@ static int switch_drv_remove(struct platform_device *pdev)
+               device_remove_file(&pdev->dev, &dev_attr_switch);
+       platform_set_drvdata(pdev, NULL);
+-      flush_work(&psw->work);
+       timer_shutdown_sync(&psw->debounce);
++      flush_work(&psw->work);
+       free_irq(irq, pdev);
+       kfree(psw);
+-- 
+2.40.1
+
diff --git a/queue-6.5/smb-propagate-error-code-of-extract_sharename.patch b/queue-6.5/smb-propagate-error-code-of-extract_sharename.patch
new file mode 100644 (file)
index 0000000..0ead2bf
--- /dev/null
@@ -0,0 +1,37 @@
+From 93a75b0abe30ba9a9b2101fc38a3b358dc9e4dc6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Aug 2023 16:38:31 +0300
+Subject: smb: propagate error code of extract_sharename()
+
+From: Katya Orlova <e.orlova@ispras.ru>
+
+[ Upstream commit efc0b0bcffcba60d9c6301063d25a22a4744b499 ]
+
+extract_sharename() may fail with -ENOMEM as well as -EINVAL; return its error.
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: 70431bfd825d ("cifs: Support fscache indexing rewrite")
+Signed-off-by: Katya Orlova <e.orlova@ispras.ru>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/client/fscache.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c
+index 8f6909d633da8..34e20c4cd507f 100644
+--- a/fs/smb/client/fscache.c
++++ b/fs/smb/client/fscache.c
+@@ -48,7 +48,7 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
+       sharename = extract_sharename(tcon->tree_name);
+       if (IS_ERR(sharename)) {
+               cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
+-              return -EINVAL;
++              return PTR_ERR(sharename);
+       }
+       slen = strlen(sharename);
+-- 
+2.40.1
+
diff --git a/queue-6.5/tools-mm-fix-undefined-reference-to-pthread_once.patch b/queue-6.5/tools-mm-fix-undefined-reference-to-pthread_once.patch
new file mode 100644 (file)
index 0000000..8d79be1
--- /dev/null
@@ -0,0 +1,54 @@
+From 9e78e197885753b868ec109631e3f370999d794c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 11:42:05 +0800
+Subject: tools/mm: fix undefined reference to pthread_once
+
+From: Xie XiuQi <xiexiuqi@huawei.com>
+
+[ Upstream commit 7f33105cdd59a99d068d3d147723a865d10e2260 ]
+
+Commit 97d5f2e9ee12 ("tools api fs: More thread safety for global
+filesystem variables") introduced pthread_once, so libpthread must
+be linked in; otherwise we hit the following link error when
+running 'make -C tools/mm':
+
+  gcc -Wall -Wextra -I../lib/ -o page-types page-types.c ../lib/api/libapi.a
+  ~/linux/tools/lib/api/fs/fs.c:146: undefined reference to `pthread_once'
+  ~/linux/tools/lib/api/fs/fs.c:147: undefined reference to `pthread_once'
+  ~/linux/tools/lib/api/fs/fs.c:148: undefined reference to `pthread_once'
+  ~/linux/tools/lib/api/fs/fs.c:149: undefined reference to `pthread_once'
+  ~/linux/tools/lib/api/fs/fs.c:150: undefined reference to `pthread_once'
+  /usr/bin/ld: ../lib/api/libapi.a(libapi-in.o):~/linux/tools/lib/api/fs/fs.c:151:
+  more undefined references to `pthread_once' follow
+  collect2: error: ld returned 1 exit status
+  make: *** [Makefile:22: page-types] Error 1
+
+Link: https://lkml.kernel.org/r/20230831034205.2376653-1-xiexiuqi@huaweicloud.com
+Fixes: 97d5f2e9ee12 ("tools api fs: More thread safety for global filesystem variables")
+Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
+Acked-by: Ian Rogers <irogers@google.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/mm/Makefile | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/mm/Makefile b/tools/mm/Makefile
+index 6c1da51f4177c..1c5606cc33346 100644
+--- a/tools/mm/Makefile
++++ b/tools/mm/Makefile
+@@ -8,8 +8,8 @@ TARGETS=page-types slabinfo page_owner_sort
+ LIB_DIR = ../lib/api
+ LIBS = $(LIB_DIR)/libapi.a
+-CFLAGS += -Wall -Wextra -I../lib/
+-LDFLAGS += $(LIBS)
++CFLAGS += -Wall -Wextra -I../lib/ -pthread
++LDFLAGS += $(LIBS) -pthread
+ all: $(TARGETS)
+-- 
+2.40.1
+
diff --git a/queue-6.5/tpm_crb-fix-an-error-handling-path-in-crb_acpi_add.patch b/queue-6.5/tpm_crb-fix-an-error-handling-path-in-crb_acpi_add.patch
new file mode 100644 (file)
index 0000000..3277c29
--- /dev/null
@@ -0,0 +1,45 @@
+From 492327900abd5c26efed4c22051951991c724643 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 25 Feb 2023 11:58:48 +0100
+Subject: tpm_crb: Fix an error handling path in crb_acpi_add()
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit 9c377852ddfdc557b1370f196b0cfdf28d233460 ]
+
+Some error paths don't call acpi_put_table() before returning.
+Branch to the correct place instead of doing some direct return.
+
+Fixes: 4d2732882703 ("tpm_crb: Add support for CRB devices based on Pluton")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Acked-by: Matthew Garrett <mgarrett@aurora.tech>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/char/tpm/tpm_crb.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
+index a5dbebb1acfcf..ea085b14ab7c9 100644
+--- a/drivers/char/tpm/tpm_crb.c
++++ b/drivers/char/tpm/tpm_crb.c
+@@ -775,12 +775,13 @@ static int crb_acpi_add(struct acpi_device *device)
+                               FW_BUG "TPM2 ACPI table has wrong size %u for start method type %d\n",
+                               buf->header.length,
+                               ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON);
+-                      return -EINVAL;
++                      rc = -EINVAL;
++                      goto out;
+               }
+               crb_pluton = ACPI_ADD_PTR(struct tpm2_crb_pluton, buf, sizeof(*buf));
+               rc = crb_map_pluton(dev, priv, buf, crb_pluton);
+               if (rc)
+-                      return rc;
++                      goto out;
+       }
+       priv->sm = sm;
+-- 
+2.40.1
+
diff --git a/queue-6.5/veth-fixing-transmit-return-status-for-dropped-packe.patch b/queue-6.5/veth-fixing-transmit-return-status-for-dropped-packe.patch
new file mode 100644 (file)
index 0000000..3d37991
--- /dev/null
@@ -0,0 +1,54 @@
+From ebb7bf408d6d525f71a81e21d4218375c1248131 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Sep 2023 12:09:21 +0800
+Subject: veth: Fixing transmit return status for dropped packets
+
+From: Liang Chen <liangchen.linux@gmail.com>
+
+[ Upstream commit 151e887d8ff97e2e42110ffa1fb1e6a2128fb364 ]
+
+The veth_xmit function returns NETDEV_TX_OK even when packets are dropped.
+This behavior leads to incorrect calculations of statistics counts, as
+well as things like txq->trans_start updates.
+
+Fixes: e314dbdc1c0d ("[NET]: Virtual ethernet device driver.")
+Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/veth.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/veth.c b/drivers/net/veth.c
+index ef8eacb596f73..2db678c0082a3 100644
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -344,6 +344,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+       struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
+       struct veth_rq *rq = NULL;
++      int ret = NETDEV_TX_OK;
+       struct net_device *rcv;
+       int length = skb->len;
+       bool use_napi = false;
+@@ -376,6 +377,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
+       } else {
+ drop:
+               atomic64_inc(&priv->dropped);
++              ret = NET_XMIT_DROP;
+       }
+       if (use_napi)
+@@ -383,7 +385,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
+       rcu_read_unlock();
+-      return NETDEV_TX_OK;
++      return ret;
+ }
+ static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
+-- 
+2.40.1
+
diff --git a/queue-6.5/watchdog-intel-mid_wdt-add-module_alias-to-allow-aut.patch b/queue-6.5/watchdog-intel-mid_wdt-add-module_alias-to-allow-aut.patch
new file mode 100644 (file)
index 0000000..1f494a2
--- /dev/null
@@ -0,0 +1,40 @@
+From 0ce1df157d64a27714752740f7c8574314bdc7fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Aug 2023 17:32:20 +0530
+Subject: watchdog: intel-mid_wdt: add MODULE_ALIAS() to allow auto-load
+
+From: Raag Jadav <raag.jadav@intel.com>
+
+[ Upstream commit cf38e7691c85f1b09973b22a0b89bf1e1228d2f9 ]
+
+When built with CONFIG_INTEL_MID_WATCHDOG=m, the driver currently
+has to be loaded manually because it lacks a module alias.
+This causes unintended resets when the watchdog timer has been
+set up by the bootloader and the driver is not explicitly loaded.
+Add MODULE_ALIAS() to load the driver automatically at boot and
+avoid this issue.
+
+Fixes: 87a1ef8058d9 ("watchdog: add Intel MID watchdog driver support")
+Signed-off-by: Raag Jadav <raag.jadav@intel.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Guenter Roeck <linux@roeck-us.net>
+Link: https://lore.kernel.org/r/20230811120220.31578-1-raag.jadav@intel.com
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/watchdog/intel-mid_wdt.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c
+index 9b2173f765c8c..fb7fae750181b 100644
+--- a/drivers/watchdog/intel-mid_wdt.c
++++ b/drivers/watchdog/intel-mid_wdt.c
+@@ -203,3 +203,4 @@ module_platform_driver(mid_wdt_driver);
+ MODULE_AUTHOR("David Cohen <david.a.cohen@linux.intel.com>");
+ MODULE_DESCRIPTION("Watchdog Driver for Intel MID platform");
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS("platform:intel_mid_wdt");
+-- 
+2.40.1
+
diff --git a/queue-6.5/x86-virt-drop-unnecessary-check-on-extended-cpuid-le.patch b/queue-6.5/x86-virt-drop-unnecessary-check-on-extended-cpuid-le.patch
new file mode 100644 (file)
index 0000000..7d940e6
--- /dev/null
@@ -0,0 +1,45 @@
+From 5df6874625245deb2ca0fd8e0a4bfde657f866a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jul 2023 13:18:52 -0700
+Subject: x86/virt: Drop unnecessary check on extended CPUID level in
+ cpu_has_svm()
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 5df8ecfe3632d5879d1f154f7aa8de441b5d1c89 ]
+
+Drop the explicit check on the extended CPUID level in cpu_has_svm(), as the
+kernel's cached CPUID info will leave the entire SVM leaf unset if said
+leaf is not supported by hardware.  Prior to using cached information,
+the check was needed to avoid false positives due to Intel's rather crazy
+CPUID behavior of returning the values of the maximum supported leaf if
+the specified leaf is unsupported.
+
+Fixes: 682a8108872f ("x86/kvm/svm: Simplify cpu_has_svm()")
+Link: https://lore.kernel.org/r/20230721201859.2307736-13-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/virtext.h | 6 ------
+ 1 file changed, 6 deletions(-)
+
+diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
+index 3b12e6b994123..6c2e3ff3cb28f 100644
+--- a/arch/x86/include/asm/virtext.h
++++ b/arch/x86/include/asm/virtext.h
+@@ -101,12 +101,6 @@ static inline int cpu_has_svm(const char **msg)
+               return 0;
+       }
+-      if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
+-              if (msg)
+-                      *msg = "can't execute cpuid_8000000a";
+-              return 0;
+-      }
+-
+       if (!boot_cpu_has(X86_FEATURE_SVM)) {
+               if (msg)
+                       *msg = "svm not available";
+-- 
+2.40.1
+
diff --git a/queue-6.5/xsk-fix-xsk_diag-use-after-free-error-during-socket-.patch b/queue-6.5/xsk-fix-xsk_diag-use-after-free-error-during-socket-.patch
new file mode 100644 (file)
index 0000000..32f3a08
--- /dev/null
@@ -0,0 +1,58 @@
+From e93dffb536d80953ff5fde265900000e48db58bc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Aug 2023 12:01:17 +0200
+Subject: xsk: Fix xsk_diag use-after-free error during socket cleanup
+
+From: Magnus Karlsson <magnus.karlsson@intel.com>
+
+[ Upstream commit 3e019d8a05a38abb5c85d4f1e85fda964610aa14 ]
+
+Fix a use-after-free error that is possible if the xsk_diag interface
+is used after the socket has been unbound from the device. This can
+happen either due to the socket being closed or the device
+disappearing. In the early days of AF_XDP, the way we tested that a
+socket was not bound to a device was to simply check if the netdevice
+pointer in the xsk socket structure was NULL. Later, a better system
+was introduced by having an explicit state variable in the xsk socket
+struct. For example, the state of a socket that is on the way to being
+closed and has been unbound from the device is XSK_UNBOUND.
+
+The commit in the Fixes tag below removed the old way of signalling
+that a socket is unbound, i.e. setting dev to NULL, in the belief that
+all code relying on the old way had been removed. That was
+unfortunately not true, as the xsk diagnostics code was still using the
+old way and thus does not work as intended when a socket is going
+down. Fix this by introducing a test against the state variable. If
+the socket is in the state XSK_UNBOUND, simply abort the diagnostic's
+netlink operation.
+
+Fixes: 18b1ab7aa76b ("xsk: Fix race at socket teardown")
+Reported-by: syzbot+822d1359297e2694f873@syzkaller.appspotmail.com
+Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: syzbot+822d1359297e2694f873@syzkaller.appspotmail.com
+Tested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Link: https://lore.kernel.org/bpf/20230831100119.17408-1-magnus.karlsson@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xdp/xsk_diag.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
+index c014217f5fa7d..22b36c8143cfd 100644
+--- a/net/xdp/xsk_diag.c
++++ b/net/xdp/xsk_diag.c
+@@ -111,6 +111,9 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
+       sock_diag_save_cookie(sk, msg->xdiag_cookie);
+       mutex_lock(&xs->mutex);
++      if (READ_ONCE(xs->state) == XSK_UNBOUND)
++              goto out_nlmsg_trim;
++
+       if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
+               goto out_nlmsg_trim;
+-- 
+2.40.1
+