git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.9
author Sasha Levin <sashal@kernel.org>
Wed, 5 Jun 2024 12:09:28 +0000 (08:09 -0400)
committer Sasha Levin <sashal@kernel.org>
Wed, 5 Jun 2024 12:09:28 +0000 (08:09 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
84 files changed:
queue-6.9/af_unix-annotate-data-race-around-unix_sk-sk-addr.patch [new file with mode: 0644]
queue-6.9/af_unix-read-sk-sk_hash-under-bindlock-during-bind.patch [new file with mode: 0644]
queue-6.9/alsa-core-remove-debugfs-at-disconnection.patch [new file with mode: 0644]
queue-6.9/alsa-hda-realtek-adjust-g814jzr-to-use-spi-init-for-.patch [new file with mode: 0644]
queue-6.9/alsa-seq-don-t-clear-bank-selection-at-event-ump-mid.patch [new file with mode: 0644]
queue-6.9/alsa-seq-fix-missing-bank-setup-between-midi1-midi2-.patch [new file with mode: 0644]
queue-6.9/alsa-seq-fix-yet-another-spot-for-system-message-con.patch [new file with mode: 0644]
queue-6.9/alsa-seq-ump-fix-swapped-song-position-pointer-data.patch [new file with mode: 0644]
queue-6.9/asoc-cs42l43-only-restrict-44.1khz-for-the-asp.patch [new file with mode: 0644]
queue-6.9/block-stack-max_user_sectors.patch [new file with mode: 0644]
queue-6.9/bpf-allow-delete-from-sockmap-sockhash-only-if-updat.patch [new file with mode: 0644]
queue-6.9/bpf-fix-potential-integer-overflow-in-resolve_btfids.patch [new file with mode: 0644]
queue-6.9/dma-buf-sw-sync-don-t-enable-irq-from-sync_print_obj.patch [new file with mode: 0644]
queue-6.9/dma-mapping-benchmark-fix-node-id-validation.patch [new file with mode: 0644]
queue-6.9/dma-mapping-benchmark-fix-up-kthread-related-error-h.patch [new file with mode: 0644]
queue-6.9/dma-mapping-benchmark-handle-numa_no_node-correctly.patch [new file with mode: 0644]
queue-6.9/drm-amd-display-enable-colorspace-property-for-mst-c.patch [new file with mode: 0644]
queue-6.9/drm-amdgpu-adjust-logic-in-amdgpu_device_partner_ban.patch [new file with mode: 0644]
queue-6.9/drm-i915-gt-fix-ccs-id-s-calculation-for-ccs-mode-se.patch [new file with mode: 0644]
queue-6.9/drm-i915-guc-avoid-field_prep-warning.patch [new file with mode: 0644]
queue-6.9/drm-panel-sitronix-st7789v-fix-display-size-for-jt24.patch [new file with mode: 0644]
queue-6.9/drm-panel-sitronix-st7789v-fix-timing-for-jt240mhqs_.patch [new file with mode: 0644]
queue-6.9/drm-panel-sitronix-st7789v-tweak-timing-for-jt240mhq.patch [new file with mode: 0644]
queue-6.9/drm-xe-add-dbg-messages-on-the-suspend-resume-functi.patch [new file with mode: 0644]
queue-6.9/drm-xe-change-pcode-timeout-to-50msec-while-polling-.patch [new file with mode: 0644]
queue-6.9/drm-xe-check-pcode-init-status-only-on-root-gt-of-ro.patch [new file with mode: 0644]
queue-6.9/drm-xe-only-use-reserved-bcs-instances-for-usm-migra.patch [new file with mode: 0644]
queue-6.9/e1000e-move-force-smbus-near-the-end-of-enable_ulp-f.patch [new file with mode: 0644]
queue-6.9/enic-validate-length-of-nl-attributes-in-enic_set_vf.patch [new file with mode: 0644]
queue-6.9/hwmon-intel-m10-bmc-hwmon-fix-multiplier-for-n6000-b.patch [new file with mode: 0644]
queue-6.9/hwmon-shtc1-fix-property-misspelling.patch [new file with mode: 0644]
queue-6.9/ice-fix-200g-phy-types-to-link-speed-mapping.patch [new file with mode: 0644]
queue-6.9/ice-fix-accounting-if-a-vlan-already-exists.patch [new file with mode: 0644]
queue-6.9/idpf-don-t-enable-napi-and-interrupts-prior-to-alloc.patch [new file with mode: 0644]
queue-6.9/inet-introduce-dst_rtable-helper.patch [new file with mode: 0644]
queue-6.9/ipv4-correctly-iterate-over-the-target-netns-in-inet.patch [new file with mode: 0644]
queue-6.9/ipv4-fix-address-dump-when-ipv4-is-disabled-on-an-in.patch [new file with mode: 0644]
queue-6.9/ipv6-introduce-dst_rt6_info-helper.patch [new file with mode: 0644]
queue-6.9/ipvlan-dont-use-skb-sk-in-ipvlan_process_v-4-6-_outb.patch [new file with mode: 0644]
queue-6.9/kconfig-fix-comparison-to-constant-symbols-m-n.patch [new file with mode: 0644]
queue-6.9/kheaders-use-command-v-to-test-for-existence-of-cpio.patch [new file with mode: 0644]
queue-6.9/net-dsa-microchip-fix-rgmii-error-in-ksz-dsa-driver.patch [new file with mode: 0644]
queue-6.9/net-ena-fix-redundant-device-numa-node-override.patch [new file with mode: 0644]
queue-6.9/net-fec-add-fec_enet_deinit.patch [new file with mode: 0644]
queue-6.9/net-fix-__dst_negative_advice-race.patch [new file with mode: 0644]
queue-6.9/net-micrel-fix-lan8841_config_intr-after-getting-out.patch [new file with mode: 0644]
queue-6.9/net-mlx5-do-not-query-mpir-on-embedded-cpu-function.patch [new file with mode: 0644]
queue-6.9/net-mlx5-fix-mtmp-register-capability-offset-in-mcam.patch [new file with mode: 0644]
queue-6.9/net-mlx5-lag-do-bond-only-if-slaves-agree-on-roce-st.patch [new file with mode: 0644]
queue-6.9/net-mlx5-use-mlx5_ipsec_rx_status_destroy-to-correct.patch [new file with mode: 0644]
queue-6.9/net-mlx5e-fix-ipsec-tunnel-mode-offload-feature-chec.patch [new file with mode: 0644]
queue-6.9/net-mlx5e-fix-udp-gso-for-encapsulated-packets.patch [new file with mode: 0644]
queue-6.9/net-mlx5e-use-rx_missed_errors-instead-of-rx_dropped.patch [new file with mode: 0644]
queue-6.9/net-phy-micrel-set-soft_reset-callback-to-genphy_sof.patch [new file with mode: 0644]
queue-6.9/net-sched-taprio-extend-minimum-interval-restriction.patch [new file with mode: 0644]
queue-6.9/net-sched-taprio-make-q-picos_per_byte-available-to-.patch [new file with mode: 0644]
queue-6.9/net-ti-icssg-prueth-fix-start-counter-for-ft1-filter.patch [new file with mode: 0644]
queue-6.9/net-usb-smsc95xx-fix-changing-led_sel-bit-value-upda.patch [new file with mode: 0644]
queue-6.9/netfilter-ipset-add-list-flush-to-cancel_gc.patch [new file with mode: 0644]
queue-6.9/netfilter-nfnetlink_queue-acquire-rcu_read_lock-in-i.patch [new file with mode: 0644]
queue-6.9/netfilter-nft_fib-allow-from-forward-input-without-i.patch [new file with mode: 0644]
queue-6.9/netfilter-nft_payload-restore-vlan-q-in-q-match-supp.patch [new file with mode: 0644]
queue-6.9/netfilter-nft_payload-skbuff-vlan-metadata-mangle-su.patch [new file with mode: 0644]
queue-6.9/netfilter-tproxy-bail-out-if-ip-has-been-disabled-on.patch [new file with mode: 0644]
queue-6.9/netkit-fix-pkt_type-override-upon-netkit-pass-verdic.patch [new file with mode: 0644]
queue-6.9/netkit-fix-setting-mac-address-in-l2-mode.patch [new file with mode: 0644]
queue-6.9/null_blk-fix-return-value-of-nullb_device_power_stor.patch [new file with mode: 0644]
queue-6.9/nvme-fix-multipath-batched-completion-accounting.patch [new file with mode: 0644]
queue-6.9/nvme-multipath-fix-io-accounting-on-failover.patch [new file with mode: 0644]
queue-6.9/nvmet-fix-ns-enable-disable-possible-hang.patch [new file with mode: 0644]
queue-6.9/octeontx2-pf-free-send-queue-buffers-incase-of-leaf-.patch [new file with mode: 0644]
queue-6.9/powerpc-pseries-lparcfg-drop-error-message-from-gues.patch [new file with mode: 0644]
queue-6.9/powerpc-uaccess-use-yz-asm-constraint-for-ld.patch [new file with mode: 0644]
queue-6.9/riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch [new file with mode: 0644]
queue-6.9/sd-also-set-max_user_sectors-when-setting-max_sector.patch [new file with mode: 0644]
queue-6.9/selftests-mptcp-add-ms-units-for-tc-netem-delay.patch [new file with mode: 0644]
queue-6.9/selftests-mptcp-join-mark-fail-tests-as-flaky.patch [new file with mode: 0644]
queue-6.9/selftests-mptcp-join-mark-fastclose-tests-as-flaky.patch [new file with mode: 0644]
queue-6.9/selftests-mptcp-simult-flows-mark-unbalanced-tests-a.patch [new file with mode: 0644]
queue-6.9/series
queue-6.9/spi-don-t-mark-message-dma-mapped-when-no-transfer-i.patch [new file with mode: 0644]
queue-6.9/spi-stm32-don-t-warn-about-spurious-interrupts.patch [new file with mode: 0644]
queue-6.9/spi-stm32-revert-change-that-enabled-controller-befo.patch [new file with mode: 0644]
queue-6.9/tcp-reduce-accepted-window-in-new_syn_recv-state.patch-1230 [new file with mode: 0644]

diff --git a/queue-6.9/af_unix-annotate-data-race-around-unix_sk-sk-addr.patch b/queue-6.9/af_unix-annotate-data-race-around-unix_sk-sk-addr.patch
new file mode 100644 (file)
index 0000000..5ccce1e
--- /dev/null
@@ -0,0 +1,119 @@
+From 10b71b1a7c2fddb6d41783b8d6823669701c3c87 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 00:40:02 +0900
+Subject: af_unix: Annotate data-race around unix_sk(sk)->addr.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 97e1db06c7bb948da10ba85acad8030b56886593 ]
+
+Once unix_sk(sk)->addr is assigned under net->unx.table.locks and
+unix_sk(sk)->bindlock, *(unix_sk(sk)->addr) and unix_sk(sk)->path are
+fully set up, and unix_sk(sk)->addr is never changed.
+
+unix_getname() and unix_copy_addr() access the two fields locklessly,
+and commit ae3b564179bf ("missing barriers in some of unix_sock ->addr
+and ->path accesses") added smp_store_release() and smp_load_acquire()
+pairs.
+
+In other functions, we still read unix_sk(sk)->addr locklessly to check
+if the socket is bound, and KCSAN complains about it.  [0]
+
+Given these functions have no dependency for *(unix_sk(sk)->addr) and
+unix_sk(sk)->path, READ_ONCE() is enough to annotate the data-race.
+
+Note that it is safe to access unix_sk(sk)->addr locklessly if the socket
+is found in the hash table.  For example, the lockless read of otheru->addr
+in unix_stream_connect() is safe.
+
+Note also that newu->addr there is of the child socket that is still not
+accessible from userspace, and smp_store_release() publishes the address
+in case the socket is accept()ed and unix_getname() / unix_copy_addr()
+is called.
+
+[0]:
+BUG: KCSAN: data-race in unix_bind / unix_listen
+
+write (marked) to 0xffff88805f8d1840 of 8 bytes by task 13723 on cpu 0:
+ __unix_set_addr_hash net/unix/af_unix.c:329 [inline]
+ unix_bind_bsd net/unix/af_unix.c:1241 [inline]
+ unix_bind+0x881/0x1000 net/unix/af_unix.c:1319
+ __sys_bind+0x194/0x1e0 net/socket.c:1847
+ __do_sys_bind net/socket.c:1858 [inline]
+ __se_sys_bind net/socket.c:1856 [inline]
+ __x64_sys_bind+0x40/0x50 net/socket.c:1856
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x46/0x4e
+
+read to 0xffff88805f8d1840 of 8 bytes by task 13724 on cpu 1:
+ unix_listen+0x72/0x180 net/unix/af_unix.c:734
+ __sys_listen+0xdc/0x160 net/socket.c:1881
+ __do_sys_listen net/socket.c:1890 [inline]
+ __se_sys_listen net/socket.c:1888 [inline]
+ __x64_sys_listen+0x2e/0x40 net/socket.c:1888
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x46/0x4e
+
+value changed: 0x0000000000000000 -> 0xffff88807b5b1b40
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 13724 Comm: syz-executor.4 Not tainted 6.8.0-12822-gcd51db110a7e #12
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20240522154002.77857-1-kuniyu@amazon.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 9bc879f3e34e6..6498e41e2ecf9 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -731,7 +731,7 @@ static int unix_listen(struct socket *sock, int backlog)
+       if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
+               goto out;       /* Only stream/seqpacket sockets accept */
+       err = -EINVAL;
+-      if (!u->addr)
++      if (!READ_ONCE(u->addr))
+               goto out;       /* No listens on an unbound socket */
+       unix_state_lock(sk);
+       if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
+@@ -1369,7 +1369,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
+               if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+                    test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
+-                  !unix_sk(sk)->addr) {
++                  !READ_ONCE(unix_sk(sk)->addr)) {
+                       err = unix_autobind(sk);
+                       if (err)
+                               goto out;
+@@ -1481,7 +1481,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+               goto out;
+       if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+-           test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
++           test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
++          !READ_ONCE(u->addr)) {
+               err = unix_autobind(sk);
+               if (err)
+                       goto out;
+@@ -1997,7 +1998,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
+       }
+       if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+-           test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
++           test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
++          !READ_ONCE(u->addr)) {
+               err = unix_autobind(sk);
+               if (err)
+                       goto out;
+-- 
+2.43.0
+
diff --git a/queue-6.9/af_unix-read-sk-sk_hash-under-bindlock-during-bind.patch b/queue-6.9/af_unix-read-sk-sk_hash-under-bindlock-during-bind.patch
new file mode 100644 (file)
index 0000000..ffb5be6
--- /dev/null
@@ -0,0 +1,132 @@
+From d49e3e200c7bc25dbec993e48b9d36c2b2437012 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 00:42:18 +0900
+Subject: af_unix: Read sk->sk_hash under bindlock during bind().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 51d1b25a720982324871338b1a36b197ec9bd6f0 ]
+
+syzkaller reported data-race of sk->sk_hash in unix_autobind() [0],
+and the same ones exist in unix_bind_bsd() and unix_bind_abstract().
+
+The three bind() functions prefetch sk->sk_hash locklessly and
+use it later after validating that unix_sk(sk)->addr is NULL under
+unix_sk(sk)->bindlock.
+
+The prefetched sk->sk_hash is the hash value of unbound socket set
+in unix_create1() and does not change until bind() completes.
+
+There could be a chance that sk->sk_hash changes after the lockless
+read.  However, in such a case, non-NULL unix_sk(sk)->addr is visible
+under unix_sk(sk)->bindlock, and bind() returns -EINVAL without using
+the prefetched value.
+
+The KCSAN splat is false-positive, but let's silence it by reading
+sk->sk_hash under unix_sk(sk)->bindlock.
+
+[0]:
+BUG: KCSAN: data-race in unix_autobind / unix_autobind
+
+write to 0xffff888034a9fb88 of 4 bytes by task 4468 on cpu 0:
+ __unix_set_addr_hash net/unix/af_unix.c:331 [inline]
+ unix_autobind+0x47a/0x7d0 net/unix/af_unix.c:1185
+ unix_dgram_connect+0x7e3/0x890 net/unix/af_unix.c:1373
+ __sys_connect_file+0xd7/0xe0 net/socket.c:2048
+ __sys_connect+0x114/0x140 net/socket.c:2065
+ __do_sys_connect net/socket.c:2075 [inline]
+ __se_sys_connect net/socket.c:2072 [inline]
+ __x64_sys_connect+0x40/0x50 net/socket.c:2072
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x46/0x4e
+
+read to 0xffff888034a9fb88 of 4 bytes by task 4465 on cpu 1:
+ unix_autobind+0x28/0x7d0 net/unix/af_unix.c:1134
+ unix_dgram_connect+0x7e3/0x890 net/unix/af_unix.c:1373
+ __sys_connect_file+0xd7/0xe0 net/socket.c:2048
+ __sys_connect+0x114/0x140 net/socket.c:2065
+ __do_sys_connect net/socket.c:2075 [inline]
+ __se_sys_connect net/socket.c:2072 [inline]
+ __x64_sys_connect+0x40/0x50 net/socket.c:2072
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x46/0x4e
+
+value changed: 0x000000e4 -> 0x000001e3
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 4465 Comm: syz-executor.0 Not tainted 6.8.0-12822-gcd51db110a7e #12
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+
+Fixes: afd20b9290e1 ("af_unix: Replace the big lock with small locks.")
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20240522154218.78088-1-kuniyu@amazon.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/unix/af_unix.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 6498e41e2ecf9..439c531744a27 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -1131,8 +1131,8 @@ static struct sock *unix_find_other(struct net *net,
+ static int unix_autobind(struct sock *sk)
+ {
+-      unsigned int new_hash, old_hash = sk->sk_hash;
+       struct unix_sock *u = unix_sk(sk);
++      unsigned int new_hash, old_hash;
+       struct net *net = sock_net(sk);
+       struct unix_address *addr;
+       u32 lastnum, ordernum;
+@@ -1155,6 +1155,7 @@ static int unix_autobind(struct sock *sk)
+       addr->name->sun_family = AF_UNIX;
+       refcount_set(&addr->refcnt, 1);
++      old_hash = sk->sk_hash;
+       ordernum = get_random_u32();
+       lastnum = ordernum & 0xFFFFF;
+ retry:
+@@ -1195,8 +1196,8 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
+ {
+       umode_t mode = S_IFSOCK |
+              (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
+-      unsigned int new_hash, old_hash = sk->sk_hash;
+       struct unix_sock *u = unix_sk(sk);
++      unsigned int new_hash, old_hash;
+       struct net *net = sock_net(sk);
+       struct mnt_idmap *idmap;
+       struct unix_address *addr;
+@@ -1234,6 +1235,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
+       if (u->addr)
+               goto out_unlock;
++      old_hash = sk->sk_hash;
+       new_hash = unix_bsd_hash(d_backing_inode(dentry));
+       unix_table_double_lock(net, old_hash, new_hash);
+       u->path.mnt = mntget(parent.mnt);
+@@ -1261,8 +1263,8 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
+ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
+                             int addr_len)
+ {
+-      unsigned int new_hash, old_hash = sk->sk_hash;
+       struct unix_sock *u = unix_sk(sk);
++      unsigned int new_hash, old_hash;
+       struct net *net = sock_net(sk);
+       struct unix_address *addr;
+       int err;
+@@ -1280,6 +1282,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
+               goto out_mutex;
+       }
++      old_hash = sk->sk_hash;
+       new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
+       unix_table_double_lock(net, old_hash, new_hash);
+-- 
+2.43.0
+
diff --git a/queue-6.9/alsa-core-remove-debugfs-at-disconnection.patch b/queue-6.9/alsa-core-remove-debugfs-at-disconnection.patch
new file mode 100644 (file)
index 0000000..ea27ced
--- /dev/null
@@ -0,0 +1,134 @@
+From 9f2c7f6b1452c288c2f61cb25118faeffe8517c4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2024 17:11:46 +0200
+Subject: ALSA: core: Remove debugfs at disconnection
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit 495000a38634e640e2fd02f7e4f1512ccc92d770 ]
+
+The card-specific debugfs entries are removed at the last stage of
+card free phase, and it's performed after synchronization of the
+closes of all opened fds.  This works fine for most cases, but it can
+be potentially problematic for a hotplug device like USB-audio.  Due
+to the nature of snd_card_free_when_closed(), the card free isn't
+called immediately after the driver removal for a hotplug device, but
+it's left until the last fd is closed.  It implies that the card
+debugfs entries also remain.  Meanwhile, when a new device is inserted
+before the last close and the very same card slot is assigned, the
+driver tries to create the card debugfs root again on the very same
+path.  This conflicts with the remaining entry, and results in the
+kernel warning such as:
+  debugfs: Directory 'card0' with parent 'sound' already present!
+with the missing debugfs entry afterwards.
+
+For avoiding such conflicts, remove debugfs entries at the device
+disconnection phase instead.  The jack kctl debugfs entries get
+removed in snd_jack_dev_disconnect() instead of each kctl
+private_free.
+
+Fixes: 2d670ea2bd53 ("ALSA: jack: implement software jack injection via debugfs")
+Link: https://lore.kernel.org/r/20240524151256.32521-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/core/init.c |  9 +++++----
+ sound/core/jack.c | 21 ++++++++++++++-------
+ 2 files changed, 19 insertions(+), 11 deletions(-)
+
+diff --git a/sound/core/init.c b/sound/core/init.c
+index 66d7265fea920..b8912de048662 100644
+--- a/sound/core/init.c
++++ b/sound/core/init.c
+@@ -539,6 +539,11 @@ void snd_card_disconnect(struct snd_card *card)
+               synchronize_irq(card->sync_irq);
+       snd_info_card_disconnect(card);
++#ifdef CONFIG_SND_DEBUG
++      debugfs_remove(card->debugfs_root);
++      card->debugfs_root = NULL;
++#endif
++
+       if (card->registered) {
+               device_del(&card->card_dev);
+               card->registered = false;
+@@ -590,10 +595,6 @@ static int snd_card_do_free(struct snd_card *card)
+               dev_warn(card->dev, "unable to free card info\n");
+               /* Not fatal error */
+       }
+-#ifdef CONFIG_SND_DEBUG
+-      debugfs_remove(card->debugfs_root);
+-      card->debugfs_root = NULL;
+-#endif
+       if (card->release_completion)
+               complete(card->release_completion);
+       if (!card->managed)
+diff --git a/sound/core/jack.c b/sound/core/jack.c
+index e08b2c4fbd1a5..e4bcecdf89b7e 100644
+--- a/sound/core/jack.c
++++ b/sound/core/jack.c
+@@ -37,11 +37,15 @@ static const int jack_switch_types[SND_JACK_SWITCH_TYPES] = {
+ };
+ #endif /* CONFIG_SND_JACK_INPUT_DEV */
++static void snd_jack_remove_debugfs(struct snd_jack *jack);
++
+ static int snd_jack_dev_disconnect(struct snd_device *device)
+ {
+-#ifdef CONFIG_SND_JACK_INPUT_DEV
+       struct snd_jack *jack = device->device_data;
++      snd_jack_remove_debugfs(jack);
++
++#ifdef CONFIG_SND_JACK_INPUT_DEV
+       guard(mutex)(&jack->input_dev_lock);
+       if (!jack->input_dev)
+               return 0;
+@@ -381,10 +385,14 @@ static int snd_jack_debugfs_add_inject_node(struct snd_jack *jack,
+       return 0;
+ }
+-static void snd_jack_debugfs_clear_inject_node(struct snd_jack_kctl *jack_kctl)
++static void snd_jack_remove_debugfs(struct snd_jack *jack)
+ {
+-      debugfs_remove(jack_kctl->jack_debugfs_root);
+-      jack_kctl->jack_debugfs_root = NULL;
++      struct snd_jack_kctl *jack_kctl;
++
++      list_for_each_entry(jack_kctl, &jack->kctl_list, list) {
++              debugfs_remove(jack_kctl->jack_debugfs_root);
++              jack_kctl->jack_debugfs_root = NULL;
++      }
+ }
+ #else /* CONFIG_SND_JACK_INJECTION_DEBUG */
+ static int snd_jack_debugfs_add_inject_node(struct snd_jack *jack,
+@@ -393,7 +401,7 @@ static int snd_jack_debugfs_add_inject_node(struct snd_jack *jack,
+       return 0;
+ }
+-static void snd_jack_debugfs_clear_inject_node(struct snd_jack_kctl *jack_kctl)
++static void snd_jack_remove_debugfs(struct snd_jack *jack)
+ {
+ }
+ #endif /* CONFIG_SND_JACK_INJECTION_DEBUG */
+@@ -404,7 +412,6 @@ static void snd_jack_kctl_private_free(struct snd_kcontrol *kctl)
+       jack_kctl = kctl->private_data;
+       if (jack_kctl) {
+-              snd_jack_debugfs_clear_inject_node(jack_kctl);
+               list_del(&jack_kctl->list);
+               kfree(jack_kctl);
+       }
+@@ -497,8 +504,8 @@ int snd_jack_new(struct snd_card *card, const char *id, int type,
+               .dev_free = snd_jack_dev_free,
+ #ifdef CONFIG_SND_JACK_INPUT_DEV
+               .dev_register = snd_jack_dev_register,
+-              .dev_disconnect = snd_jack_dev_disconnect,
+ #endif /* CONFIG_SND_JACK_INPUT_DEV */
++              .dev_disconnect = snd_jack_dev_disconnect,
+       };
+       if (initial_kctl) {
+-- 
+2.43.0
+
diff --git a/queue-6.9/alsa-hda-realtek-adjust-g814jzr-to-use-spi-init-for-.patch b/queue-6.9/alsa-hda-realtek-adjust-g814jzr-to-use-spi-init-for-.patch
new file mode 100644 (file)
index 0000000..fd6cae9
--- /dev/null
@@ -0,0 +1,37 @@
+From 8d6804d5af388a0ff7d95bbc94b16d4559098305 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 26 May 2024 21:10:32 +1200
+Subject: ALSA: hda/realtek: Adjust G814JZR to use SPI init for amp
+
+From: Luke D. Jones <luke@ljones.dev>
+
+[ Upstream commit 2be46155d792d629e8fe3188c2cde176833afe36 ]
+
+The 2024 ASUS ROG G814J model is much the same as the 2023 model
+and the 2023 16" version. We can use the same Cirrus Amp quirk.
+
+Fixes: 811dd426a9b1 ("ALSA: hda/realtek: Add quirks for Asus ROG 2024 laptops using CS35L41")
+Signed-off-by: Luke D. Jones <luke@ljones.dev>
+Link: https://lore.kernel.org/r/20240526091032.114545-1-luke@ljones.dev
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index d8caa2be63c8b..1a1ca7caaff07 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -10294,7 +10294,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
+       SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
+-      SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC245_FIXUP_CS35L41_SPI_2),
++      SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
+       SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
+       SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
+-- 
+2.43.0
+
diff --git a/queue-6.9/alsa-seq-don-t-clear-bank-selection-at-event-ump-mid.patch b/queue-6.9/alsa-seq-don-t-clear-bank-selection-at-event-ump-mid.patch
new file mode 100644 (file)
index 0000000..7e8f1a7
--- /dev/null
@@ -0,0 +1,40 @@
+From fc4a2ca6f254fcb8dfe43057500c393b582e60ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 May 2024 17:18:50 +0200
+Subject: ALSA: seq: Don't clear bank selection at event -> UMP MIDI2
+ conversion
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit a200df7deb3186cd7b55abb77ab96dfefb8a4f09 ]
+
+The current code to convert from a legacy sequencer event to UMP MIDI2
+clears the bank selection at each time the program change is
+submitted.  This is confusing and may lead to incorrect bank values
+transmitted to the destination in the end.
+
+Drop the line to clear the bank info and keep the provided values.
+
+Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events")
+Link: https://lore.kernel.org/r/20240527151852.29036-2-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/core/seq/seq_ump_convert.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
+index c21be87f5da9e..f5d22dd008426 100644
+--- a/sound/core/seq/seq_ump_convert.c
++++ b/sound/core/seq/seq_ump_convert.c
+@@ -892,7 +892,6 @@ static int pgm_ev_to_ump_midi2(const struct snd_seq_event *event,
+               data->pg.bank_msb = cc->cc_bank_msb;
+               data->pg.bank_lsb = cc->cc_bank_lsb;
+               cc->bank_set = 0;
+-              cc->cc_bank_msb = cc->cc_bank_lsb = 0;
+       }
+       return 1;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.9/alsa-seq-fix-missing-bank-setup-between-midi1-midi2-.patch b/queue-6.9/alsa-seq-fix-missing-bank-setup-between-midi1-midi2-.patch
new file mode 100644 (file)
index 0000000..df0e8e1
--- /dev/null
@@ -0,0 +1,104 @@
+From f063ea44f7f099ff49cdaa51f6ab66e5f63f5db7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 May 2024 17:18:49 +0200
+Subject: ALSA: seq: Fix missing bank setup between MIDI1/MIDI2 UMP conversion
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit 8a42886cae307663f3f999846926bd6e64392000 ]
+
+When a UMP packet is converted between MIDI1 and MIDI2 protocols, the
+bank selection may be lost.  The conversion from MIDI1 to MIDI2 needs
+the encoding of the bank into UMP_MSG_STATUS_PROGRAM bits, while the
+conversion from MIDI2 to MIDI1 needs the extraction from that
+instead.
+
+This patch implements the missing bank selection mechanism in those
+conversions.
+
+Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events")
+Link: https://lore.kernel.org/r/20240527151852.29036-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/core/seq/seq_ump_convert.c | 38 ++++++++++++++++++++++++++++++++
+ 1 file changed, 38 insertions(+)
+
+diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
+index ee6ac649df836..c21be87f5da9e 100644
+--- a/sound/core/seq/seq_ump_convert.c
++++ b/sound/core/seq/seq_ump_convert.c
+@@ -368,6 +368,7 @@ static int cvt_ump_midi1_to_midi2(struct snd_seq_client *dest,
+       struct snd_seq_ump_event ev_cvt;
+       const union snd_ump_midi1_msg *midi1 = (const union snd_ump_midi1_msg *)event->ump;
+       union snd_ump_midi2_msg *midi2 = (union snd_ump_midi2_msg *)ev_cvt.ump;
++      struct snd_seq_ump_midi2_bank *cc;
+       ev_cvt = *event;
+       memset(&ev_cvt.ump, 0, sizeof(ev_cvt.ump));
+@@ -387,11 +388,29 @@ static int cvt_ump_midi1_to_midi2(struct snd_seq_client *dest,
+               midi2->paf.data = upscale_7_to_32bit(midi1->paf.data);
+               break;
+       case UMP_MSG_STATUS_CC:
++              cc = &dest_port->midi2_bank[midi1->note.channel];
++              switch (midi1->cc.index) {
++              case UMP_CC_BANK_SELECT:
++                      cc->bank_set = 1;
++                      cc->cc_bank_msb = midi1->cc.data;
++                      return 0; // skip
++              case UMP_CC_BANK_SELECT_LSB:
++                      cc->bank_set = 1;
++                      cc->cc_bank_lsb = midi1->cc.data;
++                      return 0; // skip
++              }
+               midi2->cc.index = midi1->cc.index;
+               midi2->cc.data = upscale_7_to_32bit(midi1->cc.data);
+               break;
+       case UMP_MSG_STATUS_PROGRAM:
+               midi2->pg.program = midi1->pg.program;
++              cc = &dest_port->midi2_bank[midi1->note.channel];
++              if (cc->bank_set) {
++                      midi2->pg.bank_valid = 1;
++                      midi2->pg.bank_msb = cc->cc_bank_msb;
++                      midi2->pg.bank_lsb = cc->cc_bank_lsb;
++                      cc->bank_set = 0;
++              }
+               break;
+       case UMP_MSG_STATUS_CHANNEL_PRESSURE:
+               midi2->caf.data = upscale_7_to_32bit(midi1->caf.data);
+@@ -419,6 +438,7 @@ static int cvt_ump_midi2_to_midi1(struct snd_seq_client *dest,
+       struct snd_seq_ump_event ev_cvt;
+       union snd_ump_midi1_msg *midi1 = (union snd_ump_midi1_msg *)ev_cvt.ump;
+       const union snd_ump_midi2_msg *midi2 = (const union snd_ump_midi2_msg *)event->ump;
++      int err;
+       u16 v;
+       ev_cvt = *event;
+@@ -443,6 +463,24 @@ static int cvt_ump_midi2_to_midi1(struct snd_seq_client *dest,
+               midi1->cc.data = downscale_32_to_7bit(midi2->cc.data);
+               break;
+       case UMP_MSG_STATUS_PROGRAM:
++              if (midi2->pg.bank_valid) {
++                      midi1->cc.status = UMP_MSG_STATUS_CC;
++                      midi1->cc.index = UMP_CC_BANK_SELECT;
++                      midi1->cc.data = midi2->pg.bank_msb;
++                      err = __snd_seq_deliver_single_event(dest, dest_port,
++                                                           (struct snd_seq_event *)&ev_cvt,
++                                                           atomic, hop);
++                      if (err < 0)
++                              return err;
++                      midi1->cc.index = UMP_CC_BANK_SELECT_LSB;
++                      midi1->cc.data = midi2->pg.bank_lsb;
++                      err = __snd_seq_deliver_single_event(dest, dest_port,
++                                                           (struct snd_seq_event *)&ev_cvt,
++                                                           atomic, hop);
++                      if (err < 0)
++                              return err;
++                      midi1->note.status = midi2->note.status;
++              }
+               midi1->pg.program = midi2->pg.program;
+               break;
+       case UMP_MSG_STATUS_CHANNEL_PRESSURE:
+-- 
+2.43.0
+
diff --git a/queue-6.9/alsa-seq-fix-yet-another-spot-for-system-message-con.patch b/queue-6.9/alsa-seq-fix-yet-another-spot-for-system-message-con.patch
new file mode 100644 (file)
index 0000000..ae61e9c
--- /dev/null
@@ -0,0 +1,37 @@
+From a85640610d983e996eef6fd45645f82fbd5b38f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 May 2024 12:10:43 +0200
+Subject: ALSA: seq: Fix yet another spot for system message conversion
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit 700fe6fd093d08c6da2bda8efe00479b0e617327 ]
+
+We fixed the incorrect UMP type for system messages in the recent
+commit, but it missed one place in system_ev_to_ump_midi1().
+Fix it now.
+
+Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events")
+Fixes: c2bb79613fed ("ALSA: seq: Fix incorrect UMP type for system messages")
+Link: https://lore.kernel.org/r/20240530101044.17524-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/core/seq/seq_ump_convert.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
+index f5d22dd008426..903a644b80e25 100644
+--- a/sound/core/seq/seq_ump_convert.c
++++ b/sound/core/seq/seq_ump_convert.c
+@@ -729,6 +729,7 @@ static int system_ev_to_ump_midi1(const struct snd_seq_event *event,
+                                 union snd_ump_midi1_msg *data,
+                                 unsigned char status)
+ {
++      data->system.type = UMP_MSG_TYPE_SYSTEM; // override
+       data->system.status = status;
+       return 1;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.9/alsa-seq-ump-fix-swapped-song-position-pointer-data.patch b/queue-6.9/alsa-seq-ump-fix-swapped-song-position-pointer-data.patch
new file mode 100644 (file)
index 0000000..6333283
--- /dev/null
@@ -0,0 +1,49 @@
+From 88973dc31181c71fd6a30840d752d5c1ddba0067 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 May 2024 09:51:07 +0200
+Subject: ALSA: seq: ump: Fix swapped song position pointer data
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit 310fa3ec2859f1c094e6e9b5d2e1ca51738c409a ]
+
+When converting between the legacy event and UMP, the parameters for
+MIDI Song Position Pointer are incorrectly stored.  They should have
+been stored in LSB -> MSB order, but were stored in MSB -> LSB order.
+This patch corrects the ordering.
+
+Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events")
+Link: https://lore.kernel.org/r/20240531075110.3250-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/core/seq/seq_ump_convert.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
+index 903a644b80e25..9bfba69b2a709 100644
+--- a/sound/core/seq/seq_ump_convert.c
++++ b/sound/core/seq/seq_ump_convert.c
+@@ -157,7 +157,7 @@ static void ump_system_to_one_param_ev(const union snd_ump_midi1_msg *val,
+ static void ump_system_to_songpos_ev(const union snd_ump_midi1_msg *val,
+                                    struct snd_seq_event *ev)
+ {
+-      ev->data.control.value = (val->system.parm1 << 7) | val->system.parm2;
++      ev->data.control.value = (val->system.parm2 << 7) | val->system.parm1;
+ }
+ /* Encoders for 0xf0 - 0xff */
+@@ -752,8 +752,8 @@ static int system_2p_ev_to_ump_midi1(const struct snd_seq_event *event,
+                                    unsigned char status)
+ {
+       data->system.status = status;
+-      data->system.parm1 = (event->data.control.value >> 7) & 0x7f;
+-      data->system.parm2 = event->data.control.value & 0x7f;
++      data->system.parm1 = event->data.control.value & 0x7f;
++      data->system.parm2 = (event->data.control.value >> 7) & 0x7f;
+       return 1;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.9/asoc-cs42l43-only-restrict-44.1khz-for-the-asp.patch b/queue-6.9/asoc-cs42l43-only-restrict-44.1khz-for-the-asp.patch
new file mode 100644 (file)
index 0000000..92a854f
--- /dev/null
@@ -0,0 +1,42 @@
+From 87fa48d18d0b23f4bd5be25a61b312f4f44269db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 May 2024 11:08:40 +0100
+Subject: ASoC: cs42l43: Only restrict 44.1kHz for the ASP
+
+From: Charles Keepax <ckeepax@opensource.cirrus.com>
+
+[ Upstream commit 797c525e85d1e44cf0e6f338890e8e0c661f524a ]
+
+The SoundWire interface can always support 44.1kHz using flow controlled
+mode, and whether the ASP is in master mode should obviously only affect
+the ASP. Update cs42l43_startup() to only restrict the rates for the ASP
+DAI.
+
+Fixes: fc918cbe874e ("ASoC: cs42l43: Add support for the cs42l43")
+Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
+Link: https://msgid.link/r/20240527100840.439832-1-ckeepax@opensource.cirrus.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/cs42l43.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c
+index 94685449f0f48..92674314227c4 100644
+--- a/sound/soc/codecs/cs42l43.c
++++ b/sound/soc/codecs/cs42l43.c
+@@ -310,8 +310,9 @@ static int cs42l43_startup(struct snd_pcm_substream *substream, struct snd_soc_d
+       struct snd_soc_component *component = dai->component;
+       struct cs42l43_codec *priv = snd_soc_component_get_drvdata(component);
+       struct cs42l43 *cs42l43 = priv->core;
+-      int provider = !!regmap_test_bits(cs42l43->regmap, CS42L43_ASP_CLK_CONFIG2,
+-                                        CS42L43_ASP_MASTER_MODE_MASK);
++      int provider = !dai->id || !!regmap_test_bits(cs42l43->regmap,
++                                                    CS42L43_ASP_CLK_CONFIG2,
++                                                    CS42L43_ASP_MASTER_MODE_MASK);
+       if (provider)
+               priv->constraint.mask = CS42L43_PROVIDER_RATE_MASK;
+-- 
+2.43.0
+
diff --git a/queue-6.9/block-stack-max_user_sectors.patch b/queue-6.9/block-stack-max_user_sectors.patch
new file mode 100644 (file)
index 0000000..99236fe
--- /dev/null
@@ -0,0 +1,46 @@
+From b6cb69e22aa0e991caeb95281be35ee6daf51fb0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 20:26:14 +0200
+Subject: block: stack max_user_sectors
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit e528bede6f4e6822afdf0fa80be46ea9199f0911 ]
+
+The max_user_sectors is one of the three factors determining the actual
+max_sectors limit for READ/WRITE requests.  Because of that it needs to
+be stacked at least for the device mapper multi-path case where requests
+are directly inserted on the lower device.  For SCSI disks this is
+important because the sd driver actually sets its own advisory limit
+that is lower than max_hw_sectors based on the block limits VPD page.
+While this is a bit odd and unusual, the same effect can happen if a
+user or udev script tweaks the value manually.
+
+Fixes: 4f563a64732d ("block: add a max_user_discard_sectors queue limit")
+Reported-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Mike Snitzer <snitzer@kernel.org>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Link: https://lore.kernel.org/r/20240523182618.602003-3-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-settings.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/block/blk-settings.c b/block/blk-settings.c
+index 9d6033e01f2e1..15319b217bf3f 100644
+--- a/block/blk-settings.c
++++ b/block/blk-settings.c
+@@ -751,6 +751,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
+       unsigned int top, bottom, alignment, ret = 0;
+       t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
++      t->max_user_sectors = min_not_zero(t->max_user_sectors,
++                      b->max_user_sectors);
+       t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+       t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
+       t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
+-- 
+2.43.0
+
diff --git a/queue-6.9/bpf-allow-delete-from-sockmap-sockhash-only-if-updat.patch b/queue-6.9/bpf-allow-delete-from-sockmap-sockhash-only-if-updat.patch
new file mode 100644 (file)
index 0000000..7fdf68d
--- /dev/null
@@ -0,0 +1,79 @@
+From 3193cd23d4b3f7fc49796a937cabb7ea92ae84c2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 May 2024 13:20:07 +0200
+Subject: bpf: Allow delete from sockmap/sockhash only if update is allowed
+
+From: Jakub Sitnicki <jakub@cloudflare.com>
+
+[ Upstream commit 98e948fb60d41447fd8d2d0c3b8637fc6b6dc26d ]
+
+We have seen an influx of syzkaller reports where a BPF program attached to
+a tracepoint triggers a locking rule violation by performing a map_delete
+on a sockmap/sockhash.
+
+We don't intend to support this artificial use scenario. Extend the
+existing verifier allowed-program-type check for updating sockmap/sockhash
+to also cover deleting from a map.
+
+From now on only BPF programs which were previously allowed to update
+sockmap/sockhash can delete from these map types.
+
+Fixes: ff9105993240 ("bpf, sockmap: Prevent lock inversion deadlock in map delete elem")
+Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Reported-by: syzbot+ec941d6e24f633a59172@syzkaller.appspotmail.com
+Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: syzbot+ec941d6e24f633a59172@syzkaller.appspotmail.com
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=ec941d6e24f633a59172
+Link: https://lore.kernel.org/bpf/20240527-sockmap-verify-deletes-v1-1-944b372f2101@cloudflare.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 2c90b1eb12e2c..8a29309db4245 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -8845,7 +8845,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
+       enum bpf_attach_type eatype = env->prog->expected_attach_type;
+       enum bpf_prog_type type = resolve_prog_type(env->prog);
+-      if (func_id != BPF_FUNC_map_update_elem)
++      if (func_id != BPF_FUNC_map_update_elem &&
++          func_id != BPF_FUNC_map_delete_elem)
+               return false;
+       /* It's not possible to get access to a locked struct sock in these
+@@ -8856,6 +8857,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
+               if (eatype == BPF_TRACE_ITER)
+                       return true;
+               break;
++      case BPF_PROG_TYPE_SOCK_OPS:
++              /* map_update allowed only via dedicated helpers with event type checks */
++              if (func_id == BPF_FUNC_map_delete_elem)
++                      return true;
++              break;
+       case BPF_PROG_TYPE_SOCKET_FILTER:
+       case BPF_PROG_TYPE_SCHED_CLS:
+       case BPF_PROG_TYPE_SCHED_ACT:
+@@ -8951,7 +8957,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
+       case BPF_MAP_TYPE_SOCKMAP:
+               if (func_id != BPF_FUNC_sk_redirect_map &&
+                   func_id != BPF_FUNC_sock_map_update &&
+-                  func_id != BPF_FUNC_map_delete_elem &&
+                   func_id != BPF_FUNC_msg_redirect_map &&
+                   func_id != BPF_FUNC_sk_select_reuseport &&
+                   func_id != BPF_FUNC_map_lookup_elem &&
+@@ -8961,7 +8966,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
+       case BPF_MAP_TYPE_SOCKHASH:
+               if (func_id != BPF_FUNC_sk_redirect_hash &&
+                   func_id != BPF_FUNC_sock_hash_update &&
+-                  func_id != BPF_FUNC_map_delete_elem &&
+                   func_id != BPF_FUNC_msg_redirect_hash &&
+                   func_id != BPF_FUNC_sk_select_reuseport &&
+                   func_id != BPF_FUNC_map_lookup_elem &&
+-- 
+2.43.0
+
diff --git a/queue-6.9/bpf-fix-potential-integer-overflow-in-resolve_btfids.patch b/queue-6.9/bpf-fix-potential-integer-overflow-in-resolve_btfids.patch
new file mode 100644 (file)
index 0000000..f143c8f
--- /dev/null
@@ -0,0 +1,41 @@
+From 3d73a262ab31eb35d25ad1e762c6788df2c377f0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 May 2024 09:09:31 +0200
+Subject: bpf: Fix potential integer overflow in resolve_btfids
+
+From: Friedrich Vock <friedrich.vock@gmx.de>
+
+[ Upstream commit 44382b3ed6b2787710c8ade06c0e97f5970a47c8 ]
+
+err is a 32-bit integer, but elf_update returns an off_t, which is 64-bit
+at least on 64-bit platforms. If symbols_patch is called on a binary between
+2-4GB in size, the result will be negative when cast to a 32-bit integer,
+which the code assumes means an error occurred. This can wrongly trigger
+build failures when building very large kernel images.
+
+Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object")
+Signed-off-by: Friedrich Vock <friedrich.vock@gmx.de>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20240514070931.199694-1-friedrich.vock@gmx.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/bpf/resolve_btfids/main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
+index d9520cb826b31..af393c7dee1f1 100644
+--- a/tools/bpf/resolve_btfids/main.c
++++ b/tools/bpf/resolve_btfids/main.c
+@@ -728,7 +728,7 @@ static int sets_patch(struct object *obj)
+ static int symbols_patch(struct object *obj)
+ {
+-      int err;
++      off_t err;
+       if (__symbols_patch(obj, &obj->structs)  ||
+           __symbols_patch(obj, &obj->unions)   ||
+-- 
+2.43.0
+
diff --git a/queue-6.9/dma-buf-sw-sync-don-t-enable-irq-from-sync_print_obj.patch b/queue-6.9/dma-buf-sw-sync-don-t-enable-irq-from-sync_print_obj.patch
new file mode 100644 (file)
index 0000000..1682819
--- /dev/null
@@ -0,0 +1,55 @@
+From af656bc4fe1eaf5d0fe645154078004a054c2bca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 5 May 2024 23:08:31 +0900
+Subject: dma-buf/sw-sync: don't enable IRQ from sync_print_obj()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit b794918961516f667b0c745aebdfebbb8a98df39 ]
+
+Commit a6aa8fca4d79 ("dma-buf/sw-sync: Reduce irqsave/irqrestore from
+known context") mistakenly replaced spin_unlock_irqrestore() with
+spin_unlock_irq() in both sync_debugfs_show() and sync_print_obj(), even
+though sync_print_obj() is called from sync_debugfs_show(). As a result,
+lockdep reports an inconsistent lock state warning.
+
+Use plain spin_{lock,unlock}() for sync_print_obj(), for
+sync_debugfs_show() is already using spin_{lock,unlock}_irq().
+
+Reported-by: syzbot <syzbot+a225ee3df7e7f9372dbe@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=a225ee3df7e7f9372dbe
+Fixes: a6aa8fca4d79 ("dma-buf/sw-sync: Reduce irqsave/irqrestore from known context")
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/c2e46020-aaa6-4e06-bf73-f05823f913f0@I-love.SAKURA.ne.jp
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/dma-buf/sync_debug.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c
+index 101394f16930f..237bce21d1e72 100644
+--- a/drivers/dma-buf/sync_debug.c
++++ b/drivers/dma-buf/sync_debug.c
+@@ -110,12 +110,12 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
+       seq_printf(s, "%s: %d\n", obj->name, obj->value);
+-      spin_lock_irq(&obj->lock);
++      spin_lock(&obj->lock); /* Caller already disabled IRQ. */
+       list_for_each(pos, &obj->pt_list) {
+               struct sync_pt *pt = container_of(pos, struct sync_pt, link);
+               sync_print_fence(s, &pt->base, false);
+       }
+-      spin_unlock_irq(&obj->lock);
++      spin_unlock(&obj->lock);
+ }
+ static void sync_print_sync_file(struct seq_file *s,
+-- 
+2.43.0
+
diff --git a/queue-6.9/dma-mapping-benchmark-fix-node-id-validation.patch b/queue-6.9/dma-mapping-benchmark-fix-node-id-validation.patch
new file mode 100644 (file)
index 0000000..0563d3a
--- /dev/null
@@ -0,0 +1,64 @@
+From dbaadbeba8a42aafcf1cf1ef5afe5b812c3682dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 May 2024 14:47:03 +0300
+Subject: dma-mapping: benchmark: fix node id validation
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+[ Upstream commit 1ff05e723f7ca30644b8ec3fb093f16312e408ad ]
+
+While validating node ids in map_benchmark_ioctl(), node_possible() may
+be provided with an invalid argument outside of the [0,MAX_NUMNODES-1] range
+leading to:
+
+BUG: KASAN: wild-memory-access in map_benchmark_ioctl (kernel/dma/map_benchmark.c:214)
+Read of size 8 at addr 1fffffff8ccb6398 by task dma_map_benchma/971
+CPU: 7 PID: 971 Comm: dma_map_benchma Not tainted 6.9.0-rc6 #37
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+Call Trace:
+ <TASK>
+dump_stack_lvl (lib/dump_stack.c:117)
+kasan_report (mm/kasan/report.c:603)
+kasan_check_range (mm/kasan/generic.c:189)
+variable_test_bit (arch/x86/include/asm/bitops.h:227) [inline]
+arch_test_bit (arch/x86/include/asm/bitops.h:239) [inline]
+_test_bit at (include/asm-generic/bitops/instrumented-non-atomic.h:142) [inline]
+node_state (include/linux/nodemask.h:423) [inline]
+map_benchmark_ioctl (kernel/dma/map_benchmark.c:214)
+full_proxy_unlocked_ioctl (fs/debugfs/file.c:333)
+__x64_sys_ioctl (fs/ioctl.c:890)
+do_syscall_64 (arch/x86/entry/common.c:83)
+entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
+
+Compare node ids with sane bounds first. NUMA_NO_NODE is considered a
+special valid case meaning that benchmarking kthreads won't be bound to a
+cpuset of a given node.
+
+Found by Linux Verification Center (linuxtesting.org).
+
+Fixes: 65789daa8087 ("dma-mapping: add benchmark support for streaming DMA APIs")
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/dma/map_benchmark.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
+index 2478957cf9f83..59fb3f849b351 100644
+--- a/kernel/dma/map_benchmark.c
++++ b/kernel/dma/map_benchmark.c
+@@ -212,7 +212,8 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
+               }
+               if (map->bparam.node != NUMA_NO_NODE &&
+-                  !node_possible(map->bparam.node)) {
++                  (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES ||
++                   !node_possible(map->bparam.node))) {
+                       pr_err("invalid numa node\n");
+                       return -EINVAL;
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.9/dma-mapping-benchmark-fix-up-kthread-related-error-h.patch b/queue-6.9/dma-mapping-benchmark-fix-up-kthread-related-error-h.patch
new file mode 100644 (file)
index 0000000..ad1c5dc
--- /dev/null
@@ -0,0 +1,78 @@
+From 34883b251cd1c66ce17e46d6f6eb8a0961838e2f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 May 2024 14:47:01 +0300
+Subject: dma-mapping: benchmark: fix up kthread-related error handling
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+[ Upstream commit bb9025f4432f8c158322cf2c04c2b492f23eb511 ]
+
+kthread creation failure is invalidly handled inside do_map_benchmark().
+The put_task_struct() calls on the error path are supposed to balance the
+get_task_struct() calls which only happen after all the kthreads are
+successfully created. Rollback using kthread_stop() for already created
+kthreads in case of such failure.
+
+In the normal case, call kthread_stop_put() to gracefully stop kthreads
+and put their task refcounts. This should be done for all started
+kthreads.
+
+Found by Linux Verification Center (linuxtesting.org).
+
+Fixes: 65789daa8087 ("dma-mapping: add benchmark support for streaming DMA APIs")
+Suggested-by: Robin Murphy <robin.murphy@arm.com>
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/dma/map_benchmark.c | 16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
+index 02205ab53b7e9..2478957cf9f83 100644
+--- a/kernel/dma/map_benchmark.c
++++ b/kernel/dma/map_benchmark.c
+@@ -118,6 +118,8 @@ static int do_map_benchmark(struct map_benchmark_data *map)
+               if (IS_ERR(tsk[i])) {
+                       pr_err("create dma_map thread failed\n");
+                       ret = PTR_ERR(tsk[i]);
++                      while (--i >= 0)
++                              kthread_stop(tsk[i]);
+                       goto out;
+               }
+@@ -139,13 +141,17 @@ static int do_map_benchmark(struct map_benchmark_data *map)
+       msleep_interruptible(map->bparam.seconds * 1000);
+-      /* wait for the completion of benchmark threads */
++      /* wait for the completion of all started benchmark threads */
+       for (i = 0; i < threads; i++) {
+-              ret = kthread_stop(tsk[i]);
+-              if (ret)
+-                      goto out;
++              int kthread_ret = kthread_stop_put(tsk[i]);
++
++              if (kthread_ret)
++                      ret = kthread_ret;
+       }
++      if (ret)
++              goto out;
++
+       loops = atomic64_read(&map->loops);
+       if (likely(loops > 0)) {
+               u64 map_variance, unmap_variance;
+@@ -170,8 +176,6 @@ static int do_map_benchmark(struct map_benchmark_data *map)
+       }
+ out:
+-      for (i = 0; i < threads; i++)
+-              put_task_struct(tsk[i]);
+       put_device(map->dev);
+       kfree(tsk);
+       return ret;
+-- 
+2.43.0
+
diff --git a/queue-6.9/dma-mapping-benchmark-handle-numa_no_node-correctly.patch b/queue-6.9/dma-mapping-benchmark-handle-numa_no_node-correctly.patch
new file mode 100644 (file)
index 0000000..2a89123
--- /dev/null
@@ -0,0 +1,70 @@
+From 24a42eda2197ef832c9c962dc0bb779afc4a1e55 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 May 2024 14:47:04 +0300
+Subject: dma-mapping: benchmark: handle NUMA_NO_NODE correctly
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+[ Upstream commit e64746e74f717961250a155e14c156616fcd981f ]
+
+cpumask_of_node() can be called for NUMA_NO_NODE inside do_map_benchmark()
+resulting in the following sanitizer report:
+
+UBSAN: array-index-out-of-bounds in ./arch/x86/include/asm/topology.h:72:28
+index -1 is out of range for type 'cpumask [64][1]'
+CPU: 1 PID: 990 Comm: dma_map_benchma Not tainted 6.9.0-rc6 #29
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+Call Trace:
+ <TASK>
+dump_stack_lvl (lib/dump_stack.c:117)
+ubsan_epilogue (lib/ubsan.c:232)
+__ubsan_handle_out_of_bounds (lib/ubsan.c:429)
+cpumask_of_node (arch/x86/include/asm/topology.h:72) [inline]
+do_map_benchmark (kernel/dma/map_benchmark.c:104)
+map_benchmark_ioctl (kernel/dma/map_benchmark.c:246)
+full_proxy_unlocked_ioctl (fs/debugfs/file.c:333)
+__x64_sys_ioctl (fs/ioctl.c:890)
+do_syscall_64 (arch/x86/entry/common.c:83)
+entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
+
+Use cpumask_of_node() in place when binding a kernel thread to a cpuset
+of a particular node.
+
+Note that the provided node id is checked inside map_benchmark_ioctl().
+It's just a NUMA_NO_NODE case which is not handled properly later.
+
+Found by Linux Verification Center (linuxtesting.org).
+
+Fixes: 65789daa8087 ("dma-mapping: add benchmark support for streaming DMA APIs")
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Acked-by: Barry Song <baohua@kernel.org>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/dma/map_benchmark.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
+index 59fb3f849b351..f7f3d14fa69a7 100644
+--- a/kernel/dma/map_benchmark.c
++++ b/kernel/dma/map_benchmark.c
+@@ -101,7 +101,6 @@ static int do_map_benchmark(struct map_benchmark_data *map)
+       struct task_struct **tsk;
+       int threads = map->bparam.threads;
+       int node = map->bparam.node;
+-      const cpumask_t *cpu_mask = cpumask_of_node(node);
+       u64 loops;
+       int ret = 0;
+       int i;
+@@ -124,7 +123,7 @@ static int do_map_benchmark(struct map_benchmark_data *map)
+               }
+               if (node != NUMA_NO_NODE)
+-                      kthread_bind_mask(tsk[i], cpu_mask);
++                      kthread_bind_mask(tsk[i], cpumask_of_node(node));
+       }
+       /* clear the old value in the previous benchmark */
+-- 
+2.43.0
+
diff --git a/queue-6.9/drm-amd-display-enable-colorspace-property-for-mst-c.patch b/queue-6.9/drm-amd-display-enable-colorspace-property-for-mst-c.patch
new file mode 100644 (file)
index 0000000..41fbb58
--- /dev/null
@@ -0,0 +1,45 @@
+From 6ada8c237505f949a822a0d0865ae4b327e0119b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 May 2024 16:45:35 -0500
+Subject: drm/amd/display: Enable colorspace property for MST connectors
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mario Limonciello <mario.limonciello@amd.com>
+
+[ Upstream commit 8195979d2dd995d60c2663adf54c69c1bf4eadd1 ]
+
+MST colorspace property support was disabled due to a series of warnings
+that came up when the device was plugged in since the properties weren't
+made at device creation. Create the properties in advance instead.
+
+Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Fixes: 69a959610229 ("drm/amd/display: Temporary Disable MST DP Colorspace Property").
+Reported-and-tested-by: Tyler Schneider <tyler.schneider@amd.com>
+Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3353
+Reviewed-by: Harry Wentland <harry.wentland@amd.com>
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+index cb31a699c6622..1a269099f19f8 100644
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+@@ -613,6 +613,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
+               &connector->base,
+               dev->mode_config.tile_property,
+               0);
++      connector->colorspace_property = master->base.colorspace_property;
++      if (connector->colorspace_property)
++              drm_connector_attach_colorspace_property(connector);
+       drm_connector_set_path_property(connector, pathprop);
+-- 
+2.43.0
+
diff --git a/queue-6.9/drm-amdgpu-adjust-logic-in-amdgpu_device_partner_ban.patch b/queue-6.9/drm-amdgpu-adjust-logic-in-amdgpu_device_partner_ban.patch
new file mode 100644 (file)
index 0000000..146de9d
--- /dev/null
@@ -0,0 +1,57 @@
+From db02ae4ff398fa62ae1a430ea989b49e57648bad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 May 2024 11:25:49 -0400
+Subject: drm/amdgpu: Adjust logic in amdgpu_device_partner_bandwidth()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+[ Upstream commit ba46b3bda296c4f82b061ac40b90f49d2a00a380 ]
+
+Use current speed/width on devices which don't support
+dynamic PCIe switching.
+
+Fixes: 466a7d115326 ("drm/amd: Use the first non-dGPU PCI device for BW limits")
+Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3289
+Acked-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 ++++++++++++-------
+ 1 file changed, 12 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 7753a2e64d411..941d6e379b8a6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -5809,13 +5809,18 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
+       *speed = PCI_SPEED_UNKNOWN;
+       *width = PCIE_LNK_WIDTH_UNKNOWN;
+-      while ((parent = pci_upstream_bridge(parent))) {
+-              /* skip upstream/downstream switches internal to dGPU*/
+-              if (parent->vendor == PCI_VENDOR_ID_ATI)
+-                      continue;
+-              *speed = pcie_get_speed_cap(parent);
+-              *width = pcie_get_width_cap(parent);
+-              break;
++      if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
++              while ((parent = pci_upstream_bridge(parent))) {
++                      /* skip upstream/downstream switches internal to dGPU*/
++                      if (parent->vendor == PCI_VENDOR_ID_ATI)
++                              continue;
++                      *speed = pcie_get_speed_cap(parent);
++                      *width = pcie_get_width_cap(parent);
++                      break;
++              }
++      } else {
++              /* use the current speeds rather than max if switching is not supported */
++              pcie_bandwidth_available(adev->pdev, NULL, speed, width);
+       }
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.9/drm-i915-gt-fix-ccs-id-s-calculation-for-ccs-mode-se.patch b/queue-6.9/drm-i915-gt-fix-ccs-id-s-calculation-for-ccs-mode-se.patch
new file mode 100644 (file)
index 0000000..7c46a15
--- /dev/null
@@ -0,0 +1,98 @@
+From f310e66aa3a8500712de6bc6f765d2be702fb388 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 May 2024 11:06:16 +0200
+Subject: drm/i915/gt: Fix CCS id's calculation for CCS mode setting
+
+From: Andi Shyti <andi.shyti@linux.intel.com>
+
+[ Upstream commit ee01b6a386eaf9984b58a2476e8f531149679da9 ]
+
+The whole point of the previous fixes has been to change the CCS
+hardware configuration to generate only one stream available to
+the compute users. We did this by changing the info.engine_mask
+that is set during device probe, reset during the detection of
+the fused engines, and finally reset again when choosing the CCS
+mode.
+
+We can't use the engine_mask variable anymore, as with the
+current configuration, it imposes only one CCS no matter what the
+hardware configuration is.
+
+Before changing the engine_mask for the third time, save it and
+use it for calculating the CCS mode.
+
+After the previous changes, the user reported a performance drop
+to around 1/4. We have tested that the compute operations, with
+the current patch, have improved by the same factor.
+
+Fixes: 6db31251bb26 ("drm/i915/gt: Enable only one CCS for compute workload")
+Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
+Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
+Cc: Gnattu OC <gnattuoc@me.com>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Matt Roper <matthew.d.roper@intel.com>
+Tested-by: Jian Ye <jian.ye@intel.com>
+Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
+Tested-by: Gnattu OC <gnattuoc@me.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240517090616.242529-1-andi.shyti@linux.intel.com
+(cherry picked from commit a09d2327a9ba8e3f5be238bc1b7ca2809255b464)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gt/intel_engine_cs.c   | 6 ++++++
+ drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 2 +-
+ drivers/gpu/drm/i915/gt/intel_gt_types.h    | 8 ++++++++
+ 3 files changed, 15 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+index 7a6dc371c384e..bc6209df0f680 100644
+--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
++++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+@@ -919,6 +919,12 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
+       if (IS_DG2(gt->i915)) {
+               u8 first_ccs = __ffs(CCS_MASK(gt));
++              /*
++               * Store the number of active cslices before
++               * changing the CCS engine configuration
++               */
++              gt->ccs.cslices = CCS_MASK(gt);
++
+               /* Mask off all the CCS engine */
+               info->engine_mask &= ~GENMASK(CCS3, CCS0);
+               /* Put back in the first CCS engine */
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
+index 99b71bb7da0a6..3c62a44e9106c 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
++++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
+@@ -19,7 +19,7 @@ unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt)
+       /* Build the value for the fixed CCS load balancing */
+       for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+-              if (CCS_MASK(gt) & BIT(cslice))
++              if (gt->ccs.cslices & BIT(cslice))
+                       /*
+                        * If available, assign the cslice
+                        * to the first available engine...
+diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
+index def7dd0eb6f19..cfdd2ad5e9549 100644
+--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
+@@ -207,6 +207,14 @@ struct intel_gt {
+                                           [MAX_ENGINE_INSTANCE + 1];
+       enum intel_submission_method submission_method;
++      struct {
++              /*
++               * Mask of the non fused CCS slices
++               * to be used for the load balancing
++               */
++              intel_engine_mask_t cslices;
++      } ccs;
++
+       /*
+        * Default address space (either GGTT or ppGTT depending on arch).
+        *
+-- 
+2.43.0
+
diff --git a/queue-6.9/drm-i915-guc-avoid-field_prep-warning.patch b/queue-6.9/drm-i915-guc-avoid-field_prep-warning.patch
new file mode 100644 (file)
index 0000000..e79b719
--- /dev/null
@@ -0,0 +1,58 @@
+From 661c384f1ae09aeebf2bd779599b6849d887a385 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 Apr 2024 09:48:09 -0700
+Subject: drm/i915/guc: avoid FIELD_PREP warning
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit d4f36db62396b73bed383c0b6e48d36278cafa78 ]
+
+With gcc-7 and earlier, there are lots of warnings like
+
+In file included from <command-line>:0:0:
+In function '__guc_context_policy_add_priority.isra.66',
+    inlined from '__guc_context_set_prio.isra.67' at drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3292:3,
+    inlined from 'guc_context_set_prio' at drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3320:2:
+include/linux/compiler_types.h:399:38: error: call to '__compiletime_assert_631' declared with attribute error: FIELD_PREP: mask is not constant
+  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
+                                      ^
+...
+drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:2422:3: note: in expansion of macro 'FIELD_PREP'
+   FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
+   ^~~~~~~~~~
+
+Make sure that GUC_KLV_0_KEY is an unsigned value to avoid the warning.
+
+Fixes: 77b6f79df66e ("drm/i915/guc: Update to GuC version 69.0.3")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
+Signed-off-by: Julia Filipchuk <julia.filipchuk@intel.com>
+Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240430164809.482131-1-julia.filipchuk@intel.com
+(cherry picked from commit 364e039827ef628c650c21c1afe1c54d9c3296d9)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
+index 58012edd4eb0e..4f4f53c42a9c5 100644
+--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
++++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
+@@ -29,9 +29,9 @@
+  */
+ #define GUC_KLV_LEN_MIN                               1u
+-#define GUC_KLV_0_KEY                         (0xffff << 16)
+-#define GUC_KLV_0_LEN                         (0xffff << 0)
+-#define GUC_KLV_n_VALUE                               (0xffffffff << 0)
++#define GUC_KLV_0_KEY                         (0xffffu << 16)
++#define GUC_KLV_0_LEN                         (0xffffu << 0)
++#define GUC_KLV_n_VALUE                               (0xffffffffu << 0)
+ /**
+  * DOC: GuC Self Config KLVs
+-- 
+2.43.0
+
diff --git a/queue-6.9/drm-panel-sitronix-st7789v-fix-display-size-for-jt24.patch b/queue-6.9/drm-panel-sitronix-st7789v-fix-display-size-for-jt24.patch
new file mode 100644 (file)
index 0000000..e97fea0
--- /dev/null
@@ -0,0 +1,41 @@
+From 09eec7977d3a8a3daa3dd5b823294899f876a8b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 16:42:47 +0200
+Subject: drm/panel: sitronix-st7789v: fix display size for jt240mhqs_hwt_ek_e3
+ panel
+
+From: Gerald Loacker <gerald.loacker@wolfvision.net>
+
+[ Upstream commit b62c150c3bae72ac1910dcc588f360159eb0744a ]
+
+This is a portrait mode display. Change the dimensions accordingly.
+
+Fixes: 0fbbe96bfa08 ("drm/panel: sitronix-st7789v: add jasonic jt240mhqs-hwt-ek-e3 support")
+Signed-off-by: Gerald Loacker <gerald.loacker@wolfvision.net>
+Acked-by: Jessica Zhang <quic_jesszhan@quicinc.com>
+Link: https://lore.kernel.org/r/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-3-e4821802443d@wolfvision.net
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-3-e4821802443d@wolfvision.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/panel/panel-sitronix-st7789v.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
+index c7e3f1280404d..e8f385b9c6182 100644
+--- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
++++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
+@@ -289,8 +289,8 @@ static const struct drm_display_mode jt240mhqs_hwt_ek_e3_mode = {
+       .vsync_start = 280 + 48,
+       .vsync_end = 280 + 48 + 4,
+       .vtotal = 280 + 48 + 4 + 4,
+-      .width_mm = 43,
+-      .height_mm = 37,
++      .width_mm = 37,
++      .height_mm = 43,
+       .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC,
+ };
+-- 
+2.43.0
+
diff --git a/queue-6.9/drm-panel-sitronix-st7789v-fix-timing-for-jt240mhqs_.patch b/queue-6.9/drm-panel-sitronix-st7789v-fix-timing-for-jt240mhqs_.patch
new file mode 100644 (file)
index 0000000..7b4d259
--- /dev/null
@@ -0,0 +1,44 @@
+From 76ae1ea965dc35414266e3ce023bbe999b2f5495 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 16:42:45 +0200
+Subject: drm/panel: sitronix-st7789v: fix timing for jt240mhqs_hwt_ek_e3 panel
+
+From: Gerald Loacker <gerald.loacker@wolfvision.net>
+
+[ Upstream commit 0e5895ff7fab0fc05ec17daf9a568368828fa6ea ]
+
+Flickering was observed when using partial mode. Moving the vsync to the
+same position as used by the default sitronix-st7789v timing resolves this
+issue.
+
+Fixes: 0fbbe96bfa08 ("drm/panel: sitronix-st7789v: add jasonic jt240mhqs-hwt-ek-e3 support")
+Acked-by: Jessica Zhang <quic_jesszhan@quicinc.com>
+Signed-off-by: Gerald Loacker <gerald.loacker@wolfvision.net>
+Link: https://lore.kernel.org/r/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-1-e4821802443d@wolfvision.net
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-1-e4821802443d@wolfvision.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/panel/panel-sitronix-st7789v.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
+index 88e80fe98112d..32e5c03480381 100644
+--- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
++++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
+@@ -286,9 +286,9 @@ static const struct drm_display_mode jt240mhqs_hwt_ek_e3_mode = {
+       .hsync_end = 240 + 28 + 10,
+       .htotal = 240 + 28 + 10 + 10,
+       .vdisplay = 280,
+-      .vsync_start = 280 + 8,
+-      .vsync_end = 280 + 8 + 4,
+-      .vtotal = 280 + 8 + 4 + 4,
++      .vsync_start = 280 + 48,
++      .vsync_end = 280 + 48 + 4,
++      .vtotal = 280 + 48 + 4 + 4,
+       .width_mm = 43,
+       .height_mm = 37,
+       .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC,
+-- 
+2.43.0
+
diff --git a/queue-6.9/drm-panel-sitronix-st7789v-tweak-timing-for-jt240mhq.patch b/queue-6.9/drm-panel-sitronix-st7789v-tweak-timing-for-jt240mhq.patch
new file mode 100644 (file)
index 0000000..621a592
--- /dev/null
@@ -0,0 +1,44 @@
+From c70ab474bd15b39fbd365bea340edc7c8be700fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 16:42:46 +0200
+Subject: drm/panel: sitronix-st7789v: tweak timing for jt240mhqs_hwt_ek_e3
+ panel
+
+From: Gerald Loacker <gerald.loacker@wolfvision.net>
+
+[ Upstream commit 2ba50582634d0bfe3a333ab7575a7f0122a7cde8 ]
+
+Use the default timing parameters to get a refresh rate of about 60 Hz for
+a clock of 6 MHz.
+
+Fixes: 0fbbe96bfa08 ("drm/panel: sitronix-st7789v: add jasonic jt240mhqs-hwt-ek-e3 support")
+Signed-off-by: Gerald Loacker <gerald.loacker@wolfvision.net>
+Acked-by: Jessica Zhang <quic_jesszhan@quicinc.com>
+Link: https://lore.kernel.org/r/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-2-e4821802443d@wolfvision.net
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-2-e4821802443d@wolfvision.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/panel/panel-sitronix-st7789v.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
+index 32e5c03480381..c7e3f1280404d 100644
+--- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
++++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
+@@ -282,9 +282,9 @@ static const struct drm_display_mode et028013dma_mode = {
+ static const struct drm_display_mode jt240mhqs_hwt_ek_e3_mode = {
+       .clock = 6000,
+       .hdisplay = 240,
+-      .hsync_start = 240 + 28,
+-      .hsync_end = 240 + 28 + 10,
+-      .htotal = 240 + 28 + 10 + 10,
++      .hsync_start = 240 + 38,
++      .hsync_end = 240 + 38 + 10,
++      .htotal = 240 + 38 + 10 + 10,
+       .vdisplay = 280,
+       .vsync_start = 280 + 48,
+       .vsync_end = 280 + 48 + 4,
+-- 
+2.43.0
+
diff --git a/queue-6.9/drm-xe-add-dbg-messages-on-the-suspend-resume-functi.patch b/queue-6.9/drm-xe-add-dbg-messages-on-the-suspend-resume-functi.patch
new file mode 100644 (file)
index 0000000..303f109
--- /dev/null
@@ -0,0 +1,108 @@
+From 99d14d03750ebc85594f34184491ebd956f26bde Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Mar 2024 14:01:41 -0400
+Subject: drm/xe: Add dbg messages on the suspend resume functions.
+
+From: Rodrigo Vivi <rodrigo.vivi@intel.com>
+
+[ Upstream commit f7f24b7950af4b1548ad5075ddb13eeb333bb782 ]
+
+In case of the suspend/resume flow getting locked up we
+can get reports with some useful hints on where it might
+get locked and if that has failed.
+
+Reviewed-by: Matthew Auld <matthew.auld@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240318180141.267458-2-rodrigo.vivi@intel.com
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Stable-dep-of: 77b79df0268b ("drm/xe: Change pcode timeout to 50msec while polling again")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/xe/xe_pm.c | 22 +++++++++++++++++-----
+ 1 file changed, 17 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
+index 53b3b0b019acd..669b626c06c22 100644
+--- a/drivers/gpu/drm/xe/xe_pm.c
++++ b/drivers/gpu/drm/xe/xe_pm.c
+@@ -54,13 +54,15 @@ int xe_pm_suspend(struct xe_device *xe)
+       u8 id;
+       int err;
++      drm_dbg(&xe->drm, "Suspending device\n");
++
+       for_each_gt(gt, xe, id)
+               xe_gt_suspend_prepare(gt);
+       /* FIXME: Super racey... */
+       err = xe_bo_evict_all(xe);
+       if (err)
+-              return err;
++              goto err;
+       xe_display_pm_suspend(xe);
+@@ -68,7 +70,7 @@ int xe_pm_suspend(struct xe_device *xe)
+               err = xe_gt_suspend(gt);
+               if (err) {
+                       xe_display_pm_resume(xe);
+-                      return err;
++                      goto err;
+               }
+       }
+@@ -76,7 +78,11 @@ int xe_pm_suspend(struct xe_device *xe)
+       xe_display_pm_suspend_late(xe);
++      drm_dbg(&xe->drm, "Device suspended\n");
+       return 0;
++err:
++      drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
++      return err;
+ }
+ /**
+@@ -92,13 +98,15 @@ int xe_pm_resume(struct xe_device *xe)
+       u8 id;
+       int err;
++      drm_dbg(&xe->drm, "Resuming device\n");
++
+       for_each_tile(tile, xe, id)
+               xe_wa_apply_tile_workarounds(tile);
+       for_each_gt(gt, xe, id) {
+               err = xe_pcode_init(gt);
+               if (err)
+-                      return err;
++                      goto err;
+       }
+       xe_display_pm_resume_early(xe);
+@@ -109,7 +117,7 @@ int xe_pm_resume(struct xe_device *xe)
+        */
+       err = xe_bo_restore_kernel(xe);
+       if (err)
+-              return err;
++              goto err;
+       xe_irq_resume(xe);
+@@ -120,9 +128,13 @@ int xe_pm_resume(struct xe_device *xe)
+       err = xe_bo_restore_user(xe);
+       if (err)
+-              return err;
++              goto err;
++      drm_dbg(&xe->drm, "Device resumed\n");
+       return 0;
++err:
++      drm_dbg(&xe->drm, "Device resume failed %d\n", err);
++      return err;
+ }
+ static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
+-- 
+2.43.0
+
diff --git a/queue-6.9/drm-xe-change-pcode-timeout-to-50msec-while-polling-.patch b/queue-6.9/drm-xe-change-pcode-timeout-to-50msec-while-polling-.patch
new file mode 100644 (file)
index 0000000..97b7b27
--- /dev/null
@@ -0,0 +1,52 @@
+From 2125724e4128593da5b6dc6d2fd3fb6306ba3580 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 May 2024 20:52:15 +0530
+Subject: drm/xe: Change pcode timeout to 50msec while polling again
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
+
+[ Upstream commit 77b79df0268bee3ef38fd5e76e86a076ce02995d ]
+
+Polling is initially attempted with timeout_base_ms enabled for
+preemption, and if it exceeds this timeframe, another attempt is made
+without preemption, allowing an additional 50 ms before timing out.
+
+v2
+- Rebase
+
+v3
+- Move warnings to separate patch (Lucas)
+
+Cc: Lucas De Marchi <lucas.demarchi@intel.com>
+Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
+Fixes: 7dc9b92dcfef ("drm/xe: Remove i915_utils dependency from xe_pcode.")
+Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240508152216.3263109-2-himal.prasad.ghimiray@intel.com
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+(cherry picked from commit c81858eb52266b3d6ba28ca4f62a198231a10cdc)
+Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/xe/xe_pcode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
+index c674c87c7f40b..81f4ae2ea08f3 100644
+--- a/drivers/gpu/drm/xe/xe_pcode.c
++++ b/drivers/gpu/drm/xe/xe_pcode.c
+@@ -191,7 +191,7 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+       drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
+       preempt_disable();
+       ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+-                              true, timeout_base_ms * 1000, true);
++                              true, 50 * 1000, true);
+       preempt_enable();
+ out:
+-- 
+2.43.0
+
diff --git a/queue-6.9/drm-xe-check-pcode-init-status-only-on-root-gt-of-ro.patch b/queue-6.9/drm-xe-check-pcode-init-status-only-on-root-gt-of-ro.patch
new file mode 100644 (file)
index 0000000..63defb9
--- /dev/null
@@ -0,0 +1,348 @@
+From 3fa29436b1ea377173b46464e2b16a30663d11ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Apr 2024 14:20:04 +0530
+Subject: drm/xe: check pcode init status only on root gt of root tile
+
+From: Riana Tauro <riana.tauro@intel.com>
+
+[ Upstream commit 933fd5ffaf87a60a019992d48e3a96b5c3403d9f ]
+
+The root tile indicates the pcode initialization is complete
+when all tiles have completed their initialization.
+So the mailbox can be polled only on the root tile.
+Check pcode init status only on root tile and move it to
+device probe early as root tile is initialized there.
+Also make similar changes in resume paths.
+
+v2: add lock/unlocked version of pcode_mailbox_rw
+    to allow pcode init to be called in device
+    early probe (Rodrigo)
+
+v3: add code description about using root tile
+    change function names to xe_pcode_probe_early
+    and xe_pcode_init (Rodrigo)
+
+Signed-off-by: Riana Tauro <riana.tauro@intel.com>
+Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240410085005.1126343-2-riana.tauro@intel.com
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Stable-dep-of: 77b79df0268b ("drm/xe: Change pcode timeout to 50msec while polling again")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/xe/xe_device.c |  21 ++++--
+ drivers/gpu/drm/xe/xe_pcode.c  | 115 ++++++++++++++++++++-------------
+ drivers/gpu/drm/xe/xe_pcode.h  |   6 +-
+ drivers/gpu/drm/xe/xe_pm.c     |  16 ++---
+ 4 files changed, 94 insertions(+), 64 deletions(-)
+
+diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
+index d32ff3857e658..b3b37ed832ca0 100644
+--- a/drivers/gpu/drm/xe/xe_device.c
++++ b/drivers/gpu/drm/xe/xe_device.c
+@@ -389,8 +389,14 @@ static int xe_set_dma_info(struct xe_device *xe)
+       return err;
+ }
+-/*
+- * Initialize MMIO resources that don't require any knowledge about tile count.
++/**
++ * xe_device_probe_early: Device early probe
++ * @xe: xe device instance
++ *
++ * Initialize MMIO resources that don't require any
++ * knowledge about tile count. Also initialize pcode
++ *
++ * Return: 0 on success, error code on failure
+  */
+ int xe_device_probe_early(struct xe_device *xe)
+ {
+@@ -404,6 +410,10 @@ int xe_device_probe_early(struct xe_device *xe)
+       if (err)
+               return err;
++      err = xe_pcode_probe_early(xe);
++      if (err)
++              return err;
++
+       return 0;
+ }
+@@ -482,11 +492,8 @@ int xe_device_probe(struct xe_device *xe)
+       if (err)
+               return err;
+-      for_each_gt(gt, xe, id) {
+-              err = xe_pcode_probe(gt);
+-              if (err)
+-                      return err;
+-      }
++      for_each_gt(gt, xe, id)
++              xe_pcode_init(gt);
+       err = xe_display_init_noirq(xe);
+       if (err)
+diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
+index b324dc2a5debe..c674c87c7f40b 100644
+--- a/drivers/gpu/drm/xe/xe_pcode.c
++++ b/drivers/gpu/drm/xe/xe_pcode.c
+@@ -10,6 +10,7 @@
+ #include <drm/drm_managed.h>
++#include "xe_device.h"
+ #include "xe_gt.h"
+ #include "xe_mmio.h"
+ #include "xe_pcode_api.h"
+@@ -43,8 +44,6 @@ static int pcode_mailbox_status(struct xe_gt *gt)
+               [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
+       };
+-      lockdep_assert_held(&gt->pcode.lock);
+-
+       err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK;
+       if (err) {
+               drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err,
+@@ -55,17 +54,15 @@ static int pcode_mailbox_status(struct xe_gt *gt)
+       return 0;
+ }
+-static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+-                          unsigned int timeout_ms, bool return_data,
+-                          bool atomic)
++static int __pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
++                            unsigned int timeout_ms, bool return_data,
++                            bool atomic)
+ {
+       int err;
+       if (gt_to_xe(gt)->info.skip_pcode)
+               return 0;
+-      lockdep_assert_held(&gt->pcode.lock);
+-
+       if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0)
+               return -EAGAIN;
+@@ -87,6 +84,18 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+       return pcode_mailbox_status(gt);
+ }
++static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
++                          unsigned int timeout_ms, bool return_data,
++                          bool atomic)
++{
++      if (gt_to_xe(gt)->info.skip_pcode)
++              return 0;
++
++      lockdep_assert_held(&gt->pcode.lock);
++
++      return __pcode_mailbox_rw(gt, mbox, data0, data1, timeout_ms, return_data, atomic);
++}
++
+ int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout)
+ {
+       int err;
+@@ -109,15 +118,19 @@ int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1)
+       return err;
+ }
+-static int xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
+-                              u32 request, u32 reply_mask, u32 reply,
+-                              u32 *status, bool atomic, int timeout_us)
++static int pcode_try_request(struct xe_gt *gt, u32 mbox,
++                           u32 request, u32 reply_mask, u32 reply,
++                           u32 *status, bool atomic, int timeout_us, bool locked)
+ {
+       int slept, wait = 10;
+       for (slept = 0; slept < timeout_us; slept += wait) {
+-              *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
+-                                         atomic);
++              if (locked)
++                      *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
++                                                 atomic);
++              else
++                      *status = __pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
++                                                   atomic);
+               if ((*status == 0) && ((request & reply_mask) == reply))
+                       return 0;
+@@ -158,8 +171,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+       mutex_lock(&gt->pcode.lock);
+-      ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+-                                 false, timeout_base_ms * 1000);
++      ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
++                              false, timeout_base_ms * 1000, true);
+       if (!ret)
+               goto out;
+@@ -177,8 +190,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
+               "PCODE timeout, retrying with preemption disabled\n");
+       drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
+       preempt_disable();
+-      ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+-                                 true, timeout_base_ms * 1000);
++      ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
++                              true, timeout_base_ms * 1000, true);
+       preempt_enable();
+ out:
+@@ -238,59 +251,71 @@ int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+ }
+ /**
+- * xe_pcode_init - Ensure PCODE is initialized
+- * @gt: gt instance
++ * xe_pcode_ready - Ensure PCODE is initialized
++ * @xe: xe instance
++ * @locked: true if lock held, false otherwise
+  *
+- * This function ensures that PCODE is properly initialized. To be called during
+- * probe and resume paths.
++ * PCODE init mailbox is polled only on root gt of root tile
++ * as the root tile provides the initialization is complete only
++ * after all the tiles have completed the initialization.
++ * Called only on early probe without locks and with locks in
++ * resume path.
+  *
+- * It returns 0 on success, and -error number on failure.
++ * Returns 0 on success, and -error number on failure.
+  */
+-int xe_pcode_init(struct xe_gt *gt)
++int xe_pcode_ready(struct xe_device *xe, bool locked)
+ {
+       u32 status, request = DGFX_GET_INIT_STATUS;
++      struct xe_gt *gt = xe_root_mmio_gt(xe);
+       int timeout_us = 180000000; /* 3 min */
+       int ret;
+-      if (gt_to_xe(gt)->info.skip_pcode)
++      if (xe->info.skip_pcode)
+               return 0;
+-      if (!IS_DGFX(gt_to_xe(gt)))
++      if (!IS_DGFX(xe))
+               return 0;
+-      mutex_lock(&gt->pcode.lock);
+-      ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, request,
+-                                 DGFX_INIT_STATUS_COMPLETE,
+-                                 DGFX_INIT_STATUS_COMPLETE,
+-                                 &status, false, timeout_us);
+-      mutex_unlock(&gt->pcode.lock);
++      if (locked)
++              mutex_lock(&gt->pcode.lock);
++
++      ret = pcode_try_request(gt, DGFX_PCODE_STATUS, request,
++                              DGFX_INIT_STATUS_COMPLETE,
++                              DGFX_INIT_STATUS_COMPLETE,
++                              &status, false, timeout_us, locked);
++
++      if (locked)
++              mutex_unlock(&gt->pcode.lock);
+       if (ret)
+-              drm_err(&gt_to_xe(gt)->drm,
++              drm_err(&xe->drm,
+                       "PCODE initialization timedout after: 3 min\n");
+       return ret;
+ }
+ /**
+- * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized.
++ * xe_pcode_init: initialize components of PCODE
+  * @gt: gt instance
+  *
+- * This function initializes the xe_pcode component, and when needed, it ensures
+- * that PCODE has properly performed its initialization and it is really ready
+- * to go. To be called once only during probe.
+- *
+- * It returns 0 on success, and -error number on failure.
++ * This function initializes the xe_pcode component.
++ * To be called once only during probe.
+  */
+-int xe_pcode_probe(struct xe_gt *gt)
++void xe_pcode_init(struct xe_gt *gt)
+ {
+       drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->pcode.lock);
++}
+-      if (gt_to_xe(gt)->info.skip_pcode)
+-              return 0;
+-
+-      if (!IS_DGFX(gt_to_xe(gt)))
+-              return 0;
+-
+-      return xe_pcode_init(gt);
++/**
++ * xe_pcode_probe_early: initializes PCODE
++ * @xe: xe instance
++ *
++ * This function checks the initialization status of PCODE
++ * To be called once only during early probe without locks.
++ *
++ * Returns 0 on success, error code otherwise
++ */
++int xe_pcode_probe_early(struct xe_device *xe)
++{
++      return xe_pcode_ready(xe, false);
+ }
+diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
+index 08cb1d047cba2..3f54c6d2a57d2 100644
+--- a/drivers/gpu/drm/xe/xe_pcode.h
++++ b/drivers/gpu/drm/xe/xe_pcode.h
+@@ -8,9 +8,11 @@
+ #include <linux/types.h>
+ struct xe_gt;
++struct xe_device;
+-int xe_pcode_probe(struct xe_gt *gt);
+-int xe_pcode_init(struct xe_gt *gt);
++void xe_pcode_init(struct xe_gt *gt);
++int xe_pcode_probe_early(struct xe_device *xe);
++int xe_pcode_ready(struct xe_device *xe, bool locked);
+ int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
+                                u32 max_gt_freq);
+ int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1);
+diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
+index 669b626c06c22..944cf4d76099e 100644
+--- a/drivers/gpu/drm/xe/xe_pm.c
++++ b/drivers/gpu/drm/xe/xe_pm.c
+@@ -103,11 +103,9 @@ int xe_pm_resume(struct xe_device *xe)
+       for_each_tile(tile, xe, id)
+               xe_wa_apply_tile_workarounds(tile);
+-      for_each_gt(gt, xe, id) {
+-              err = xe_pcode_init(gt);
+-              if (err)
+-                      goto err;
+-      }
++      err = xe_pcode_ready(xe, true);
++      if (err)
++              return err;
+       xe_display_pm_resume_early(xe);
+@@ -322,11 +320,9 @@ int xe_pm_runtime_resume(struct xe_device *xe)
+       xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);
+       if (xe->d3cold.allowed && xe->d3cold.power_lost) {
+-              for_each_gt(gt, xe, id) {
+-                      err = xe_pcode_init(gt);
+-                      if (err)
+-                              goto out;
+-              }
++              err = xe_pcode_ready(xe, true);
++              if (err)
++                      goto out;
+               /*
+                * This only restores pinned memory which is the memory
+-- 
+2.43.0
+
diff --git a/queue-6.9/drm-xe-only-use-reserved-bcs-instances-for-usm-migra.patch b/queue-6.9/drm-xe-only-use-reserved-bcs-instances-for-usm-migra.patch
new file mode 100644 (file)
index 0000000..f6196c3
--- /dev/null
@@ -0,0 +1,78 @@
+From 2219849b0495592e28b233267d555434f37ad62f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Apr 2024 12:04:53 -0700
+Subject: drm/xe: Only use reserved BCS instances for usm migrate exec queue
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Matthew Brost <matthew.brost@intel.com>
+
+[ Upstream commit c8ea2c31f5ea437199b239d76ad5db27343edb0c ]
+
+The GuC context scheduling queue is 2 entries deep, thus it is possible
+for a migration job to be stuck behind a fault if migration exec queue
+shares engines with user jobs. This can deadlock as the migrate exec
+queue is required to service page faults. Avoid deadlock by only using
+reserved BCS instances for usm migrate exec queue.
+
+Fixes: a043fbab7af5 ("drm/xe/pvc: Use fast copy engines as migrate engine on PVC")
+Cc: Matt Roper <matthew.d.roper@intel.com>
+Cc: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
+Signed-off-by: Matthew Brost <matthew.brost@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240415190453.696553-2-matthew.brost@intel.com
+Reviewed-by: Brian Welty <brian.welty@intel.com>
+(cherry picked from commit 04f4a70a183a688a60fe3882d6e4236ea02cfc67)
+Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/xe/xe_migrate.c | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
+index 2ba4fb9511f63..aca519f5b85d9 100644
+--- a/drivers/gpu/drm/xe/xe_migrate.c
++++ b/drivers/gpu/drm/xe/xe_migrate.c
+@@ -33,7 +33,6 @@
+ #include "xe_sync.h"
+ #include "xe_trace.h"
+ #include "xe_vm.h"
+-#include "xe_wa.h"
+ /**
+  * struct xe_migrate - migrate context.
+@@ -299,10 +298,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
+ }
+ /*
+- * Due to workaround 16017236439, odd instance hardware copy engines are
+- * faster than even instance ones.
+- * This function returns the mask involving all fast copy engines and the
+- * reserved copy engine to be used as logical mask for migrate engine.
+  * Including the reserved copy engine is required to avoid deadlocks due to
+  * migrate jobs servicing the faults gets stuck behind the job that faulted.
+  */
+@@ -316,8 +311,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
+               if (hwe->class != XE_ENGINE_CLASS_COPY)
+                       continue;
+-              if (!XE_WA(gt, 16017236439) ||
+-                  xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
++              if (xe_gt_is_usm_hwe(gt, hwe))
+                       logical_mask |= BIT(hwe->logical_instance);
+       }
+@@ -368,6 +362,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
+               if (!hwe || !logical_mask)
+                       return ERR_PTR(-EINVAL);
++              /*
++               * XXX: Currently only reserving 1 (likely slow) BCS instance on
++               * PVC, may want to revisit if performance is needed.
++               */
+               m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
+                                           EXEC_QUEUE_FLAG_KERNEL |
+                                           EXEC_QUEUE_FLAG_PERMANENT |
+-- 
+2.43.0
+
diff --git a/queue-6.9/e1000e-move-force-smbus-near-the-end-of-enable_ulp-f.patch b/queue-6.9/e1000e-move-force-smbus-near-the-end-of-enable_ulp-f.patch
new file mode 100644 (file)
index 0000000..913524d
--- /dev/null
@@ -0,0 +1,126 @@
+From ffe25a9c84779d37061e34c3d8864b48b41397e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 May 2024 15:06:04 -0700
+Subject: e1000e: move force SMBUS near the end of enable_ulp function
+
+From: Hui Wang <hui.wang@canonical.com>
+
+[ Upstream commit bfd546a552e140b0a4c8a21527c39d6d21addb28 ]
+
+The commit 861e8086029e ("e1000e: move force SMBUS from enable ulp
+function to avoid PHY loss issue") introduces a regression on
+PCH_MTP_I219_LM18 (PCIID: 0x8086550A). Without the referred commit, the
+ethernet works well after suspend and resume, but after applying the
+commit, the ethernet couldn't work anymore after the resume and the
+dmesg shows that the NIC link changes to 10Mbps (1000Mbps originally):
+
+    [   43.305084] e1000e 0000:00:1f.6 enp0s31f6: NIC Link is Up 10 Mbps Full Duplex, Flow Control: Rx/Tx
+
+Without the commit, the force SMBUS code will not be executed if
+"return 0" or "goto out" is executed in the enable_ulp(), and in my
+case, the "goto out" is executed since FWSM_FW_VALID is set. But after
+applying the commit, the force SMBUS code will be run unconditionally.
+
+Here move the force SMBUS code back to enable_ulp() and put it
+immediately ahead of hw->phy.ops.release(hw); this allows the longest
+possible settling time for the interface in this function and
+doesn't change the original code logic.
+
+The issue was found on a Lenovo laptop with the ethernet hw as below:
+00:1f.6 Ethernet controller [0200]: Intel Corporation Device [8086:550a]
+(rev 20).
+
+And this patch is verified (cable plug and unplug, system suspend
+and resume) on Lenovo laptops with ethernet hw: [8086:550a],
+[8086:550b], [8086:15bb], [8086:15be], [8086:1a1f], [8086:1a1c] and
+[8086:0dc7].
+
+Fixes: 861e8086029e ("e1000e: move force SMBUS from enable ulp function to avoid PHY loss issue")
+Signed-off-by: Hui Wang <hui.wang@canonical.com>
+Acked-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Tested-by: Zhang Rui <rui.zhang@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://lore.kernel.org/r/20240528-net-2024-05-28-intel-net-fixes-v1-1-dc8593d2bbc6@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/e1000e/ich8lan.c | 22 +++++++++++++++++++++
+ drivers/net/ethernet/intel/e1000e/netdev.c  | 18 -----------------
+ 2 files changed, 22 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
+index f9e94be36e97f..2e98a2a0bead9 100644
+--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
++++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
+@@ -1225,6 +1225,28 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
+       }
+ release:
++      /* Switching PHY interface always returns MDI error
++       * so disable retry mechanism to avoid wasting time
++       */
++      e1000e_disable_phy_retry(hw);
++
++      /* Force SMBus mode in PHY */
++      ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
++      if (ret_val) {
++              e1000e_enable_phy_retry(hw);
++              hw->phy.ops.release(hw);
++              goto out;
++      }
++      phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
++      e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
++
++      e1000e_enable_phy_retry(hw);
++
++      /* Force SMBus mode in MAC */
++      mac_reg = er32(CTRL_EXT);
++      mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
++      ew32(CTRL_EXT, mac_reg);
++
+       hw->phy.ops.release(hw);
+ out:
+       if (ret_val)
+diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
+index 3692fce201959..cc8c531ec3dff 100644
+--- a/drivers/net/ethernet/intel/e1000e/netdev.c
++++ b/drivers/net/ethernet/intel/e1000e/netdev.c
+@@ -6623,7 +6623,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
+       struct e1000_hw *hw = &adapter->hw;
+       u32 ctrl, ctrl_ext, rctl, status, wufc;
+       int retval = 0;
+-      u16 smb_ctrl;
+       /* Runtime suspend should only enable wakeup for link changes */
+       if (runtime)
+@@ -6697,23 +6696,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
+                       if (retval)
+                               return retval;
+               }
+-
+-              /* Force SMBUS to allow WOL */
+-              /* Switching PHY interface always returns MDI error
+-               * so disable retry mechanism to avoid wasting time
+-               */
+-              e1000e_disable_phy_retry(hw);
+-
+-              e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl);
+-              smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
+-              e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl);
+-
+-              e1000e_enable_phy_retry(hw);
+-
+-              /* Force SMBus mode in MAC */
+-              ctrl_ext = er32(CTRL_EXT);
+-              ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
+-              ew32(CTRL_EXT, ctrl_ext);
+       }
+       /* Ensure that the appropriate bits are set in LPI_CTRL
+-- 
+2.43.0
+
diff --git a/queue-6.9/enic-validate-length-of-nl-attributes-in-enic_set_vf.patch b/queue-6.9/enic-validate-length-of-nl-attributes-in-enic_set_vf.patch
new file mode 100644 (file)
index 0000000..1813b65
--- /dev/null
@@ -0,0 +1,69 @@
+From 731a6e474f590597f25bf84139463e5ffab80bee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 May 2024 10:30:44 +0300
+Subject: enic: Validate length of nl attributes in enic_set_vf_port
+
+From: Roded Zats <rzats@paloaltonetworks.com>
+
+[ Upstream commit e8021b94b0412c37bcc79027c2e382086b6ce449 ]
+
+enic_set_vf_port assumes that the nl attribute IFLA_PORT_PROFILE
+is of length PORT_PROFILE_MAX and that the nl attributes
+IFLA_PORT_INSTANCE_UUID, IFLA_PORT_HOST_UUID are of length PORT_UUID_MAX.
+These attributes are validated (in the function do_setlink in rtnetlink.c)
+using the nla_policy ifla_port_policy. The policy defines IFLA_PORT_PROFILE
+as NLA_STRING, IFLA_PORT_INSTANCE_UUID as NLA_BINARY and
+IFLA_PORT_HOST_UUID as NLA_STRING. That means that the length validation
+using the policy is for the max size of the attributes and not on exact
+size so the length of these attributes might be less than the sizes that
+enic_set_vf_port expects. This might cause an out of bounds
+read access in the memcpys of the data of these
+attributes in enic_set_vf_port.
+
+Fixes: f8bd909183ac ("net: Add ndo_{set|get}_vf_port support for enic dynamic vnics")
+Signed-off-by: Roded Zats <rzats@paloaltonetworks.com>
+Link: https://lore.kernel.org/r/20240522073044.33519-1-rzats@paloaltonetworks.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/cisco/enic/enic_main.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
+index d266a87297a5e..54798df8e2544 100644
+--- a/drivers/net/ethernet/cisco/enic/enic_main.c
++++ b/drivers/net/ethernet/cisco/enic/enic_main.c
+@@ -1117,18 +1117,30 @@ static int enic_set_vf_port(struct net_device *netdev, int vf,
+       pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]);
+       if (port[IFLA_PORT_PROFILE]) {
++              if (nla_len(port[IFLA_PORT_PROFILE]) != PORT_PROFILE_MAX) {
++                      memcpy(pp, &prev_pp, sizeof(*pp));
++                      return -EINVAL;
++              }
+               pp->set |= ENIC_SET_NAME;
+               memcpy(pp->name, nla_data(port[IFLA_PORT_PROFILE]),
+                       PORT_PROFILE_MAX);
+       }
+       if (port[IFLA_PORT_INSTANCE_UUID]) {
++              if (nla_len(port[IFLA_PORT_INSTANCE_UUID]) != PORT_UUID_MAX) {
++                      memcpy(pp, &prev_pp, sizeof(*pp));
++                      return -EINVAL;
++              }
+               pp->set |= ENIC_SET_INSTANCE;
+               memcpy(pp->instance_uuid,
+                       nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX);
+       }
+       if (port[IFLA_PORT_HOST_UUID]) {
++              if (nla_len(port[IFLA_PORT_HOST_UUID]) != PORT_UUID_MAX) {
++                      memcpy(pp, &prev_pp, sizeof(*pp));
++                      return -EINVAL;
++              }
+               pp->set |= ENIC_SET_HOST;
+               memcpy(pp->host_uuid,
+                       nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX);
+-- 
+2.43.0
+
diff --git a/queue-6.9/hwmon-intel-m10-bmc-hwmon-fix-multiplier-for-n6000-b.patch b/queue-6.9/hwmon-intel-m10-bmc-hwmon-fix-multiplier-for-n6000-b.patch
new file mode 100644 (file)
index 0000000..205d5fb
--- /dev/null
@@ -0,0 +1,39 @@
+From 1fac5143427bed41ac22c48fcc185540f8eed0cf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 14:12:46 -0400
+Subject: hwmon: (intel-m10-bmc-hwmon) Fix multiplier for N6000 board power
+ sensor
+
+From: Peter Colberg <peter.colberg@intel.com>
+
+[ Upstream commit 027a44fedd55fbdf1d45603894634acd960ad04b ]
+
+The Intel N6000 BMC outputs the board power value in milliwatt, whereas
+the hwmon sysfs interface must provide power values in microwatt.
+
+Fixes: e1983220ae14 ("hwmon: intel-m10-bmc-hwmon: Add N6000 sensors")
+Signed-off-by: Peter Colberg <peter.colberg@intel.com>
+Reviewed-by: Matthew Gerlach <matthew.gerlach@linux.intel.com>
+Link: https://lore.kernel.org/r/20240521181246.683833-1-peter.colberg@intel.com
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwmon/intel-m10-bmc-hwmon.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/hwmon/intel-m10-bmc-hwmon.c b/drivers/hwmon/intel-m10-bmc-hwmon.c
+index 6500ca548f9c7..ca2dff1589251 100644
+--- a/drivers/hwmon/intel-m10-bmc-hwmon.c
++++ b/drivers/hwmon/intel-m10-bmc-hwmon.c
+@@ -429,7 +429,7 @@ static const struct m10bmc_sdata n6000bmc_curr_tbl[] = {
+ };
+ static const struct m10bmc_sdata n6000bmc_power_tbl[] = {
+-      { 0x724, 0x0, 0x0, 0x0, 0x0, 1, "Board Power" },
++      { 0x724, 0x0, 0x0, 0x0, 0x0, 1000, "Board Power" },
+ };
+ static const struct hwmon_channel_info * const n6000bmc_hinfo[] = {
+-- 
+2.43.0
+
diff --git a/queue-6.9/hwmon-shtc1-fix-property-misspelling.patch b/queue-6.9/hwmon-shtc1-fix-property-misspelling.patch
new file mode 100644 (file)
index 0000000..dff2899
--- /dev/null
@@ -0,0 +1,36 @@
+From df5544de2dbfd2be8e54f4cce27f8b00a3a562e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 May 2024 08:20:14 -0700
+Subject: hwmon: (shtc1) Fix property misspelling
+
+From: Guenter Roeck <linux@roeck-us.net>
+
+[ Upstream commit 52a2c70c3ec555e670a34dd1ab958986451d2dd2 ]
+
+The property name is "sensirion,low-precision", not
+"sensicon,low-precision".
+
+Cc: Chris Ruehl <chris.ruehl@gtsys.com.hk>
+Fixes: be7373b60df5 ("hwmon: shtc1: add support for device tree bindings")
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwmon/shtc1.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c
+index 1f96e94967ee8..439dd3dba5fc8 100644
+--- a/drivers/hwmon/shtc1.c
++++ b/drivers/hwmon/shtc1.c
+@@ -238,7 +238,7 @@ static int shtc1_probe(struct i2c_client *client)
+       if (np) {
+               data->setup.blocking_io = of_property_read_bool(np, "sensirion,blocking-io");
+-              data->setup.high_precision = !of_property_read_bool(np, "sensicon,low-precision");
++              data->setup.high_precision = !of_property_read_bool(np, "sensirion,low-precision");
+       } else {
+               if (client->dev.platform_data)
+                       data->setup = *(struct shtc1_platform_data *)dev->platform_data;
+-- 
+2.43.0
+
diff --git a/queue-6.9/ice-fix-200g-phy-types-to-link-speed-mapping.patch b/queue-6.9/ice-fix-200g-phy-types-to-link-speed-mapping.patch
new file mode 100644 (file)
index 0000000..0a65ae1
--- /dev/null
@@ -0,0 +1,53 @@
+From 02cc664e042d88bbde932eb36355ae09787e8744 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 May 2024 15:06:08 -0700
+Subject: ice: fix 200G PHY types to link speed mapping
+
+From: Paul Greenwalt <paul.greenwalt@intel.com>
+
+[ Upstream commit 2a6d8f2de2224ac46df94dc40f43f8b9701f6703 ]
+
+Commit 24407a01e57c ("ice: Add 200G speed/phy type use") added support
+for 200G PHY speeds, but did not include the mapping of 200G PHY types
+to link speed. As a result the driver is returning UNKNOWN link speed
+when setting 200G ethtool advertised link modes.
+
+To fix this add 200G PHY types to link speed mapping to
+ice_get_link_speed_based_on_phy_type().
+
+Fixes: 24407a01e57c ("ice: Add 200G speed/phy type use")
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://lore.kernel.org/r/20240528-net-2024-05-28-intel-net-fixes-v1-5-dc8593d2bbc6@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_common.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
+index d9f6cc71d900a..e7d28432ba038 100644
+--- a/drivers/net/ethernet/intel/ice/ice_common.c
++++ b/drivers/net/ethernet/intel/ice/ice_common.c
+@@ -3135,6 +3135,16 @@ ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high)
+       case ICE_PHY_TYPE_HIGH_100G_AUI2:
+               speed_phy_type_high = ICE_AQ_LINK_SPEED_100GB;
+               break;
++      case ICE_PHY_TYPE_HIGH_200G_CR4_PAM4:
++      case ICE_PHY_TYPE_HIGH_200G_SR4:
++      case ICE_PHY_TYPE_HIGH_200G_FR4:
++      case ICE_PHY_TYPE_HIGH_200G_LR4:
++      case ICE_PHY_TYPE_HIGH_200G_DR4:
++      case ICE_PHY_TYPE_HIGH_200G_KR4_PAM4:
++      case ICE_PHY_TYPE_HIGH_200G_AUI4_AOC_ACC:
++      case ICE_PHY_TYPE_HIGH_200G_AUI4:
++              speed_phy_type_high = ICE_AQ_LINK_SPEED_200GB;
++              break;
+       default:
+               speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
+               break;
+-- 
+2.43.0
+
diff --git a/queue-6.9/ice-fix-accounting-if-a-vlan-already-exists.patch b/queue-6.9/ice-fix-accounting-if-a-vlan-already-exists.patch
new file mode 100644 (file)
index 0000000..234fc0b
--- /dev/null
@@ -0,0 +1,74 @@
+From 1f5344caca490f35acb058c314413953c024a038 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 10:45:30 -0700
+Subject: ice: fix accounting if a VLAN already exists
+
+From: Jacob Keller <jacob.e.keller@intel.com>
+
+[ Upstream commit 82617b9a04649e83ee8731918aeadbb6e6d7cbc7 ]
+
+The ice_vsi_add_vlan() function is used to add a VLAN filter for the target
+VSI. This function prepares a filter in the switch table for the given VSI.
+If it succeeds, the vsi->num_vlan counter is incremented.
+
+It is not considered an error to add a VLAN which already exists in the
+switch table, so the function explicitly checks and ignores -EEXIST. The
+vsi->num_vlan counter is still incremented.
+
+This seems incorrect, as it means we can double-count in the case where the
+same VLAN is added twice by the caller. The actual table will have one less
+filter than the count.
+
+The ice_vsi_del_vlan() function similarly checks and handles the -ENOENT
+condition for when deleting a filter that doesn't exist. This flow only
+decrements the vsi->num_vlan if it actually deleted a filter.
+
+The vsi->num_vlan counter is used only in a few places, primarily related
+to tracking the number of non-zero VLANs. If vsi->num_vlan gets out of
+sync, then ice_vsi_num_non_zero_vlans() will incorrectly report more VLANs
+than are present, and ice_vsi_has_non_zero_vlans() could return true
+potentially in cases where there are only VLAN 0 filters left.
+
+Fix this by only incrementing the vsi->num_vlan in the case where we
+actually added an entry, and not in the case where the entry already
+existed.
+
+Fixes: a1ffafb0b4a4 ("ice: Support configuring the device to Double VLAN Mode")
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20240523-net-2024-05-23-intel-net-fixes-v1-2-17a923e0bb5f@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
+index 2e9ad27cb9d13..6e8f2aab60801 100644
+--- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
++++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
+@@ -45,14 +45,15 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+               return -EINVAL;
+       err = ice_fltr_add_vlan(vsi, vlan);
+-      if (err && err != -EEXIST) {
++      if (!err)
++              vsi->num_vlan++;
++      else if (err == -EEXIST)
++              err = 0;
++      else
+               dev_err(ice_pf_to_dev(vsi->back), "Failure Adding VLAN %d on VSI %i, status %d\n",
+                       vlan->vid, vsi->vsi_num, err);
+-              return err;
+-      }
+-      vsi->num_vlan++;
+-      return 0;
++      return err;
+ }
+ /**
+-- 
+2.43.0
+
diff --git a/queue-6.9/idpf-don-t-enable-napi-and-interrupts-prior-to-alloc.patch b/queue-6.9/idpf-don-t-enable-napi-and-interrupts-prior-to-alloc.patch
new file mode 100644 (file)
index 0000000..7087a51
--- /dev/null
@@ -0,0 +1,113 @@
+From b7d517b221362aa6a829843945d5840eeaf99e86 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 10:45:29 -0700
+Subject: idpf: don't enable NAPI and interrupts prior to allocating Rx buffers
+
+From: Alexander Lobakin <aleksander.lobakin@intel.com>
+
+[ Upstream commit d514c8b54209de7a95ab37259fe32c7406976bd9 ]
+
+Currently, idpf enables NAPI and interrupts prior to allocating Rx
+buffers.
+This may lead to frame loss (there are no buffers to place incoming
+frames) and even crashes on quick ifup-ifdown. Interrupts must be
+enabled only after all the resources are here and available.
+Split interrupt init into two phases: initialization and enabling,
+and perform the second only after the queues are fully initialized.
+Note that we can't just move interrupt initialization down the init
+process, as the queues must have a correct ::q_vector pointer set
+and NAPI already added in order to allocate buffers correctly.
+Also, during the deinit process, disable HW interrupts first and
+only then disable NAPI. Otherwise, there can be a HW event leading
+to napi_schedule(), but the NAPI will already be unavailable.
+
+Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport")
+Reported-by: Michal Kubiak <michal.kubiak@intel.com>
+Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
+Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://lore.kernel.org/r/20240523-net-2024-05-23-intel-net-fixes-v1-1-17a923e0bb5f@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/idpf/idpf_lib.c  |  1 +
+ drivers/net/ethernet/intel/idpf/idpf_txrx.c | 12 +++++++-----
+ drivers/net/ethernet/intel/idpf/idpf_txrx.h |  1 +
+ 3 files changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
+index 5d3532c27d57f..ae8a48c480708 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
+@@ -1394,6 +1394,7 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res)
+       }
+       idpf_rx_init_buf_tail(vport);
++      idpf_vport_intr_ena(vport);
+       err = idpf_send_config_queues_msg(vport);
+       if (err) {
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+index f5bc4a2780745..7fc77ed9d1232 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+@@ -3747,9 +3747,9 @@ static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport)
+  */
+ void idpf_vport_intr_deinit(struct idpf_vport *vport)
+ {
++      idpf_vport_intr_dis_irq_all(vport);
+       idpf_vport_intr_napi_dis_all(vport);
+       idpf_vport_intr_napi_del_all(vport);
+-      idpf_vport_intr_dis_irq_all(vport);
+       idpf_vport_intr_rel_irq(vport);
+ }
+@@ -4180,7 +4180,6 @@ int idpf_vport_intr_init(struct idpf_vport *vport)
+       idpf_vport_intr_map_vector_to_qs(vport);
+       idpf_vport_intr_napi_add_all(vport);
+-      idpf_vport_intr_napi_ena_all(vport);
+       err = vport->adapter->dev_ops.reg_ops.intr_reg_init(vport);
+       if (err)
+@@ -4194,17 +4193,20 @@ int idpf_vport_intr_init(struct idpf_vport *vport)
+       if (err)
+               goto unroll_vectors_alloc;
+-      idpf_vport_intr_ena_irq_all(vport);
+-
+       return 0;
+ unroll_vectors_alloc:
+-      idpf_vport_intr_napi_dis_all(vport);
+       idpf_vport_intr_napi_del_all(vport);
+       return err;
+ }
++void idpf_vport_intr_ena(struct idpf_vport *vport)
++{
++      idpf_vport_intr_napi_ena_all(vport);
++      idpf_vport_intr_ena_irq_all(vport);
++}
++
+ /**
+  * idpf_config_rss - Send virtchnl messages to configure RSS
+  * @vport: virtual port
+diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+index df76493faa756..85a1466890d43 100644
+--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+@@ -988,6 +988,7 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport);
+ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector);
+ void idpf_vport_intr_deinit(struct idpf_vport *vport);
+ int idpf_vport_intr_init(struct idpf_vport *vport);
++void idpf_vport_intr_ena(struct idpf_vport *vport);
+ enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded);
+ int idpf_config_rss(struct idpf_vport *vport);
+ int idpf_init_rss(struct idpf_vport *vport);
+-- 
+2.43.0
+
diff --git a/queue-6.9/inet-introduce-dst_rtable-helper.patch b/queue-6.9/inet-introduce-dst_rtable-helper.patch
new file mode 100644 (file)
index 0000000..52f0b43
--- /dev/null
@@ -0,0 +1,570 @@
+From 4ae8807a135f1d81f1e9e91a3a9cce0a47a29dcd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Apr 2024 13:30:09 +0000
+Subject: inet: introduce dst_rtable() helper
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 05d6d492097c55f2d153fc3fd33cbe78e1e28e0a ]
+
+I added dst_rt6_info() in commit
+e8dfd42c17fa ("ipv6: introduce dst_rt6_info() helper")
+
+This patch does a similar change for IPv4.
+
+Instead of (struct rtable *)dst casts, we can use :
+
+ #define dst_rtable(_ptr) \
+             container_of_const(_ptr, struct rtable, dst)
+
+This patch is smaller than the IPv6 one because IPv4 has the skb_rtable() helper.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
+Link: https://lore.kernel.org/r/20240429133009.1227754-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 92f1655aa2b2 ("net: fix __dst_negative_advice() race")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/core/addr.c   | 12 +++---------
+ drivers/net/vrf.c                |  2 +-
+ drivers/s390/net/qeth_core.h     |  5 ++---
+ include/linux/skbuff.h           |  9 ---------
+ include/net/ip.h                 |  4 ++--
+ include/net/route.h              | 11 +++++++++++
+ net/atm/clip.c                   |  2 +-
+ net/core/dst_cache.c             |  2 +-
+ net/core/filter.c                |  3 +--
+ net/ipv4/af_inet.c               |  2 +-
+ net/ipv4/icmp.c                  | 26 ++++++++++++++------------
+ net/ipv4/ip_input.c              |  2 +-
+ net/ipv4/ip_output.c             |  8 ++++----
+ net/ipv4/route.c                 | 24 +++++++++++-------------
+ net/ipv4/udp.c                   |  2 +-
+ net/ipv4/xfrm4_policy.c          |  2 +-
+ net/l2tp/l2tp_ip.c               |  2 +-
+ net/mpls/mpls_iptunnel.c         |  2 +-
+ net/netfilter/ipvs/ip_vs_xmit.c  |  2 +-
+ net/netfilter/nf_flow_table_ip.c |  4 ++--
+ net/netfilter/nft_rt.c           |  2 +-
+ net/sctp/protocol.c              |  4 ++--
+ net/tipc/udp_media.c             |  2 +-
+ 23 files changed, 64 insertions(+), 70 deletions(-)
+
+diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
+index f20dfe70fa0e4..be0743dac3fff 100644
+--- a/drivers/infiniband/core/addr.c
++++ b/drivers/infiniband/core/addr.c
+@@ -348,16 +348,10 @@ static int dst_fetch_ha(const struct dst_entry *dst,
+ static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
+ {
+-      const struct rtable *rt;
+-      const struct rt6_info *rt6;
++      if (family == AF_INET)
++              return dst_rtable(dst)->rt_uses_gateway;
+-      if (family == AF_INET) {
+-              rt = container_of(dst, struct rtable, dst);
+-              return rt->rt_uses_gateway;
+-      }
+-
+-      rt6 = dst_rt6_info(dst);
+-      return rt6->rt6i_flags & RTF_GATEWAY;
++      return dst_rt6_info(dst)->rt6i_flags & RTF_GATEWAY;
+ }
+ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
+index 71cfa03a77449..c3af9ad5e1547 100644
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -860,7 +860,7 @@ static int vrf_rt6_create(struct net_device *dev)
+ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+       struct dst_entry *dst = skb_dst(skb);
+-      struct rtable *rt = (struct rtable *)dst;
++      struct rtable *rt = dst_rtable(dst);
+       struct net_device *dev = dst->dev;
+       unsigned int hh_len = LL_RESERVED_SPACE(dev);
+       struct neighbour *neigh;
+diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
+index 5f17a2a5d0e33..41fe8a043d61f 100644
+--- a/drivers/s390/net/qeth_core.h
++++ b/drivers/s390/net/qeth_core.h
+@@ -970,9 +970,8 @@ static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb,
+ static inline __be32 qeth_next_hop_v4_rcu(struct sk_buff *skb,
+                                         struct dst_entry *dst)
+ {
+-      struct rtable *rt = (struct rtable *) dst;
+-
+-      return (rt) ? rt_nexthop(rt, ip_hdr(skb)->daddr) : ip_hdr(skb)->daddr;
++      return (dst) ? rt_nexthop(dst_rtable(dst), ip_hdr(skb)->daddr) :
++                     ip_hdr(skb)->daddr;
+ }
+ static inline struct in6_addr *qeth_next_hop_v6_rcu(struct sk_buff *skb,
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 4ff48eda3f642..5b1078c160f27 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1174,15 +1174,6 @@ static inline bool skb_dst_is_noref(const struct sk_buff *skb)
+       return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
+ }
+-/**
+- * skb_rtable - Returns the skb &rtable
+- * @skb: buffer
+- */
+-static inline struct rtable *skb_rtable(const struct sk_buff *skb)
+-{
+-      return (struct rtable *)skb_dst(skb);
+-}
+-
+ /* For mangling skb->pkt_type from user space side from applications
+  * such as nft, tc, etc, we only allow a conservative subset of
+  * possible pkt_types to be set.
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 25cb688bdc623..6d735e00d3f3e 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -423,7 +423,7 @@ int ip_decrease_ttl(struct iphdr *iph)
+ static inline int ip_mtu_locked(const struct dst_entry *dst)
+ {
+-      const struct rtable *rt = (const struct rtable *)dst;
++      const struct rtable *rt = dst_rtable(dst);
+       return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU);
+ }
+@@ -461,7 +461,7 @@ static inline bool ip_sk_ignore_df(const struct sock *sk)
+ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
+                                                   bool forwarding)
+ {
+-      const struct rtable *rt = container_of(dst, struct rtable, dst);
++      const struct rtable *rt = dst_rtable(dst);
+       struct net *net = dev_net(dst->dev);
+       unsigned int mtu;
+diff --git a/include/net/route.h b/include/net/route.h
+index d4a0147942f1a..af55401aa8f40 100644
+--- a/include/net/route.h
++++ b/include/net/route.h
+@@ -77,6 +77,17 @@ struct rtable {
+                               rt_pmtu:31;
+ };
++#define dst_rtable(_ptr) container_of_const(_ptr, struct rtable, dst)
++
++/**
++ * skb_rtable - Returns the skb &rtable
++ * @skb: buffer
++ */
++static inline struct rtable *skb_rtable(const struct sk_buff *skb)
++{
++      return dst_rtable(skb_dst(skb));
++}
++
+ static inline bool rt_is_input_route(const struct rtable *rt)
+ {
+       return rt->rt_is_input != 0;
+diff --git a/net/atm/clip.c b/net/atm/clip.c
+index 294cb9efe3d38..015fb679be425 100644
+--- a/net/atm/clip.c
++++ b/net/atm/clip.c
+@@ -345,7 +345,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
+               dev->stats.tx_dropped++;
+               return NETDEV_TX_OK;
+       }
+-      rt = (struct rtable *) dst;
++      rt = dst_rtable(dst);
+       if (rt->rt_gw_family == AF_INET)
+               daddr = &rt->rt_gw4;
+       else
+diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
+index b17171345d649..0c0bdb058c5b1 100644
+--- a/net/core/dst_cache.c
++++ b/net/core/dst_cache.c
+@@ -83,7 +83,7 @@ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
+               return NULL;
+       *saddr = idst->in_saddr.s_addr;
+-      return container_of(dst, struct rtable, dst);
++      return dst_rtable(dst);
+ }
+ EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 03c1fdd111f25..a5856a8b4498b 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -2314,8 +2314,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
+       rcu_read_lock();
+       if (!nh) {
+-              struct dst_entry *dst = skb_dst(skb);
+-              struct rtable *rt = container_of(dst, struct rtable, dst);
++              struct rtable *rt = skb_rtable(skb);
+               neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+       } else if (nh->nh_family == AF_INET6) {
+diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
+index c6bebca49591f..5622ddd3bf55b 100644
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -1308,8 +1308,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
+ int inet_sk_rebuild_header(struct sock *sk)
+ {
++      struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0));
+       struct inet_sock *inet = inet_sk(sk);
+-      struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
+       __be32 daddr;
+       struct ip_options_rcu *inet_opt;
+       struct flowi4 *fl4;
+diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+index 437e782b9663b..207482d30dc7e 100644
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -483,6 +483,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
+                                       struct icmp_bxm *param)
+ {
+       struct net_device *route_lookup_dev;
++      struct dst_entry *dst, *dst2;
+       struct rtable *rt, *rt2;
+       struct flowi4 fl4_dec;
+       int err;
+@@ -508,16 +509,17 @@ static struct rtable *icmp_route_lookup(struct net *net,
+       /* No need to clone since we're just using its address. */
+       rt2 = rt;
+-      rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
+-                                         flowi4_to_flowi(fl4), NULL, 0);
+-      if (!IS_ERR(rt)) {
++      dst = xfrm_lookup(net, &rt->dst,
++                        flowi4_to_flowi(fl4), NULL, 0);
++      rt = dst_rtable(dst);
++      if (!IS_ERR(dst)) {
+               if (rt != rt2)
+                       return rt;
+-      } else if (PTR_ERR(rt) == -EPERM) {
++      } else if (PTR_ERR(dst) == -EPERM) {
+               rt = NULL;
+-      } else
++      } else {
+               return rt;
+-
++      }
+       err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
+       if (err)
+               goto relookup_failed;
+@@ -551,19 +553,19 @@ static struct rtable *icmp_route_lookup(struct net *net,
+       if (err)
+               goto relookup_failed;
+-      rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
+-                                          flowi4_to_flowi(&fl4_dec), NULL,
+-                                          XFRM_LOOKUP_ICMP);
+-      if (!IS_ERR(rt2)) {
++      dst2 = xfrm_lookup(net, &rt2->dst, flowi4_to_flowi(&fl4_dec), NULL,
++                         XFRM_LOOKUP_ICMP);
++      rt2 = dst_rtable(dst2);
++      if (!IS_ERR(dst2)) {
+               dst_release(&rt->dst);
+               memcpy(fl4, &fl4_dec, sizeof(*fl4));
+               rt = rt2;
+-      } else if (PTR_ERR(rt2) == -EPERM) {
++      } else if (PTR_ERR(dst2) == -EPERM) {
+               if (rt)
+                       dst_release(&rt->dst);
+               return rt2;
+       } else {
+-              err = PTR_ERR(rt2);
++              err = PTR_ERR(dst2);
+               goto relookup_failed;
+       }
+       return rt;
+diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
+index 5e9c8156656a7..d6fbcbd2358a5 100644
+--- a/net/ipv4/ip_input.c
++++ b/net/ipv4/ip_input.c
+@@ -616,7 +616,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
+               dst = skb_dst(skb);
+               if (curr_dst != dst) {
+                       hint = ip_extract_route_hint(net, skb,
+-                                             ((struct rtable *)dst)->rt_type);
++                                                   dst_rtable(dst)->rt_type);
+                       /* dispatch old sublist */
+                       if (!list_empty(&sublist))
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index 39229fd0601a1..9500031a1f55b 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -198,7 +198,7 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
+ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+       struct dst_entry *dst = skb_dst(skb);
+-      struct rtable *rt = (struct rtable *)dst;
++      struct rtable *rt = dst_rtable(dst);
+       struct net_device *dev = dst->dev;
+       unsigned int hh_len = LL_RESERVED_SPACE(dev);
+       struct neighbour *neigh;
+@@ -475,7 +475,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+               goto packet_routed;
+       /* Make sure we can route this packet. */
+-      rt = (struct rtable *)__sk_dst_check(sk, 0);
++      rt = dst_rtable(__sk_dst_check(sk, 0));
+       if (!rt) {
+               __be32 daddr;
+@@ -971,7 +971,7 @@ static int __ip_append_data(struct sock *sk,
+       bool zc = false;
+       unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
+       int csummode = CHECKSUM_NONE;
+-      struct rtable *rt = (struct rtable *)cork->dst;
++      struct rtable *rt = dst_rtable(cork->dst);
+       bool paged, hold_tskey, extra_uref = false;
+       unsigned int wmem_alloc_delta = 0;
+       u32 tskey = 0;
+@@ -1390,7 +1390,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
+       struct inet_sock *inet = inet_sk(sk);
+       struct net *net = sock_net(sk);
+       struct ip_options *opt = NULL;
+-      struct rtable *rt = (struct rtable *)cork->dst;
++      struct rtable *rt = dst_rtable(cork->dst);
+       struct iphdr *iph;
+       u8 pmtudisc, ttl;
+       __be16 df = 0;
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index b814fdab19f71..12738051ebea7 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -831,7 +831,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
+       u32 mark = skb->mark;
+       __u8 tos = iph->tos;
+-      rt = (struct rtable *) dst;
++      rt = dst_rtable(dst);
+       __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
+       __ip_do_redirect(rt, skb, &fl4, true);
+@@ -839,7 +839,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
+ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
+ {
+-      struct rtable *rt = (struct rtable *)dst;
++      struct rtable *rt = dst_rtable(dst);
+       struct dst_entry *ret = dst;
+       if (rt) {
+@@ -1056,7 +1056,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+                             struct sk_buff *skb, u32 mtu,
+                             bool confirm_neigh)
+ {
+-      struct rtable *rt = (struct rtable *) dst;
++      struct rtable *rt = dst_rtable(dst);
+       struct flowi4 fl4;
+       ip_rt_build_flow_key(&fl4, sk, skb);
+@@ -1127,7 +1127,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
+       __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
+-      rt = (struct rtable *)odst;
++      rt = dst_rtable(odst);
+       if (odst->obsolete && !odst->ops->check(odst, 0)) {
+               rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
+               if (IS_ERR(rt))
+@@ -1136,7 +1136,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
+               new = true;
+       }
+-      __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
++      __ip_rt_update_pmtu(dst_rtable(xfrm_dst_path(&rt->dst)), &fl4, mtu);
+       if (!dst_check(&rt->dst, 0)) {
+               if (new)
+@@ -1193,7 +1193,7 @@ EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
+ INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
+                                                        u32 cookie)
+ {
+-      struct rtable *rt = (struct rtable *) dst;
++      struct rtable *rt = dst_rtable(dst);
+       /* All IPV4 dsts are created with ->obsolete set to the value
+        * DST_OBSOLETE_FORCE_CHK which forces validation calls down
+@@ -1528,10 +1528,8 @@ void rt_del_uncached_list(struct rtable *rt)
+ static void ipv4_dst_destroy(struct dst_entry *dst)
+ {
+-      struct rtable *rt = (struct rtable *)dst;
+-
+       ip_dst_metrics_put(dst);
+-      rt_del_uncached_list(rt);
++      rt_del_uncached_list(dst_rtable(dst));
+ }
+ void rt_flush_dev(struct net_device *dev)
+@@ -2832,7 +2830,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
+ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
+ {
+-      struct rtable *ort = (struct rtable *) dst_orig;
++      struct rtable *ort = dst_rtable(dst_orig);
+       struct rtable *rt;
+       rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0);
+@@ -2877,9 +2875,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
+       if (flp4->flowi4_proto) {
+               flp4->flowi4_oif = rt->dst.dev->ifindex;
+-              rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
+-                                                      flowi4_to_flowi(flp4),
+-                                                      sk, 0);
++              rt = dst_rtable(xfrm_lookup_route(net, &rt->dst,
++                                                flowi4_to_flowi(flp4),
++                                                sk, 0));
+       }
+       return rt;
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index b5ad0c527c521..72d3bf136810d 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1218,7 +1218,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       }
+       if (connected)
+-              rt = (struct rtable *)sk_dst_check(sk, 0);
++              rt = dst_rtable(sk_dst_check(sk, 0));
+       if (!rt) {
+               struct net *net = sock_net(sk);
+diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
+index c33bca2c38415..1853a8415d9f1 100644
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -69,7 +69,7 @@ static int xfrm4_get_saddr(struct net *net, int oif,
+ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+                         const struct flowi *fl)
+ {
+-      struct rtable *rt = (struct rtable *)xdst->route;
++      struct rtable *rt = dst_rtable(xdst->route);
+       const struct flowi4 *fl4 = &fl->u.ip4;
+       xdst->u.rt.rt_iif = fl4->flowi4_iif;
+diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
+index 970af3983d116..19c8cc5289d59 100644
+--- a/net/l2tp/l2tp_ip.c
++++ b/net/l2tp/l2tp_ip.c
+@@ -459,7 +459,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       fl4 = &inet->cork.fl.u.ip4;
+       if (connected)
+-              rt = (struct rtable *)__sk_dst_check(sk, 0);
++              rt = dst_rtable(__sk_dst_check(sk, 0));
+       rcu_read_lock();
+       if (!rt) {
+diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
+index 606349c8df0e6..4385fd3b13be3 100644
+--- a/net/mpls/mpls_iptunnel.c
++++ b/net/mpls/mpls_iptunnel.c
+@@ -81,7 +81,7 @@ static int mpls_xmit(struct sk_buff *skb)
+                       ttl = net->mpls.default_ttl;
+               else
+                       ttl = ip_hdr(skb)->ttl;
+-              rt = (struct rtable *)dst;
++              rt = dst_rtable(dst);
+       } else if (dst->ops->family == AF_INET6) {
+               if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
+                       ttl = tun_encap_info->default_ttl;
+diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
+index 5cd511162bc03..e1f17392f58c1 100644
+--- a/net/netfilter/ipvs/ip_vs_xmit.c
++++ b/net/netfilter/ipvs/ip_vs_xmit.c
+@@ -318,7 +318,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+       if (dest) {
+               dest_dst = __ip_vs_dst_check(dest);
+               if (likely(dest_dst))
+-                      rt = (struct rtable *) dest_dst->dst_cache;
++                      rt = dst_rtable(dest_dst->dst_cache);
+               else {
+                       dest_dst = ip_vs_dest_dst_alloc();
+                       spin_lock_bh(&dest->dst_lock);
+diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
+index 100887beed314..c2c005234dcd3 100644
+--- a/net/netfilter/nf_flow_table_ip.c
++++ b/net/netfilter/nf_flow_table_ip.c
+@@ -434,7 +434,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+               return NF_ACCEPT;
+       if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+-              rt = (struct rtable *)tuplehash->tuple.dst_cache;
++              rt = dst_rtable(tuplehash->tuple.dst_cache);
+               memset(skb->cb, 0, sizeof(struct inet_skb_parm));
+               IPCB(skb)->iif = skb->dev->ifindex;
+               IPCB(skb)->flags = IPSKB_FORWARDED;
+@@ -446,7 +446,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+       switch (tuplehash->tuple.xmit_type) {
+       case FLOW_OFFLOAD_XMIT_NEIGH:
+-              rt = (struct rtable *)tuplehash->tuple.dst_cache;
++              rt = dst_rtable(tuplehash->tuple.dst_cache);
+               outdev = rt->dst.dev;
+               skb->dev = outdev;
+               nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
+index 2434c624aafde..14d88394bcb7f 100644
+--- a/net/netfilter/nft_rt.c
++++ b/net/netfilter/nft_rt.c
+@@ -73,7 +73,7 @@ void nft_rt_get_eval(const struct nft_expr *expr,
+               if (nft_pf(pkt) != NFPROTO_IPV4)
+                       goto err;
+-              *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
++              *dest = (__force u32)rt_nexthop(dst_rtable(dst),
+                                               ip_hdr(skb)->daddr);
+               break;
+       case NFT_RT_NEXTHOP6:
+diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
+index e849f368ed913..5a7436a13b741 100644
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -552,7 +552,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk,
+                             struct flowi *fl)
+ {
+       union sctp_addr *saddr = &t->saddr;
+-      struct rtable *rt = (struct rtable *)t->dst;
++      struct rtable *rt = dst_rtable(t->dst);
+       if (rt) {
+               saddr->v4.sin_family = AF_INET;
+@@ -1085,7 +1085,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t)
+       skb_reset_inner_mac_header(skb);
+       skb_reset_inner_transport_header(skb);
+       skb_set_inner_ipproto(skb, IPPROTO_SCTP);
+-      udp_tunnel_xmit_skb((struct rtable *)dst, sk, skb, fl4->saddr,
++      udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr,
+                           fl4->daddr, dscp, ip4_dst_hoplimit(dst), df,
+                           sctp_sk(sk)->udp_port, t->encap_port, false, false);
+       return 0;
+diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
+index f892b0903dbaf..b849a3d133a01 100644
+--- a/net/tipc/udp_media.c
++++ b/net/tipc/udp_media.c
+@@ -174,7 +174,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
+       local_bh_disable();
+       ndst = dst_cache_get(cache);
+       if (dst->proto == htons(ETH_P_IP)) {
+-              struct rtable *rt = (struct rtable *)ndst;
++              struct rtable *rt = dst_rtable(ndst);
+               if (!rt) {
+                       struct flowi4 fl = {
+-- 
+2.43.0
+
diff --git a/queue-6.9/ipv4-correctly-iterate-over-the-target-netns-in-inet.patch b/queue-6.9/ipv4-correctly-iterate-over-the-target-netns-in-inet.patch
new file mode 100644 (file)
index 0000000..53f2b8a
--- /dev/null
@@ -0,0 +1,47 @@
+From 942774d1433ca6d7939413077bfc1e4c738d60ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 May 2024 22:30:30 +0200
+Subject: ipv4: correctly iterate over the target netns in inet_dump_ifaddr()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
+
+[ Upstream commit b8c8abefc07b47f0dc9342530b7618237df96724 ]
+
+A recent change to inet_dump_ifaddr had the function incorrectly iterate
+over net rather than tgt_net, resulting in the data coming from the
+incorrect network namespace.
+
+Fixes: cdb2f80f1c10 ("inet: use xa_array iterator to implement inet_dump_ifaddr()")
+Reported-by: Stéphane Graber <stgraber@stgraber.org>
+Closes: https://github.com/lxc/incus/issues/892
+Bisected-by: Stéphane Graber <stgraber@stgraber.org>
+Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
+Tested-by: Stéphane Graber <stgraber@stgraber.org>
+Acked-by: Christian Brauner <brauner@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240528203030.10839-1-aleksandr.mikhalitsyn@canonical.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/devinet.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
+index ee5fbc19b85fc..8382cc998bff8 100644
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -1898,7 +1898,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+       cb->seq = inet_base_seq(tgt_net);
+-      for_each_netdev_dump(net, dev, ctx->ifindex) {
++      for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
+               in_dev = __in_dev_get_rcu(dev);
+               if (!in_dev)
+                       continue;
+-- 
+2.43.0
+
diff --git a/queue-6.9/ipv4-fix-address-dump-when-ipv4-is-disabled-on-an-in.patch b/queue-6.9/ipv4-fix-address-dump-when-ipv4-is-disabled-on-an-in.patch
new file mode 100644 (file)
index 0000000..9aff25d
--- /dev/null
@@ -0,0 +1,93 @@
+From aed202b4490b49790d10e01d5c4acf1b13404769 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 14:02:57 +0300
+Subject: ipv4: Fix address dump when IPv4 is disabled on an interface
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 7b05ab85e28f615e70520d24c075249b4512044e ]
+
+Cited commit started returning an error when user space requests to dump
+the interface's IPv4 addresses and IPv4 is disabled on the interface.
+Restore the previous behavior and do not return an error.
+
+Before cited commit:
+
+ # ip address show dev dummy1
+ 10: dummy1: <BROADCAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
+     link/ether e2:40:68:98:d0:18 brd ff:ff:ff:ff:ff:ff
+     inet6 fe80::e040:68ff:fe98:d018/64 scope link proto kernel_ll
+        valid_lft forever preferred_lft forever
+ # ip link set dev dummy1 mtu 67
+ # ip address show dev dummy1
+ 10: dummy1: <BROADCAST,NOARP,UP,LOWER_UP> mtu 67 qdisc noqueue state UNKNOWN group default qlen 1000
+     link/ether e2:40:68:98:d0:18 brd ff:ff:ff:ff:ff:ff
+
+After cited commit:
+
+ # ip address show dev dummy1
+ 10: dummy1: <BROADCAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
+     link/ether 32:2d:69:f2:9c:99 brd ff:ff:ff:ff:ff:ff
+     inet6 fe80::302d:69ff:fef2:9c99/64 scope link proto kernel_ll
+        valid_lft forever preferred_lft forever
+ # ip link set dev dummy1 mtu 67
+ # ip address show dev dummy1
+ RTNETLINK answers: No such device
+ Dump terminated
+
+With this patch:
+
+ # ip address show dev dummy1
+ 10: dummy1: <BROADCAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
+     link/ether de:17:56:bb:57:c0 brd ff:ff:ff:ff:ff:ff
+     inet6 fe80::dc17:56ff:febb:57c0/64 scope link proto kernel_ll
+        valid_lft forever preferred_lft forever
+ # ip link set dev dummy1 mtu 67
+ # ip address show dev dummy1
+ 10: dummy1: <BROADCAST,NOARP,UP,LOWER_UP> mtu 67 qdisc noqueue state UNKNOWN group default qlen 1000
+     link/ether de:17:56:bb:57:c0 brd ff:ff:ff:ff:ff:ff
+
+I fixed the exact same issue for IPv6 in commit c04f7dfe6ec2 ("ipv6: Fix
+address dump when IPv6 is disabled on an interface"), but noted [1] that
+I am not doing the change for IPv4 because I am not aware of a way to
+disable IPv4 on an interface other than unregistering it. I clearly
+missed the above case.
+
+[1] https://lore.kernel.org/netdev/20240321173042.2151756-1-idosch@nvidia.com/
+
+Fixes: cdb2f80f1c10 ("inet: use xa_array iterator to implement inet_dump_ifaddr()")
+Reported-by: Carolina Jubran <cjubran@nvidia.com>
+Reported-by: Yamen Safadi <ysafadi@nvidia.com>
+Tested-by: Carolina Jubran <cjubran@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20240523110257.334315-1-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/devinet.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
+index 7e45c34c8340a..ee5fbc19b85fc 100644
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -1882,10 +1882,11 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+                       goto done;
+               if (fillargs.ifindex) {
+-                      err = -ENODEV;
+                       dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
+-                      if (!dev)
++                      if (!dev) {
++                              err = -ENODEV;
+                               goto done;
++                      }
+                       in_dev = __in_dev_get_rcu(dev);
+                       if (!in_dev)
+                               goto done;
+-- 
+2.43.0
+
diff --git a/queue-6.9/ipv6-introduce-dst_rt6_info-helper.patch b/queue-6.9/ipv6-introduce-dst_rt6_info-helper.patch
new file mode 100644 (file)
index 0000000..7aa4547
--- /dev/null
@@ -0,0 +1,841 @@
+From aeca0b8ce99996b73fe3ef480125e0ae1c76559b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Apr 2024 15:19:52 +0000
+Subject: ipv6: introduce dst_rt6_info() helper
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e8dfd42c17faf183415323db1ef0c977be0d6489 ]
+
+Instead of (struct rt6_info *)dst casts, we can use :
+
+ #define dst_rt6_info(_ptr) \
+         container_of_const(_ptr, struct rt6_info, dst)
+
+Some places needed missing const qualifiers :
+
+ip6_confirm_neigh(), ipv6_anycast_destination(),
+ipv6_unicast_destination(), has_gateway()
+
+v2: added missing parts (David Ahern)
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 92f1655aa2b2 ("net: fix __dst_negative_advice() race")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/core/addr.c                |  6 ++--
+ .../ethernet/mellanox/mlxsw/spectrum_span.c   |  2 +-
+ drivers/net/vrf.c                             |  2 +-
+ drivers/net/vxlan/vxlan_core.c                |  2 +-
+ drivers/s390/net/qeth_core.h                  |  4 +--
+ include/net/ip6_fib.h                         |  6 ++--
+ include/net/ip6_route.h                       | 11 ++++----
+ net/bluetooth/6lowpan.c                       |  2 +-
+ net/core/dst_cache.c                          |  2 +-
+ net/core/filter.c                             |  2 +-
+ net/ipv4/ip_tunnel.c                          |  2 +-
+ net/ipv6/icmp.c                               |  8 +++---
+ net/ipv6/ila/ila_lwt.c                        |  4 +--
+ net/ipv6/ip6_output.c                         | 18 ++++++------
+ net/ipv6/ip6mr.c                              |  2 +-
+ net/ipv6/ndisc.c                              |  2 +-
+ net/ipv6/ping.c                               |  2 +-
+ net/ipv6/raw.c                                |  4 +--
+ net/ipv6/route.c                              | 28 +++++++++----------
+ net/ipv6/tcp_ipv6.c                           |  4 +--
+ net/ipv6/udp.c                                | 11 +++-----
+ net/ipv6/xfrm6_policy.c                       |  2 +-
+ net/l2tp/l2tp_ip6.c                           |  2 +-
+ net/mpls/mpls_iptunnel.c                      |  2 +-
+ net/netfilter/ipvs/ip_vs_xmit.c               | 14 +++++-----
+ net/netfilter/nf_flow_table_core.c            |  8 ++----
+ net/netfilter/nf_flow_table_ip.c              |  4 +--
+ net/netfilter/nft_rt.c                        |  2 +-
+ net/sctp/ipv6.c                               |  2 +-
+ net/xfrm/xfrm_policy.c                        |  3 +-
+ 30 files changed, 77 insertions(+), 86 deletions(-)
+
+diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
+index f253295795f0a..f20dfe70fa0e4 100644
+--- a/drivers/infiniband/core/addr.c
++++ b/drivers/infiniband/core/addr.c
+@@ -348,15 +348,15 @@ static int dst_fetch_ha(const struct dst_entry *dst,
+ static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
+ {
+-      struct rtable *rt;
+-      struct rt6_info *rt6;
++      const struct rtable *rt;
++      const struct rt6_info *rt6;
+       if (family == AF_INET) {
+               rt = container_of(dst, struct rtable, dst);
+               return rt->rt_uses_gateway;
+       }
+-      rt6 = container_of(dst, struct rt6_info, dst);
++      rt6 = dst_rt6_info(dst);
+       return rt6->rt6i_flags & RTF_GATEWAY;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+index af50ff9e5f267..ce49c9514f911 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+@@ -539,7 +539,7 @@ mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
+       if (!dst || dst->error)
+               goto out;
+-      rt6 = container_of(dst, struct rt6_info, dst);
++      rt6 = dst_rt6_info(dst);
+       dev = dst->dev;
+       *saddrp = fl6.saddr;
+diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
+index bb95ce43cd97d..71cfa03a77449 100644
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -653,7 +653,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
+       skb->dev = dev;
+       rcu_read_lock();
+-      nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
++      nexthop = rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr);
+       neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+       if (unlikely(!neigh))
+               neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
+index 3a9148fb1422b..6b64f28a9174d 100644
+--- a/drivers/net/vxlan/vxlan_core.c
++++ b/drivers/net/vxlan/vxlan_core.c
+@@ -2528,7 +2528,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
+               }
+               if (!info) {
+-                      u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
++                      u32 rt6i_flags = dst_rt6_info(ndst)->rt6i_flags;
+                       err = encap_bypass_if_local(skb, dev, vxlan, AF_INET6,
+                                                   dst_port, ifindex, vni,
+diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
+index 613eab7297046..5f17a2a5d0e33 100644
+--- a/drivers/s390/net/qeth_core.h
++++ b/drivers/s390/net/qeth_core.h
+@@ -956,7 +956,7 @@ static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb,
+       struct dst_entry *dst = skb_dst(skb);
+       struct rt6_info *rt;
+-      rt = (struct rt6_info *) dst;
++      rt = dst_rt6_info(dst);
+       if (dst) {
+               if (proto == htons(ETH_P_IPV6))
+                       dst = dst_check(dst, rt6_get_cookie(rt));
+@@ -978,7 +978,7 @@ static inline __be32 qeth_next_hop_v4_rcu(struct sk_buff *skb,
+ static inline struct in6_addr *qeth_next_hop_v6_rcu(struct sk_buff *skb,
+                                                   struct dst_entry *dst)
+ {
+-      struct rt6_info *rt = (struct rt6_info *) dst;
++      struct rt6_info *rt = dst_rt6_info(dst);
+       if (rt && !ipv6_addr_any(&rt->rt6i_gateway))
+               return &rt->rt6i_gateway;
+diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
+index 323c94f1845b9..73524fa0c064b 100644
+--- a/include/net/ip6_fib.h
++++ b/include/net/ip6_fib.h
+@@ -234,9 +234,11 @@ struct fib6_result {
+       for (rt = (w)->leaf; rt;                                        \
+            rt = rcu_dereference_protected(rt->fib6_next, 1))
+-static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
++#define dst_rt6_info(_ptr) container_of_const(_ptr, struct rt6_info, dst)
++
++static inline struct inet6_dev *ip6_dst_idev(const struct dst_entry *dst)
+ {
+-      return ((struct rt6_info *)dst)->rt6i_idev;
++      return dst_rt6_info(dst)->rt6i_idev;
+ }
+ static inline bool fib6_requires_src(const struct fib6_info *rt)
+diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
+index a30c6aa9e5cf3..a18ed24fed948 100644
+--- a/include/net/ip6_route.h
++++ b/include/net/ip6_route.h
+@@ -210,12 +210,11 @@ void rt6_uncached_list_del(struct rt6_info *rt);
+ static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
+ {
+       const struct dst_entry *dst = skb_dst(skb);
+-      const struct rt6_info *rt6 = NULL;
+       if (dst)
+-              rt6 = container_of(dst, struct rt6_info, dst);
++              return dst_rt6_info(dst);
+-      return rt6;
++      return NULL;
+ }
+ /*
+@@ -227,7 +226,7 @@ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
+ {
+       struct ipv6_pinfo *np = inet6_sk(sk);
+-      np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst);
++      np->dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
+       sk_setup_caps(sk, dst);
+       np->daddr_cache = daddr;
+ #ifdef CONFIG_IPV6_SUBTREES
+@@ -240,7 +239,7 @@ void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
+ static inline bool ipv6_unicast_destination(const struct sk_buff *skb)
+ {
+-      struct rt6_info *rt = (struct rt6_info *) skb_dst(skb);
++      const struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
+       return rt->rt6i_flags & RTF_LOCAL;
+ }
+@@ -248,7 +247,7 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb)
+ static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
+                                           const struct in6_addr *daddr)
+ {
+-      struct rt6_info *rt = (struct rt6_info *)dst;
++      const struct rt6_info *rt = dst_rt6_info(dst);
+       return rt->rt6i_flags & RTF_ANYCAST ||
+               (rt->rt6i_dst.plen < 127 &&
+diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
+index 27520a8a486f3..50cfec8ccac4f 100644
+--- a/net/bluetooth/6lowpan.c
++++ b/net/bluetooth/6lowpan.c
+@@ -133,7 +133,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev,
+                                                 struct in6_addr *daddr,
+                                                 struct sk_buff *skb)
+ {
+-      struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
++      struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
+       int count = atomic_read(&dev->peer_count);
+       const struct in6_addr *nexthop;
+       struct lowpan_peer *peer;
+diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
+index 0ccfd5fa5cb9b..b17171345d649 100644
+--- a/net/core/dst_cache.c
++++ b/net/core/dst_cache.c
+@@ -112,7 +112,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
+       idst = this_cpu_ptr(dst_cache->cache);
+       dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
+-                                rt6_get_cookie((struct rt6_info *)dst));
++                                rt6_get_cookie(dst_rt6_info(dst)));
+       idst->in6_saddr = *saddr;
+ }
+ EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
+diff --git a/net/core/filter.c b/net/core/filter.c
+index ae5254f712c94..03c1fdd111f25 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -2215,7 +2215,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
+       rcu_read_lock();
+       if (!nh) {
+               dst = skb_dst(skb);
+-              nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
++              nexthop = rt6_nexthop(dst_rt6_info(dst),
+                                     &ipv6_hdr(skb)->daddr);
+       } else {
+               nexthop = &nh->ipv6_nh;
+diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
+index 1b8d8ff9a2375..0e4bd528428e9 100644
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -543,7 +543,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
+               struct rt6_info *rt6;
+               __be32 daddr;
+-              rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
++              rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) :
+                                          NULL;
+               daddr = md ? dst : tunnel->parms.iph.daddr;
+diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
+index 1635da07285f2..d285c1f6f1a61 100644
+--- a/net/ipv6/icmp.c
++++ b/net/ipv6/icmp.c
+@@ -212,7 +212,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
+       } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
+               res = true;
+       } else {
+-              struct rt6_info *rt = (struct rt6_info *)dst;
++              struct rt6_info *rt = dst_rt6_info(dst);
+               int tmo = net->ipv6.sysctl.icmpv6_time;
+               struct inet_peer *peer;
+@@ -241,7 +241,7 @@ static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
+       dst = ip6_route_output(net, sk, fl6);
+       if (!dst->error) {
+-              struct rt6_info *rt = (struct rt6_info *)dst;
++              struct rt6_info *rt = dst_rt6_info(dst);
+               struct in6_addr prefsrc;
+               rt6_get_prefsrc(rt, &prefsrc);
+@@ -616,7 +616,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+       if (ip6_append_data(sk, icmpv6_getfrag, &msg,
+                           len + sizeof(struct icmp6hdr),
+                           sizeof(struct icmp6hdr),
+-                          &ipc6, &fl6, (struct rt6_info *)dst,
++                          &ipc6, &fl6, dst_rt6_info(dst),
+                           MSG_DONTWAIT)) {
+               ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
+               ip6_flush_pending_frames(sk);
+@@ -803,7 +803,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
+       if (ip6_append_data(sk, icmpv6_getfrag, &msg,
+                           skb->len + sizeof(struct icmp6hdr),
+                           sizeof(struct icmp6hdr), &ipc6, &fl6,
+-                          (struct rt6_info *)dst, MSG_DONTWAIT)) {
++                          dst_rt6_info(dst), MSG_DONTWAIT)) {
+               __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
+               ip6_flush_pending_frames(sk);
+       } else {
+diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
+index 8c1ce78956bae..0601bad798221 100644
+--- a/net/ipv6/ila/ila_lwt.c
++++ b/net/ipv6/ila/ila_lwt.c
+@@ -38,7 +38,7 @@ static inline struct ila_params *ila_params_lwtunnel(
+ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+       struct dst_entry *orig_dst = skb_dst(skb);
+-      struct rt6_info *rt = (struct rt6_info *)orig_dst;
++      struct rt6_info *rt = dst_rt6_info(orig_dst);
+       struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate);
+       struct dst_entry *dst;
+       int err = -EINVAL;
+@@ -70,7 +70,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+               memset(&fl6, 0, sizeof(fl6));
+               fl6.flowi6_oif = orig_dst->dev->ifindex;
+               fl6.flowi6_iif = LOOPBACK_IFINDEX;
+-              fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst,
++              fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst),
+                                        &ip6h->daddr);
+               dst = ip6_route_output(net, NULL, &fl6);
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index 97b0788b31bae..27d8725445e35 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -120,7 +120,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
+       IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+       rcu_read_lock();
+-      nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
++      nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
+       neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
+       if (unlikely(IS_ERR_OR_NULL(neigh))) {
+@@ -599,7 +599,7 @@ int ip6_forward(struct sk_buff *skb)
+                *      send a redirect.
+                */
+-              rt = (struct rt6_info *) dst;
++              rt = dst_rt6_info(dst);
+               if (rt->rt6i_flags & RTF_GATEWAY)
+                       target = &rt->rt6i_gateway;
+               else
+@@ -856,7 +856,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+                int (*output)(struct net *, struct sock *, struct sk_buff *))
+ {
+       struct sk_buff *frag;
+-      struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
++      struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
+       struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
+                               inet6_sk(skb->sk) : NULL;
+       bool mono_delivery_time = skb->mono_delivery_time;
+@@ -1063,7 +1063,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
+               return NULL;
+       }
+-      rt = (struct rt6_info *)dst;
++      rt = dst_rt6_info(dst);
+       /* Yes, checking route validity in not connected
+        * case is not very simple. Take into account,
+        * that we do not support routing by source, TOS,
+@@ -1118,7 +1118,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
+               struct rt6_info *rt;
+               *dst = ip6_route_output(net, sk, fl6);
+-              rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
++              rt = (*dst)->error ? NULL : dst_rt6_info(*dst);
+               rcu_read_lock();
+               from = rt ? rcu_dereference(rt->from) : NULL;
+@@ -1159,7 +1159,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
+        * dst entry and replace it instead with the
+        * dst entry of the nexthop router
+        */
+-      rt = (struct rt6_info *) *dst;
++      rt = dst_rt6_info(*dst);
+       rcu_read_lock();
+       n = __ipv6_neigh_lookup_noref(rt->dst.dev,
+                                     rt6_nexthop(rt, &fl6->daddr));
+@@ -1423,7 +1423,7 @@ static int __ip6_append_data(struct sock *sk,
+       int offset = 0;
+       bool zc = false;
+       u32 tskey = 0;
+-      struct rt6_info *rt = (struct rt6_info *)cork->dst;
++      struct rt6_info *rt = dst_rt6_info(cork->dst);
+       bool paged, hold_tskey, extra_uref = false;
+       struct ipv6_txoptions *opt = v6_cork->opt;
+       int csummode = CHECKSUM_NONE;
+@@ -1877,7 +1877,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
+       struct net *net = sock_net(sk);
+       struct ipv6hdr *hdr;
+       struct ipv6_txoptions *opt = v6_cork->opt;
+-      struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
++      struct rt6_info *rt = dst_rt6_info(cork->base.dst);
+       struct flowi6 *fl6 = &cork->fl.u.ip6;
+       unsigned char proto = fl6->flowi6_proto;
+@@ -1949,7 +1949,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
+ int ip6_send_skb(struct sk_buff *skb)
+ {
+       struct net *net = sock_net(skb->sk);
+-      struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
++      struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
+       int err;
+       err = ip6_local_out(net, skb->sk, skb);
+diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
+index cb0ee81a068a4..dd342e6ecf3f4 100644
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -2273,7 +2273,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
+       int err;
+       struct mr_table *mrt;
+       struct mfc6_cache *cache;
+-      struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
++      struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
+       mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+       if (!mrt)
+diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
+index ae134634c323c..d914b23256ce6 100644
+--- a/net/ipv6/ndisc.c
++++ b/net/ipv6/ndisc.c
+@@ -1722,7 +1722,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
+       if (IS_ERR(dst))
+               return;
+-      rt = (struct rt6_info *) dst;
++      rt = dst_rt6_info(dst);
+       if (rt->rt6i_flags & RTF_GATEWAY) {
+               ND_PRINTK(2, warn,
+diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
+index ef2059c889554..88b3fcacd4f94 100644
+--- a/net/ipv6/ping.c
++++ b/net/ipv6/ping.c
+@@ -154,7 +154,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
+       if (IS_ERR(dst))
+               return PTR_ERR(dst);
+-      rt = (struct rt6_info *) dst;
++      rt = dst_rt6_info(dst);
+       if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+               fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
+diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
+index 0d896ca7b5891..2eedf255600b9 100644
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -598,7 +598,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
+       struct ipv6hdr *iph;
+       struct sk_buff *skb;
+       int err;
+-      struct rt6_info *rt = (struct rt6_info *)*dstp;
++      struct rt6_info *rt = dst_rt6_info(*dstp);
+       int hlen = LL_RESERVED_SPACE(rt->dst.dev);
+       int tlen = rt->dst.dev->needed_tailroom;
+@@ -917,7 +917,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+               ipc6.opt = opt;
+               lock_sock(sk);
+               err = ip6_append_data(sk, raw6_getfrag, &rfv,
+-                      len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
++                      len, 0, &ipc6, &fl6, dst_rt6_info(dst),
+                       msg->msg_flags);
+               if (err)
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 1f4b935a0e57a..3e0b2cb20fd20 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -226,7 +226,7 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
+                                             struct sk_buff *skb,
+                                             const void *daddr)
+ {
+-      const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
++      const struct rt6_info *rt = dst_rt6_info(dst);
+       return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
+                               dst->dev, skb, daddr);
+@@ -234,8 +234,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
+ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
+ {
++      const struct rt6_info *rt = dst_rt6_info(dst);
+       struct net_device *dev = dst->dev;
+-      struct rt6_info *rt = (struct rt6_info *)dst;
+       daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
+       if (!daddr)
+@@ -354,7 +354,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
+ static void ip6_dst_destroy(struct dst_entry *dst)
+ {
+-      struct rt6_info *rt = (struct rt6_info *)dst;
++      struct rt6_info *rt = dst_rt6_info(dst);
+       struct fib6_info *from;
+       struct inet6_dev *idev;
+@@ -373,7 +373,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
+ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
+ {
+-      struct rt6_info *rt = (struct rt6_info *)dst;
++      struct rt6_info *rt = dst_rt6_info(dst);
+       struct inet6_dev *idev = rt->rt6i_idev;
+       if (idev && idev->dev != blackhole_netdev) {
+@@ -1288,7 +1288,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
+       dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
+       if (dst->error == 0)
+-              return (struct rt6_info *) dst;
++              return dst_rt6_info(dst);
+       dst_release(dst);
+@@ -2647,7 +2647,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net,
+       rcu_read_lock();
+       dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
+-      rt6 = (struct rt6_info *)dst;
++      rt6 = dst_rt6_info(dst);
+       /* For dst cached in uncached_list, refcnt is already taken. */
+       if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
+               dst = &net->ipv6.ip6_null_entry->dst;
+@@ -2661,7 +2661,7 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags);
+ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
+ {
+-      struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
++      struct rt6_info *rt, *ort = dst_rt6_info(dst_orig);
+       struct net_device *loopback_dev = net->loopback_dev;
+       struct dst_entry *new = NULL;
+@@ -2744,7 +2744,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
+       struct fib6_info *from;
+       struct rt6_info *rt;
+-      rt = container_of(dst, struct rt6_info, dst);
++      rt = dst_rt6_info(dst);
+       if (rt->sernum)
+               return rt6_is_valid(rt) ? dst : NULL;
+@@ -2772,7 +2772,7 @@ EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
+ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
+ {
+-      struct rt6_info *rt = (struct rt6_info *) dst;
++      struct rt6_info *rt = dst_rt6_info(dst);
+       if (rt) {
+               if (rt->rt6i_flags & RTF_CACHE) {
+@@ -2796,7 +2796,7 @@ static void ip6_link_failure(struct sk_buff *skb)
+       icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+-      rt = (struct rt6_info *) skb_dst(skb);
++      rt = dst_rt6_info(skb_dst(skb));
+       if (rt) {
+               rcu_read_lock();
+               if (rt->rt6i_flags & RTF_CACHE) {
+@@ -2852,7 +2852,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
+                                bool confirm_neigh)
+ {
+       const struct in6_addr *daddr, *saddr;
+-      struct rt6_info *rt6 = (struct rt6_info *)dst;
++      struct rt6_info *rt6 = dst_rt6_info(dst);
+       /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
+        * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
+@@ -4174,7 +4174,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
+               }
+       }
+-      rt = (struct rt6_info *) dst;
++      rt = dst_rt6_info(dst);
+       if (rt->rt6i_flags & RTF_REJECT) {
+               net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
+               return;
+@@ -5608,7 +5608,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
+                        int iif, int type, u32 portid, u32 seq,
+                        unsigned int flags)
+ {
+-      struct rt6_info *rt6 = (struct rt6_info *)dst;
++      struct rt6_info *rt6 = dst_rt6_info(dst);
+       struct rt6key *rt6_dst, *rt6_src;
+       u32 *pmetrics, table, rt6_flags;
+       unsigned char nh_flags = 0;
+@@ -6111,7 +6111,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+       }
+-      rt = container_of(dst, struct rt6_info, dst);
++      rt = dst_rt6_info(dst);
+       if (rt->dst.error) {
+               err = rt->dst.error;
+               ip6_rt_put(rt);
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index b149f54120682..423538535df67 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -95,11 +95,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+       struct dst_entry *dst = skb_dst(skb);
+       if (dst && dst_hold_safe(dst)) {
+-              const struct rt6_info *rt = (const struct rt6_info *)dst;
+-
+               rcu_assign_pointer(sk->sk_rx_dst, dst);
+               sk->sk_rx_dst_ifindex = skb->skb_iif;
+-              sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
++              sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
+       }
+ }
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index e0dd5bc2b30eb..acafa0cdf74a8 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -910,11 +910,8 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
+ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+ {
+-      if (udp_sk_rx_dst_set(sk, dst)) {
+-              const struct rt6_info *rt = (const struct rt6_info *)dst;
+-
+-              sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
+-      }
++      if (udp_sk_rx_dst_set(sk, dst))
++              sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
+ }
+ /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
+@@ -1585,7 +1582,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+               skb = ip6_make_skb(sk, getfrag, msg, ulen,
+                                  sizeof(struct udphdr), &ipc6,
+-                                 (struct rt6_info *)dst,
++                                 dst_rt6_info(dst),
+                                  msg->msg_flags, &cork);
+               err = PTR_ERR(skb);
+               if (!IS_ERR_OR_NULL(skb))
+@@ -1612,7 +1609,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+               ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
+       up->len += ulen;
+       err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
+-                            &ipc6, fl6, (struct rt6_info *)dst,
++                            &ipc6, fl6, dst_rt6_info(dst),
+                             corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
+       if (err)
+               udp_v6_flush_pending_frames(sk);
+diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+index 42fb6996b0777..ce48173c60e56 100644
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -70,7 +70,7 @@ static int xfrm6_get_saddr(struct net *net, int oif,
+ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+                         const struct flowi *fl)
+ {
+-      struct rt6_info *rt = (struct rt6_info *)xdst->route;
++      struct rt6_info *rt = dst_rt6_info(xdst->route);
+       xdst->u.dst.dev = dev;
+       netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
+diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
+index 7bf14cf9ffaa9..8780ec64f3769 100644
+--- a/net/l2tp/l2tp_ip6.c
++++ b/net/l2tp/l2tp_ip6.c
+@@ -630,7 +630,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
+       err = ip6_append_data(sk, ip_generic_getfrag, msg,
+                             ulen, transhdrlen, &ipc6,
+-                            &fl6, (struct rt6_info *)dst,
++                            &fl6, dst_rt6_info(dst),
+                             msg->msg_flags);
+       if (err)
+               ip6_flush_pending_frames(sk);
+diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
+index 8fc790f2a01bb..606349c8df0e6 100644
+--- a/net/mpls/mpls_iptunnel.c
++++ b/net/mpls/mpls_iptunnel.c
+@@ -90,7 +90,7 @@ static int mpls_xmit(struct sk_buff *skb)
+                       ttl = net->mpls.default_ttl;
+               else
+                       ttl = ipv6_hdr(skb)->hop_limit;
+-              rt6 = (struct rt6_info *)dst;
++              rt6 = dst_rt6_info(dst);
+       } else {
+               goto drop;
+       }
+diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
+index 65e0259178da4..5cd511162bc03 100644
+--- a/net/netfilter/ipvs/ip_vs_xmit.c
++++ b/net/netfilter/ipvs/ip_vs_xmit.c
+@@ -180,7 +180,7 @@ static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
+                       (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+                       (addr_type & IPV6_ADDR_LOOPBACK);
+               old_rt_is_local = __ip_vs_is_local_route6(
+-                      (struct rt6_info *)skb_dst(skb));
++                      dst_rt6_info(skb_dst(skb)));
+       } else
+ #endif
+       {
+@@ -481,7 +481,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+       if (dest) {
+               dest_dst = __ip_vs_dst_check(dest);
+               if (likely(dest_dst))
+-                      rt = (struct rt6_info *) dest_dst->dst_cache;
++                      rt = dst_rt6_info(dest_dst->dst_cache);
+               else {
+                       u32 cookie;
+@@ -501,7 +501,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+                               ip_vs_dest_dst_free(dest_dst);
+                               goto err_unreach;
+                       }
+-                      rt = (struct rt6_info *) dst;
++                      rt = dst_rt6_info(dst);
+                       cookie = rt6_get_cookie(rt);
+                       __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
+                       spin_unlock_bh(&dest->dst_lock);
+@@ -517,7 +517,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+                                             rt_mode);
+               if (!dst)
+                       goto err_unreach;
+-              rt = (struct rt6_info *) dst;
++              rt = dst_rt6_info(dst);
+       }
+       local = __ip_vs_is_local_route6(rt);
+@@ -862,7 +862,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+                                     IP_VS_RT_MODE_RDR);
+       if (local < 0)
+               goto tx_error;
+-      rt = (struct rt6_info *) skb_dst(skb);
++      rt = dst_rt6_info(skb_dst(skb));
+       /*
+        * Avoid duplicate tuple in reply direction for NAT traffic
+        * to local address when connection is sync-ed
+@@ -1288,7 +1288,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+       if (local)
+               return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
+-      rt = (struct rt6_info *) skb_dst(skb);
++      rt = dst_rt6_info(skb_dst(skb));
+       tdev = rt->dst.dev;
+       /*
+@@ -1590,7 +1590,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
+                                     &cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
+       if (local < 0)
+               goto tx_error;
+-      rt = (struct rt6_info *) skb_dst(skb);
++      rt = dst_rt6_info(skb_dst(skb));
+       /*
+        * Avoid duplicate tuple in reply direction for NAT traffic
+        * to local address when connection is sync-ed
+diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
+index a0571339239c4..5c1ff07eaee0b 100644
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -77,12 +77,8 @@ EXPORT_SYMBOL_GPL(flow_offload_alloc);
+ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
+ {
+-      const struct rt6_info *rt;
+-
+-      if (flow_tuple->l3proto == NFPROTO_IPV6) {
+-              rt = (const struct rt6_info *)flow_tuple->dst_cache;
+-              return rt6_get_cookie(rt);
+-      }
++      if (flow_tuple->l3proto == NFPROTO_IPV6)
++              return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache));
+       return 0;
+ }
+diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
+index 5383bed3d3e00..100887beed314 100644
+--- a/net/netfilter/nf_flow_table_ip.c
++++ b/net/netfilter/nf_flow_table_ip.c
+@@ -729,7 +729,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+               return NF_ACCEPT;
+       if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+-              rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
++              rt = dst_rt6_info(tuplehash->tuple.dst_cache);
+               memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+               IP6CB(skb)->iif = skb->dev->ifindex;
+               IP6CB(skb)->flags = IP6SKB_FORWARDED;
+@@ -741,7 +741,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+       switch (tuplehash->tuple.xmit_type) {
+       case FLOW_OFFLOAD_XMIT_NEIGH:
+-              rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
++              rt = dst_rt6_info(tuplehash->tuple.dst_cache);
+               outdev = rt->dst.dev;
+               skb->dev = outdev;
+               nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
+index 24d9771385729..2434c624aafde 100644
+--- a/net/netfilter/nft_rt.c
++++ b/net/netfilter/nft_rt.c
+@@ -80,7 +80,7 @@ void nft_rt_get_eval(const struct nft_expr *expr,
+               if (nft_pf(pkt) != NFPROTO_IPV6)
+                       goto err;
+-              memcpy(dest, rt6_nexthop((struct rt6_info *)dst,
++              memcpy(dest, rt6_nexthop(dst_rt6_info(dst),
+                                        &ipv6_hdr(skb)->daddr),
+                      sizeof(struct in6_addr));
+               break;
+diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
+index 24368f755ab19..f7b809c0d142c 100644
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -415,7 +415,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
+       if (!IS_ERR_OR_NULL(dst)) {
+               struct rt6_info *rt;
+-              rt = (struct rt6_info *)dst;
++              rt = dst_rt6_info(dst);
+               t->dst_cookie = rt6_get_cookie(rt);
+               pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n",
+                        &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index 53d8fabfa6858..1702eea537e7e 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -2598,8 +2598,7 @@ static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+                          int nfheader_len)
+ {
+       if (dst->ops->family == AF_INET6) {
+-              struct rt6_info *rt = (struct rt6_info *)dst;
+-              path->path_cookie = rt6_get_cookie(rt);
++              path->path_cookie = rt6_get_cookie(dst_rt6_info(dst));
+               path->u.rt6.rt6i_nfheader_len = nfheader_len;
+       }
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.9/ipvlan-dont-use-skb-sk-in-ipvlan_process_v-4-6-_outb.patch b/queue-6.9/ipvlan-dont-use-skb-sk-in-ipvlan_process_v-4-6-_outb.patch
new file mode 100644 (file)
index 0000000..33b366d
--- /dev/null
@@ -0,0 +1,107 @@
+From d7e0abcfac9cdf96919fbef640f14c0ad9f4de03 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 17:56:33 +0800
+Subject: ipvlan: Dont Use skb->sk in ipvlan_process_v{4,6}_outbound
+
+From: Yue Haibing <yuehaibing@huawei.com>
+
+[ Upstream commit b3dc6e8003b500861fa307e9a3400c52e78e4d3a ]
+
+A raw packet from a PF_PACKET socket on top of an IPv6-backed ipvlan device will
+hit WARN_ON_ONCE() in sk_mc_loop() through the sch_direct_xmit() path.
+
+WARNING: CPU: 2 PID: 0 at net/core/sock.c:775 sk_mc_loop+0x2d/0x70
+Modules linked in: sch_netem ipvlan rfkill cirrus drm_shmem_helper sg drm_kms_helper
+CPU: 2 PID: 0 Comm: swapper/2 Kdump: loaded Not tainted 6.9.0+ #279
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
+RIP: 0010:sk_mc_loop+0x2d/0x70
+Code: fa 0f 1f 44 00 00 65 0f b7 15 f7 96 a3 4f 31 c0 66 85 d2 75 26 48 85 ff 74 1c
+RSP: 0018:ffffa9584015cd78 EFLAGS: 00010212
+RAX: 0000000000000011 RBX: ffff91e585793e00 RCX: 0000000002c6a001
+RDX: 0000000000000000 RSI: 0000000000000040 RDI: ffff91e589c0f000
+RBP: ffff91e5855bd100 R08: 0000000000000000 R09: 3d00545216f43d00
+R10: ffff91e584fdcc50 R11: 00000060dd8616f4 R12: ffff91e58132d000
+R13: ffff91e584fdcc68 R14: ffff91e5869ce800 R15: ffff91e589c0f000
+FS:  0000000000000000(0000) GS:ffff91e898100000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f788f7c44c0 CR3: 0000000008e1a000 CR4: 00000000000006f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+<IRQ>
+ ? __warn (kernel/panic.c:693)
+ ? sk_mc_loop (net/core/sock.c:760)
+ ? report_bug (lib/bug.c:201 lib/bug.c:219)
+ ? handle_bug (arch/x86/kernel/traps.c:239)
+ ? exc_invalid_op (arch/x86/kernel/traps.c:260 (discriminator 1))
+ ? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621)
+ ? sk_mc_loop (net/core/sock.c:760)
+ ip6_finish_output2 (net/ipv6/ip6_output.c:83 (discriminator 1))
+ ? nf_hook_slow (net/netfilter/core.c:626)
+ ip6_finish_output (net/ipv6/ip6_output.c:222)
+ ? __pfx_ip6_finish_output (net/ipv6/ip6_output.c:215)
+ ipvlan_xmit_mode_l3 (drivers/net/ipvlan/ipvlan_core.c:602) ipvlan
+ ipvlan_start_xmit (drivers/net/ipvlan/ipvlan_main.c:226) ipvlan
+ dev_hard_start_xmit (net/core/dev.c:3594)
+ sch_direct_xmit (net/sched/sch_generic.c:343)
+ __qdisc_run (net/sched/sch_generic.c:416)
+ net_tx_action (net/core/dev.c:5286)
+ handle_softirqs (kernel/softirq.c:555)
+ __irq_exit_rcu (kernel/softirq.c:589)
+ sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1043)
+
+The warning is triggered as follows:
+packet_sendmsg
+   packet_snd //skb->sk is packet sk
+      __dev_queue_xmit
+         __dev_xmit_skb //q->enqueue is not NULL
+             __qdisc_run
+               sch_direct_xmit
+                 dev_hard_start_xmit
+                   ipvlan_start_xmit
+                      ipvlan_xmit_mode_l3 //l3 mode
+                        ipvlan_process_outbound //vepa flag
+                          ipvlan_process_v6_outbound
+                            ip6_local_out
+                                __ip6_finish_output
+                                  ip6_finish_output2 //multicast packet
+                                    sk_mc_loop //sk->sk_family is AF_PACKET
+
+Call ip{6}_local_out() with a NULL sk in ipvlan, as other tunnels do, to fix this.
+
+Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.")
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240529095633.613103-1-yuehaibing@huawei.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ipvlan/ipvlan_core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
+index 2d5b021b4ea60..fef4eff7753a7 100644
+--- a/drivers/net/ipvlan/ipvlan_core.c
++++ b/drivers/net/ipvlan/ipvlan_core.c
+@@ -439,7 +439,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
+       memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+-      err = ip_local_out(net, skb->sk, skb);
++      err = ip_local_out(net, NULL, skb);
+       if (unlikely(net_xmit_eval(err)))
+               DEV_STATS_INC(dev, tx_errors);
+       else
+@@ -494,7 +494,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+       memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+-      err = ip6_local_out(dev_net(dev), skb->sk, skb);
++      err = ip6_local_out(dev_net(dev), NULL, skb);
+       if (unlikely(net_xmit_eval(err)))
+               DEV_STATS_INC(dev, tx_errors);
+       else
+-- 
+2.43.0
+
diff --git a/queue-6.9/kconfig-fix-comparison-to-constant-symbols-m-n.patch b/queue-6.9/kconfig-fix-comparison-to-constant-symbols-m-n.patch
new file mode 100644 (file)
index 0000000..57db783
--- /dev/null
@@ -0,0 +1,130 @@
+From abfec13214d324dadf62942be8062c101450681c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 19 May 2024 18:22:27 +0900
+Subject: kconfig: fix comparison to constant symbols, 'm', 'n'
+
+From: Masahiro Yamada <masahiroy@kernel.org>
+
+[ Upstream commit aabdc960a283ba78086b0bf66ee74326f49e218e ]
+
+Currently, comparisons to 'm' or 'n' result in incorrect output.
+
+[Test Code]
+
+    config MODULES
+            def_bool y
+            modules
+
+    config A
+            def_tristate m
+
+    config B
+            def_bool A > n
+
+CONFIG_B is unset, while CONFIG_B=y is expected.
+
+The issue occurs because Kconfig compares the tristate values
+as strings.
+
+Currently, the .type fields in the constant symbol definitions,
+symbol_{yes,mod,no} are unspecified, i.e., S_UNKNOWN.
+
+When expr_calc_value() evaluates 'A > n', it checks the types of 'A' and
+'n' to determine how to compare them.
+
+The left-hand side, 'A', is a tristate symbol with a value of 'm', which
+corresponds to a numeric value of 1. (Internally, 'y', 'm', and 'n' are
+represented as 2, 1, and 0, respectively.)
+
+The right-hand side, 'n', has an unknown type, so it is treated as the
+string "n" during the comparison.
+
+expr_calc_value() compares two values numerically only when both can
+have numeric values. Otherwise, they are compared as strings.
+
+    symbol    numeric value    ASCII code
+    -------------------------------------
+      y           2             0x79
+      m           1             0x6d
+      n           0             0x6e
+
+'m' is greater than 'n' if compared numerically (since 1 is greater
+than 0), but smaller than 'n' if compared as strings (since the ASCII
+code 0x6d is smaller than 0x6e).
+
+Specifying .type=S_TRISTATE for symbol_{yes,mod,no} fixes the above
+test code.
+
+Doing so, however, would cause a regression to the following test code.
+
+[Test Code 2]
+
+    config MODULES
+            def_bool n
+            modules
+
+    config A
+            def_tristate n
+
+    config B
+            def_bool A = m
+
+You would get CONFIG_B=y, while CONFIG_B should not be set.
+
+The reason is that sym_get_string_value() turns 'm' into 'n' when the
+module feature is disabled. Consequently, expr_calc_value() evaluates
+'A = n' instead of 'A = m'. This oddity has been hidden because the type
+of 'm' was previously S_UNKNOWN instead of S_TRISTATE.
+
+sym_get_string_value() should not tweak the string because the tristate
+value has already been correctly calculated. There is no reason to
+return the string "n" where its tristate value is mod.
+
+Fixes: 31847b67bec0 ("kconfig: allow use of relations other than (in)equality")
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ scripts/kconfig/symbol.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
+index 81fe1884ef8ae..67a509778135b 100644
+--- a/scripts/kconfig/symbol.c
++++ b/scripts/kconfig/symbol.c
+@@ -14,6 +14,7 @@
+ struct symbol symbol_yes = {
+       .name = "y",
++      .type = S_TRISTATE,
+       .curr = { "y", yes },
+       .menus = LIST_HEAD_INIT(symbol_yes.menus),
+       .flags = SYMBOL_CONST|SYMBOL_VALID,
+@@ -21,6 +22,7 @@ struct symbol symbol_yes = {
+ struct symbol symbol_mod = {
+       .name = "m",
++      .type = S_TRISTATE,
+       .curr = { "m", mod },
+       .menus = LIST_HEAD_INIT(symbol_mod.menus),
+       .flags = SYMBOL_CONST|SYMBOL_VALID,
+@@ -28,6 +30,7 @@ struct symbol symbol_mod = {
+ struct symbol symbol_no = {
+       .name = "n",
++      .type = S_TRISTATE,
+       .curr = { "n", no },
+       .menus = LIST_HEAD_INIT(symbol_no.menus),
+       .flags = SYMBOL_CONST|SYMBOL_VALID,
+@@ -788,8 +791,7 @@ const char *sym_get_string_value(struct symbol *sym)
+               case no:
+                       return "n";
+               case mod:
+-                      sym_calc_value(modules_sym);
+-                      return (modules_sym->curr.tri == no) ? "n" : "m";
++                      return "m";
+               case yes:
+                       return "y";
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.9/kheaders-use-command-v-to-test-for-existence-of-cpio.patch b/queue-6.9/kheaders-use-command-v-to-test-for-existence-of-cpio.patch
new file mode 100644 (file)
index 0000000..2c71eaa
--- /dev/null
@@ -0,0 +1,57 @@
+From c668f976b45f1326e3afea2262d68b50bd544adf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 May 2024 18:31:50 +0200
+Subject: kheaders: use `command -v` to test for existence of `cpio`
+
+From: Miguel Ojeda <ojeda@kernel.org>
+
+[ Upstream commit 6e58e0173507e506a5627741358bc770f220e356 ]
+
+Commit 13e1df09284d ("kheaders: explicitly validate existence of cpio
+command") added an explicit check for `cpio` using `type`.
+
+However, `type` in `dash` (which is used in some popular distributions
+and base images as the shell script runner) prints the missing message
+to standard output, and thus no error is printed:
+
+    $ bash -c 'type missing >/dev/null'
+    bash: line 1: type: missing: not found
+    $ dash -c 'type missing >/dev/null'
+    $
+
+For instance, this issue may be seen by loongarch builders, given its
+defconfig enables CONFIG_IKHEADERS since commit 9cc1df421f00 ("LoongArch:
+Update Loongson-3 default config file").
+
+Therefore, use `command -v` instead to have consistent behavior, and
+take the chance to provide a more explicit error.
+
+Fixes: 13e1df09284d ("kheaders: explicitly validate existence of cpio command")
+Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/gen_kheaders.sh | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
+index 6d443ea22bb73..4ba5fd3d73ae2 100755
+--- a/kernel/gen_kheaders.sh
++++ b/kernel/gen_kheaders.sh
+@@ -14,7 +14,12 @@ include/
+ arch/$SRCARCH/include/
+ "
+-type cpio > /dev/null
++if ! command -v cpio >/dev/null; then
++      echo >&2 "***"
++      echo >&2 "*** 'cpio' could not be found."
++      echo >&2 "***"
++      exit 1
++fi
+ # Support incremental builds by skipping archive generation
+ # if timestamps of files being archived are not changed.
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-dsa-microchip-fix-rgmii-error-in-ksz-dsa-driver.patch b/queue-6.9/net-dsa-microchip-fix-rgmii-error-in-ksz-dsa-driver.patch
new file mode 100644 (file)
index 0000000..af5062e
--- /dev/null
@@ -0,0 +1,39 @@
+From 18388cccf79b9124d9f65d118d6a8f62a5310ccf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 May 2024 14:34:26 -0700
+Subject: net: dsa: microchip: fix RGMII error in KSZ DSA driver
+
+From: Tristram Ha <tristram.ha@microchip.com>
+
+[ Upstream commit 278d65ccdadb5f0fa0ceaf7b9cc97b305cd72822 ]
+
+The driver should return RMII interface when XMII is running in RMII mode.
+
+Fixes: 0ab7f6bf1675 ("net: dsa: microchip: ksz9477: use common xmii function")
+Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
+Acked-by: Arun Ramadoss <arun.ramadoss@microchip.com>
+Acked-by: Jerry Ray <jerry.ray@microchip.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/1716932066-3342-1-git-send-email-Tristram.Ha@microchip.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz_common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
+index 2b510f150dd88..2a5861a88d0e6 100644
+--- a/drivers/net/dsa/microchip/ksz_common.c
++++ b/drivers/net/dsa/microchip/ksz_common.c
+@@ -3050,7 +3050,7 @@ phy_interface_t ksz_get_xmii(struct ksz_device *dev, int port, bool gbit)
+               else
+                       interface = PHY_INTERFACE_MODE_MII;
+       } else if (val == bitval[P_RMII_SEL]) {
+-              interface = PHY_INTERFACE_MODE_RGMII;
++              interface = PHY_INTERFACE_MODE_RMII;
+       } else {
+               interface = PHY_INTERFACE_MODE_RGMII;
+               if (data8 & P_RGMII_ID_EG_ENABLE)
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-ena-fix-redundant-device-numa-node-override.patch b/queue-6.9/net-ena-fix-redundant-device-numa-node-override.patch
new file mode 100644 (file)
index 0000000..941c338
--- /dev/null
@@ -0,0 +1,85 @@
+From 71425eacdfe1086fec65640c93cfeeba4e1b3df5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 May 2024 20:09:12 +0300
+Subject: net: ena: Fix redundant device NUMA node override
+
+From: Shay Agroskin <shayagr@amazon.com>
+
+[ Upstream commit 2dc8b1e7177d4f49f492ce648440caf2de0c3616 ]
+
+The driver overrides the NUMA node id of the device regardless of
+whether it knows its correct value (often setting it to -1 even though
+the node id is advertised in 'struct device'). This can lead to
+suboptimal configurations.
+
+This patch fixes this behavior and makes the shared memory allocation
+functions use the NUMA node id advertised by the underlying device.
+
+Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)")
+Signed-off-by: Shay Agroskin <shayagr@amazon.com>
+Link: https://lore.kernel.org/r/20240528170912.1204417-1-shayagr@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amazon/ena/ena_com.c | 11 -----------
+ 1 file changed, 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
+index 2d8a66ea82fab..713a595370bff 100644
+--- a/drivers/net/ethernet/amazon/ena/ena_com.c
++++ b/drivers/net/ethernet/amazon/ena/ena_com.c
+@@ -312,7 +312,6 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
+                             struct ena_com_io_sq *io_sq)
+ {
+       size_t size;
+-      int dev_node = 0;
+       memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr));
+@@ -325,12 +324,9 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
+       size = io_sq->desc_entry_size * io_sq->q_depth;
+       if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
+-              dev_node = dev_to_node(ena_dev->dmadev);
+-              set_dev_node(ena_dev->dmadev, ctx->numa_node);
+               io_sq->desc_addr.virt_addr =
+                       dma_alloc_coherent(ena_dev->dmadev, size, &io_sq->desc_addr.phys_addr,
+                                          GFP_KERNEL);
+-              set_dev_node(ena_dev->dmadev, dev_node);
+               if (!io_sq->desc_addr.virt_addr) {
+                       io_sq->desc_addr.virt_addr =
+                               dma_alloc_coherent(ena_dev->dmadev, size,
+@@ -354,10 +350,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
+               size = (size_t)io_sq->bounce_buf_ctrl.buffer_size *
+                       io_sq->bounce_buf_ctrl.buffers_num;
+-              dev_node = dev_to_node(ena_dev->dmadev);
+-              set_dev_node(ena_dev->dmadev, ctx->numa_node);
+               io_sq->bounce_buf_ctrl.base_buffer = devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+-              set_dev_node(ena_dev->dmadev, dev_node);
+               if (!io_sq->bounce_buf_ctrl.base_buffer)
+                       io_sq->bounce_buf_ctrl.base_buffer =
+                               devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+@@ -397,7 +390,6 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
+                             struct ena_com_io_cq *io_cq)
+ {
+       size_t size;
+-      int prev_node = 0;
+       memset(&io_cq->cdesc_addr, 0x0, sizeof(io_cq->cdesc_addr));
+@@ -409,11 +401,8 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
+       size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
+-      prev_node = dev_to_node(ena_dev->dmadev);
+-      set_dev_node(ena_dev->dmadev, ctx->numa_node);
+       io_cq->cdesc_addr.virt_addr =
+               dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr, GFP_KERNEL);
+-      set_dev_node(ena_dev->dmadev, prev_node);
+       if (!io_cq->cdesc_addr.virt_addr) {
+               io_cq->cdesc_addr.virt_addr =
+                       dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr,
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-fec-add-fec_enet_deinit.patch b/queue-6.9/net-fec-add-fec_enet_deinit.patch
new file mode 100644 (file)
index 0000000..c23df7f
--- /dev/null
@@ -0,0 +1,63 @@
+From 285631c7f6079d5f3bad85274910b71c9c5c8ad5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2024 13:05:28 +0800
+Subject: net:fec: Add fec_enet_deinit()
+
+From: Xiaolei Wang <xiaolei.wang@windriver.com>
+
+[ Upstream commit bf0497f53c8535f99b72041529d3f7708a6e2c0d ]
+
+When fec_probe() fails, or when fec_drv_remove() runs, the fec queue
+must be released and the NAPI context removed. Therefore add
+fec_enet_deinit(), the counterpart of fec_enet_init(), and call it to
+release the memory and remove the NAPI context.
+
+Fixes: 59d0f7465644 ("net: fec: init multi queue date structure")
+Signed-off-by: Xiaolei Wang <xiaolei.wang@windriver.com>
+Reviewed-by: Wei Fang <wei.fang@nxp.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/20240524050528.4115581-1-xiaolei.wang@windriver.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
+index a72d8a2eb0b31..881ece735dcf1 100644
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -4130,6 +4130,14 @@ static int fec_enet_init(struct net_device *ndev)
+       return ret;
+ }
++static void fec_enet_deinit(struct net_device *ndev)
++{
++      struct fec_enet_private *fep = netdev_priv(ndev);
++
++      netif_napi_del(&fep->napi);
++      fec_enet_free_queue(ndev);
++}
++
+ #ifdef CONFIG_OF
+ static int fec_reset_phy(struct platform_device *pdev)
+ {
+@@ -4524,6 +4532,7 @@ fec_probe(struct platform_device *pdev)
+       fec_enet_mii_remove(fep);
+ failed_mii_init:
+ failed_irq:
++      fec_enet_deinit(ndev);
+ failed_init:
+       fec_ptp_stop(pdev);
+ failed_reset:
+@@ -4587,6 +4596,7 @@ fec_drv_remove(struct platform_device *pdev)
+       pm_runtime_put_noidle(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
++      fec_enet_deinit(ndev);
+       free_netdev(ndev);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-fix-__dst_negative_advice-race.patch b/queue-6.9/net-fix-__dst_negative_advice-race.patch
new file mode 100644 (file)
index 0000000..7141903
--- /dev/null
@@ -0,0 +1,206 @@
+From bced247749735087e6c7b06a0675d0fa62eb2674 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 May 2024 11:43:53 +0000
+Subject: net: fix __dst_negative_advice() race
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 92f1655aa2b2294d0b49925f3b875a634bd3b59e ]
+
+__dst_negative_advice() does not enforce proper RCU rules when
+sk->sk_dst_cache must be cleared, leading to possible UAF.
+
+RCU rules are that we must first clear sk->sk_dst_cache,
+then call dst_release(old_dst).
+
+Note that sk_dst_reset(sk) is implementing this protocol correctly,
+while __dst_negative_advice() uses the wrong order.
+
+Given that ip6_negative_advice() has special logic
+against RTF_CACHE, this means each of the three ->negative_advice()
+existing methods must perform the sk_dst_reset() themselves.
+
+Note the check against NULL dst is centralized in
+__dst_negative_advice(), there is no need to duplicate
+it in various callbacks.
+
+Many thanks to Clement Lecigne for tracking this issue.
+
+This old bug became visible after the blamed commit, using UDP sockets.
+
+Fixes: a87cb3e48ee8 ("net: Facility to report route quality of connected sockets")
+Reported-by: Clement Lecigne <clecigne@google.com>
+Diagnosed-by: Clement Lecigne <clecigne@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Tom Herbert <tom@herbertland.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20240528114353.1794151-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/dst_ops.h  |  2 +-
+ include/net/sock.h     | 13 +++----------
+ net/ipv4/route.c       | 22 ++++++++--------------
+ net/ipv6/route.c       | 29 +++++++++++++++--------------
+ net/xfrm/xfrm_policy.c | 11 +++--------
+ 5 files changed, 30 insertions(+), 47 deletions(-)
+
+diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
+index 6d1c8541183db..3a9001a042a5c 100644
+--- a/include/net/dst_ops.h
++++ b/include/net/dst_ops.h
+@@ -24,7 +24,7 @@ struct dst_ops {
+       void                    (*destroy)(struct dst_entry *);
+       void                    (*ifdown)(struct dst_entry *,
+                                         struct net_device *dev);
+-      struct dst_entry *      (*negative_advice)(struct dst_entry *);
++      void                    (*negative_advice)(struct sock *sk, struct dst_entry *);
+       void                    (*link_failure)(struct sk_buff *);
+       void                    (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
+                                              struct sk_buff *skb, u32 mtu,
+diff --git a/include/net/sock.h b/include/net/sock.h
+index b4b553df7870c..944f71a8ab223 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2134,17 +2134,10 @@ sk_dst_get(const struct sock *sk)
+ static inline void __dst_negative_advice(struct sock *sk)
+ {
+-      struct dst_entry *ndst, *dst = __sk_dst_get(sk);
++      struct dst_entry *dst = __sk_dst_get(sk);
+-      if (dst && dst->ops->negative_advice) {
+-              ndst = dst->ops->negative_advice(dst);
+-
+-              if (ndst != dst) {
+-                      rcu_assign_pointer(sk->sk_dst_cache, ndst);
+-                      sk_tx_queue_clear(sk);
+-                      WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
+-              }
+-      }
++      if (dst && dst->ops->negative_advice)
++              dst->ops->negative_advice(sk, dst);
+ }
+ static inline void dst_negative_advice(struct sock *sk)
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 12738051ebea7..3fcf084fbda5d 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -132,7 +132,8 @@ struct dst_entry   *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
+ static unsigned int    ipv4_default_advmss(const struct dst_entry *dst);
+ INDIRECT_CALLABLE_SCOPE
+ unsigned int          ipv4_mtu(const struct dst_entry *dst);
+-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
++static void           ipv4_negative_advice(struct sock *sk,
++                                           struct dst_entry *dst);
+ static void            ipv4_link_failure(struct sk_buff *skb);
+ static void            ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+                                          struct sk_buff *skb, u32 mtu,
+@@ -837,22 +838,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
+       __ip_do_redirect(rt, skb, &fl4, true);
+ }
+-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
++static void ipv4_negative_advice(struct sock *sk,
++                               struct dst_entry *dst)
+ {
+       struct rtable *rt = dst_rtable(dst);
+-      struct dst_entry *ret = dst;
+-      if (rt) {
+-              if (dst->obsolete > 0) {
+-                      ip_rt_put(rt);
+-                      ret = NULL;
+-              } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
+-                         rt->dst.expires) {
+-                      ip_rt_put(rt);
+-                      ret = NULL;
+-              }
+-      }
+-      return ret;
++      if ((dst->obsolete > 0) ||
++          (rt->rt_flags & RTCF_REDIRECTED) ||
++          rt->dst.expires)
++              sk_dst_reset(sk);
+ }
+ /*
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 3e0b2cb20fd20..8d5257c3f0842 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -87,7 +87,8 @@ struct dst_entry     *ip6_dst_check(struct dst_entry *dst, u32 cookie);
+ static unsigned int    ip6_default_advmss(const struct dst_entry *dst);
+ INDIRECT_CALLABLE_SCOPE
+ unsigned int          ip6_mtu(const struct dst_entry *dst);
+-static struct dst_entry *ip6_negative_advice(struct dst_entry *);
++static void           ip6_negative_advice(struct sock *sk,
++                                          struct dst_entry *dst);
+ static void           ip6_dst_destroy(struct dst_entry *);
+ static void           ip6_dst_ifdown(struct dst_entry *,
+                                      struct net_device *dev);
+@@ -2770,24 +2771,24 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
+ }
+ EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
+-static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
++static void ip6_negative_advice(struct sock *sk,
++                              struct dst_entry *dst)
+ {
+       struct rt6_info *rt = dst_rt6_info(dst);
+-      if (rt) {
+-              if (rt->rt6i_flags & RTF_CACHE) {
+-                      rcu_read_lock();
+-                      if (rt6_check_expired(rt)) {
+-                              rt6_remove_exception_rt(rt);
+-                              dst = NULL;
+-                      }
+-                      rcu_read_unlock();
+-              } else {
+-                      dst_release(dst);
+-                      dst = NULL;
++      if (rt->rt6i_flags & RTF_CACHE) {
++              rcu_read_lock();
++              if (rt6_check_expired(rt)) {
++                      /* counteract the dst_release() in sk_dst_reset() */
++                      dst_hold(dst);
++                      sk_dst_reset(sk);
++
++                      rt6_remove_exception_rt(rt);
+               }
++              rcu_read_unlock();
++              return;
+       }
+-      return dst;
++      sk_dst_reset(sk);
+ }
+ static void ip6_link_failure(struct sk_buff *skb)
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index 1702eea537e7e..d154597728d51 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -3904,15 +3904,10 @@ static void xfrm_link_failure(struct sk_buff *skb)
+       /* Impossible. Such dst must be popped before reaches point of failure. */
+ }
+-static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
++static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst)
+ {
+-      if (dst) {
+-              if (dst->obsolete) {
+-                      dst_release(dst);
+-                      dst = NULL;
+-              }
+-      }
+-      return dst;
++      if (dst->obsolete)
++              sk_dst_reset(sk);
+ }
+ static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-micrel-fix-lan8841_config_intr-after-getting-out.patch b/queue-6.9/net-micrel-fix-lan8841_config_intr-after-getting-out.patch
new file mode 100644 (file)
index 0000000..4c23e4d
--- /dev/null
@@ -0,0 +1,62 @@
+From f6e6aaa617c2f8a51f5ade9b8d29381851563fc9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2024 10:53:50 +0200
+Subject: net: micrel: Fix lan8841_config_intr after getting out of sleep mode
+
+From: Horatiu Vultur <horatiu.vultur@microchip.com>
+
+[ Upstream commit 4fb679040d9f758eeb3b4d01bbde6405bf20e64e ]
+
+When the interrupt is enabled, the function lan8841_config_intr tries to
+clear any pending interrupts by reading the interrupt status, then
+checks the return value for errors and then continues to enable the
+interrupt. It has been seen that once the system gets out of sleep mode,
+the interrupt status has the value 0x400 meaning that the PHY detected
+that the link was in low power. That is the correct value but the problem is
+that the check is wrong.  We try to check for errors but we return an
+error also in this case which is not an error. Therefore fix this by
+returning only when there is an error.
+
+Fixes: a8f1a19d27ef ("net: micrel: Add support for lan8841 PHY")
+Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
+Reviewed-by: Suman Ghosh <sumang@marvell.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Link: https://lore.kernel.org/r/20240524085350.359812-1-horatiu.vultur@microchip.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/micrel.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
+index 18dee364e2b31..13370439a7cae 100644
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -3516,7 +3516,7 @@ static int lan8841_config_intr(struct phy_device *phydev)
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+               err = phy_read(phydev, LAN8814_INTS);
+-              if (err)
++              if (err < 0)
+                       return err;
+               /* Enable / disable interrupts. It is OK to enable PTP interrupt
+@@ -3532,6 +3532,14 @@ static int lan8841_config_intr(struct phy_device *phydev)
+                       return err;
+               err = phy_read(phydev, LAN8814_INTS);
++              if (err < 0)
++                      return err;
++
++              /* Getting a positive value doesn't mean that is an error, it
++               * just indicates what was the status. Therefore make sure to
++               * clear the value and say that there is no error.
++               */
++              err = 0;
+       }
+       return err;
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-mlx5-do-not-query-mpir-on-embedded-cpu-function.patch b/queue-6.9/net-mlx5-do-not-query-mpir-on-embedded-cpu-function.patch
new file mode 100644 (file)
index 0000000..2736993
--- /dev/null
@@ -0,0 +1,59 @@
+From 015f3fc129ab95839e23c4d52884e97ae3d0ef9d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 May 2024 22:26:53 +0300
+Subject: net/mlx5: Do not query MPIR on embedded CPU function
+
+From: Tariq Toukan <tariqt@nvidia.com>
+
+[ Upstream commit fca3b4791850b7e2181f0b3195b66d53df83151b ]
+
+A proper query to MPIR needs to set the correct value in the depth field.
+On embedded CPU this value is not necessarily zero. As there is no real
+use case for multi-PF netdev on the embedded CPU of the smart NIC, block
+this option.
+
+This fixes the following failure:
+ACCESS_REG(0x805) op_mod(0x1) failed, status bad system state(0x4), syndrome (0x685f19), err(-5)
+
+Fixes: 678eb448055a ("net/mlx5: SD, Implement basic query and instantiation")
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+index dd5d186dc6148..f6deb5a3f8202 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+@@ -100,10 +100,6 @@ static bool ft_create_alias_supported(struct mlx5_core_dev *dev)
+ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses)
+ {
+-      /* Feature is currently implemented for PFs only */
+-      if (!mlx5_core_is_pf(dev))
+-              return false;
+-
+       /* Honor the SW implementation limit */
+       if (host_buses > MLX5_SD_MAX_GROUP_SZ)
+               return false;
+@@ -162,6 +158,14 @@ static int sd_init(struct mlx5_core_dev *dev)
+       bool sdm;
+       int err;
++      /* Feature is currently implemented for PFs only */
++      if (!mlx5_core_is_pf(dev))
++              return 0;
++
++      /* Block on embedded CPU PFs */
++      if (mlx5_core_is_ecpf(dev))
++              return 0;
++
+       if (!MLX5_CAP_MCAM_REG(dev, mpir))
+               return 0;
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-mlx5-fix-mtmp-register-capability-offset-in-mcam.patch b/queue-6.9/net-mlx5-fix-mtmp-register-capability-offset-in-mcam.patch
new file mode 100644 (file)
index 0000000..1880525
--- /dev/null
@@ -0,0 +1,42 @@
+From 99cf8e448dc95df57524e8d8e1e44186130764ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 May 2024 22:26:54 +0300
+Subject: net/mlx5: Fix MTMP register capability offset in MCAM register
+
+From: Gal Pressman <gal@nvidia.com>
+
+[ Upstream commit 1b9f86c6d53245dab087f1b2c05727b5982142ff ]
+
+The MTMP register (0x900a) capability offset is off-by-one, move it to
+the right place.
+
+Fixes: 1f507e80c700 ("net/mlx5: Expose NIC temperature via hardware monitoring kernel API")
+Signed-off-by: Gal Pressman <gal@nvidia.com>
+Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mlx5/mlx5_ifc.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index c940b329a475f..5e5a9c6774bde 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -10267,9 +10267,9 @@ struct mlx5_ifc_mcam_access_reg_bits {
+       u8         mfrl[0x1];
+       u8         regs_39_to_32[0x8];
+-      u8         regs_31_to_10[0x16];
++      u8         regs_31_to_11[0x15];
+       u8         mtmp[0x1];
+-      u8         regs_8_to_0[0x9];
++      u8         regs_9_to_0[0xa];
+ };
+ struct mlx5_ifc_mcam_access_reg_bits1 {
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-mlx5-lag-do-bond-only-if-slaves-agree-on-roce-st.patch b/queue-6.9/net-mlx5-lag-do-bond-only-if-slaves-agree-on-roce-st.patch
new file mode 100644 (file)
index 0000000..458b320
--- /dev/null
@@ -0,0 +1,74 @@
+From c57eb2105b9a35a29b298734092b115978ca736b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 May 2024 22:26:52 +0300
+Subject: net/mlx5: Lag, do bond only if slaves agree on roce state
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maher Sanalla <msanalla@nvidia.com>
+
+[ Upstream commit 51ef9305b8f40946d65c40368ffb4c14636d369a ]
+
+Currently, the driver does not enforce that lag bond slaves must have
+matching roce capabilities. Yet, in mlx5_do_bond(), the driver attempts
+to enable roce on all vports of the bond slaves, causing the following
+syndrome when one slave has no roce fw support:
+
+mlx5_cmd_out_err:809:(pid 25427): MODIFY_NIC_VPORT_CONTEXT(0x755) op_mod(0x0)
+failed, status bad parameter(0x3), syndrome (0xc1f678), err(-22)
+
+Thus, create HW lag only if bond's slaves agree on roce state,
+either all slaves have roce support resulting in a roce lag bond,
+or none do, resulting in a raw eth bond.
+
+Fixes: 7907f23adc18 ("net/mlx5: Implement RoCE LAG feature")
+Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+index 37598d116f3b8..58a452d20daf7 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+@@ -720,6 +720,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+       struct mlx5_core_dev *dev;
+       u8 mode;
+ #endif
++      bool roce_support;
+       int i;
+       for (i = 0; i < ldev->ports; i++)
+@@ -746,6 +747,11 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+               if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
+                       return false;
+ #endif
++      roce_support = mlx5_get_roce_state(ldev->pf[MLX5_LAG_P1].dev);
++      for (i = 1; i < ldev->ports; i++)
++              if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
++                      return false;
++
+       return true;
+ }
+@@ -913,8 +919,10 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
+               } else if (roce_lag) {
+                       dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+                       mlx5_rescan_drivers_locked(dev0);
+-                      for (i = 1; i < ldev->ports; i++)
+-                              mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
++                      for (i = 1; i < ldev->ports; i++) {
++                              if (mlx5_get_roce_state(ldev->pf[i].dev))
++                                      mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
++                      }
+               } else if (shared_fdb) {
+                       int i;
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-mlx5-use-mlx5_ipsec_rx_status_destroy-to-correct.patch b/queue-6.9/net-mlx5-use-mlx5_ipsec_rx_status_destroy-to-correct.patch
new file mode 100644 (file)
index 0000000..7413a52
--- /dev/null
@@ -0,0 +1,70 @@
+From c153349c1c70bd5e395db68cee5221aaf1d8fd6e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 May 2024 22:26:55 +0300
+Subject: net/mlx5: Use mlx5_ipsec_rx_status_destroy to correctly delete status
+ rules
+
+From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+
+[ Upstream commit 16d66a4fa81da07bc4ed19f4e53b87263c2f8d38 ]
+
+rx_create no longer allocates a modify_hdr instance that needs to be
+cleaned up. The mlx5_modify_header_dealloc call will lead to a NULL pointer
+dereference. A leak in the rules also previously occurred since there are
+now two rules populated related to status.
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000000
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  PGD 109907067 P4D 109907067 PUD 116890067 PMD 0
+  Oops: 0000 [#1] SMP
+  CPU: 1 PID: 484 Comm: ip Not tainted 6.9.0-rc2-rrameshbabu+ #254
+  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Arch Linux 1.16.3-1-1 04/01/2014
+  RIP: 0010:mlx5_modify_header_dealloc+0xd/0x70
+  <snip>
+  Call Trace:
+   <TASK>
+   ? show_regs+0x60/0x70
+   ? __die+0x24/0x70
+   ? page_fault_oops+0x15f/0x430
+   ? free_to_partial_list.constprop.0+0x79/0x150
+   ? do_user_addr_fault+0x2c9/0x5c0
+   ? exc_page_fault+0x63/0x110
+   ? asm_exc_page_fault+0x27/0x30
+   ? mlx5_modify_header_dealloc+0xd/0x70
+   rx_create+0x374/0x590
+   rx_add_rule+0x3ad/0x500
+   ? rx_add_rule+0x3ad/0x500
+   ? mlx5_cmd_exec+0x2c/0x40
+   ? mlx5_create_ipsec_obj+0xd6/0x200
+   mlx5e_accel_ipsec_fs_add_rule+0x31/0xf0
+   mlx5e_xfrm_add_state+0x426/0xc00
+  <snip>
+
+Fixes: 94af50c0a9bb ("net/mlx5e: Unify esw and normal IPsec status table creation/destruction")
+Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+index 41a2543a52cda..e51b03d4c717f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+@@ -750,8 +750,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
+ err_fs_ft:
+       if (rx->allow_tunnel_mode)
+               mlx5_eswitch_unblock_encap(mdev);
+-      mlx5_del_flow_rules(rx->status.rule);
+-      mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
++      mlx5_ipsec_rx_status_destroy(ipsec, rx);
+ err_add:
+       mlx5_destroy_flow_table(rx->ft.status);
+ err_fs_ft_status:
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-mlx5e-fix-ipsec-tunnel-mode-offload-feature-chec.patch b/queue-6.9/net-mlx5e-fix-ipsec-tunnel-mode-offload-feature-chec.patch
new file mode 100644 (file)
index 0000000..6d8babf
--- /dev/null
@@ -0,0 +1,55 @@
+From 258649b957f537c2f8c16bfe0764cc591bf6b481 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 May 2024 22:26:56 +0300
+Subject: net/mlx5e: Fix IPsec tunnel mode offload feature check
+
+From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+
+[ Upstream commit 9a52f6d44f4521773b4699b4ed34b8e21d5a175c ]
+
+Remove faulty check disabling checksum offload and GSO for offload of
+simple IPsec tunnel L4 traffic. Comment previously describing the deleted
+code incorrectly claimed the check prevented double tunnel (or three layers
+of ip headers).
+
+Fixes: f1267798c980 ("net/mlx5: Fix checksum issue of VXLAN and IPsec crypto offload")
+Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h    | 17 +++++------------
+ 1 file changed, 5 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+index 82064614846f5..359050f0b54dd 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+@@ -97,18 +97,11 @@ mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features)
+               if (!x || !x->xso.offload_handle)
+                       goto out_disable;
+-              if (xo->inner_ipproto) {
+-                      /* Cannot support tunnel packet over IPsec tunnel mode
+-                       * because we cannot offload three IP header csum
+-                       */
+-                      if (x->props.mode == XFRM_MODE_TUNNEL)
+-                              goto out_disable;
+-
+-                      /* Only support UDP or TCP L4 checksum */
+-                      if (xo->inner_ipproto != IPPROTO_UDP &&
+-                          xo->inner_ipproto != IPPROTO_TCP)
+-                              goto out_disable;
+-              }
++              /* Only support UDP or TCP L4 checksum */
++              if (xo->inner_ipproto &&
++                  xo->inner_ipproto != IPPROTO_UDP &&
++                  xo->inner_ipproto != IPPROTO_TCP)
++                      goto out_disable;
+               return features;
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-mlx5e-fix-udp-gso-for-encapsulated-packets.patch b/queue-6.9/net-mlx5e-fix-udp-gso-for-encapsulated-packets.patch
new file mode 100644 (file)
index 0000000..0eeaf80
--- /dev/null
@@ -0,0 +1,68 @@
+From 01246565db41a335b8c33cf80180e40f8abae236 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 May 2024 22:26:59 +0300
+Subject: net/mlx5e: Fix UDP GSO for encapsulated packets
+
+From: Gal Pressman <gal@nvidia.com>
+
+[ Upstream commit 83fea49f2711fc90c0d115b0ed04046b45155b65 ]
+
+When the skb is encapsulated, adjust the inner UDP header instead of the
+outer one, and account for UDP header (instead of TCP) in the inline
+header size calculation.
+
+Fixes: 689adf0d4892 ("net/mlx5e: Add UDP GSO support")
+Reported-by: Jason Baron <jbaron@akamai.com>
+Closes: https://lore.kernel.org/netdev/c42961cb-50b9-4a9a-bd43-87fe48d88d29@akamai.com/
+Signed-off-by: Gal Pressman <gal@nvidia.com>
+Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
+Reviewed-by: Boris Pismenny <borisp@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h   | 8 +++++++-
+ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c           | 6 +++++-
+ 2 files changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+index caa34b9c161e5..33e32584b07f5 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+@@ -102,8 +102,14 @@ static inline void
+ mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb)
+ {
+       int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr);
++      struct udphdr *udphdr;
+-      udp_hdr(skb)->len = htons(payload_len);
++      if (skb->encapsulation)
++              udphdr = (struct udphdr *)skb_inner_transport_header(skb);
++      else
++              udphdr = udp_hdr(skb);
++
++      udphdr->len = htons(payload_len);
+ }
+ struct mlx5e_accel_tx_state {
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+index e21a3b4128ce8..0964b16ca5619 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+@@ -153,7 +153,11 @@ mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop)
+       *hopbyhop = 0;
+       if (skb->encapsulation) {
+-              ihs = skb_inner_tcp_all_headers(skb);
++              if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
++                      ihs = skb_inner_transport_offset(skb) +
++                            sizeof(struct udphdr);
++              else
++                      ihs = skb_inner_tcp_all_headers(skb);
+               stats->tso_inner_packets++;
+               stats->tso_inner_bytes += skb->len - ihs;
+       } else {
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-mlx5e-use-rx_missed_errors-instead-of-rx_dropped.patch b/queue-6.9/net-mlx5e-use-rx_missed_errors-instead-of-rx_dropped.patch
new file mode 100644 (file)
index 0000000..857ba92
--- /dev/null
@@ -0,0 +1,46 @@
+From b608e3b1de7bd76ed4ee96f8943aa22405889631 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 May 2024 22:26:58 +0300
+Subject: net/mlx5e: Use rx_missed_errors instead of rx_dropped for reporting
+ buffer exhaustion
+
+From: Carolina Jubran <cjubran@nvidia.com>
+
+[ Upstream commit 5c74195d5dd977e97556e6fa76909b831c241230 ]
+
+Previously, the driver incorrectly used rx_dropped to report device
+buffer exhaustion.
+
+According to the documentation, rx_dropped should not be used to count
+packets dropped due to buffer exhaustion, which is the purpose of
+rx_missed_errors.
+
+Use rx_missed_errors as intended for counting packets dropped due to
+buffer exhaustion.
+
+Fixes: 269e6b3af3bf ("net/mlx5e: Report additional error statistics in get stats ndo")
+Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index 64497b6eebd36..47be07af214ff 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -3790,7 +3790,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+               mlx5e_fold_sw_stats64(priv, stats);
+       }
+-      stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
++      stats->rx_missed_errors = priv->stats.qcnt.rx_out_of_buffer;
+       stats->rx_length_errors =
+               PPORT_802_3_GET(pstats, a_in_range_length_errors) +
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-phy-micrel-set-soft_reset-callback-to-genphy_sof.patch b/queue-6.9/net-phy-micrel-set-soft_reset-callback-to-genphy_sof.patch
new file mode 100644 (file)
index 0000000..79155d9
--- /dev/null
@@ -0,0 +1,46 @@
+From 35f9074a06989a96258181a7f0329d7ad05f8382 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 08:54:06 +0200
+Subject: net: phy: micrel: set soft_reset callback to genphy_soft_reset for
+ KSZ8061
+
+From: Mathieu Othacehe <othacehe@gnu.org>
+
+[ Upstream commit 128d54fbcb14b8717ecf596d3dbded327b9980b3 ]
+
+Following a similar reinstate for the KSZ8081 and KSZ9031.
+
+Older kernels would use the genphy_soft_reset if the PHY did not implement
+a .soft_reset.
+
+The KSZ8061 errata described here:
+https://ww1.microchip.com/downloads/en/DeviceDoc/KSZ8061-Errata-DS80000688B.pdf
+and worked around with 232ba3a51c ("net: phy: Micrel KSZ8061: link failure after cable connect")
+is back again without this soft reset.
+
+Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset")
+Tested-by: Karim Ben Houcine <karim.benhoucine@landisgyr.com>
+Signed-off-by: Mathieu Othacehe <othacehe@gnu.org>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/micrel.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
+index 87780465cd0d5..18dee364e2b31 100644
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -4814,6 +4814,7 @@ static struct phy_driver ksphy_driver[] = {
+       /* PHY_BASIC_FEATURES */
+       .probe          = kszphy_probe,
+       .config_init    = ksz8061_config_init,
++      .soft_reset     = genphy_soft_reset,
+       .config_intr    = kszphy_config_intr,
+       .handle_interrupt = kszphy_handle_interrupt,
+       .suspend        = kszphy_suspend,
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-sched-taprio-extend-minimum-interval-restriction.patch b/queue-6.9/net-sched-taprio-extend-minimum-interval-restriction.patch
new file mode 100644 (file)
index 0000000..6aad984
--- /dev/null
@@ -0,0 +1,100 @@
+From a883cad578b7013d4d0053e32d698fadb62f4402 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 May 2024 18:39:55 +0300
+Subject: net/sched: taprio: extend minimum interval restriction to entire
+ cycle too
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit fb66df20a7201e60f2b13d7f95d031b31a8831d3 ]
+
+It is possible for syzbot to side-step the restriction imposed by the
+blamed commit in the Fixes: tag, because the taprio UAPI permits a
+cycle-time different from (and potentially shorter than) the sum of
+entry intervals.
+
+We need one more restriction, which is that the cycle time itself must
+be larger than N * ETH_ZLEN bit times, where N is the number of schedule
+entries. This restriction needs to apply regardless of whether the cycle
+time came from the user or was the implicit, auto-calculated value, so
+we move the existing "cycle == 0" check outside the "if (!new->cycle_time)"
+branch. This way it covers both conditions and scenarios.
+
+Add a selftest which illustrates the issue triggered by syzbot.
+
+Fixes: b5b73b26b3ca ("taprio: Fix allowing too small intervals")
+Reported-by: syzbot+a7d2b1d5d1af83035567@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/0000000000007d66bc06196e7c66@google.com/
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Link: https://lore.kernel.org/r/20240527153955.553333-2-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_taprio.c                        | 10 ++++-----
+ .../tc-testing/tc-tests/qdiscs/taprio.json    | 22 +++++++++++++++++++
+ 2 files changed, 27 insertions(+), 5 deletions(-)
+
+diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
+index 631140c1f6e5f..5c3f8a278fc2f 100644
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -1151,11 +1151,6 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
+               list_for_each_entry(entry, &new->entries, list)
+                       cycle = ktime_add_ns(cycle, entry->interval);
+-              if (!cycle) {
+-                      NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
+-                      return -EINVAL;
+-              }
+-
+               if (cycle < 0 || cycle > INT_MAX) {
+                       NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
+                       return -EINVAL;
+@@ -1164,6 +1159,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
+               new->cycle_time = cycle;
+       }
++      if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) {
++              NL_SET_ERR_MSG(extack, "'cycle_time' is too small");
++              return -EINVAL;
++      }
++
+       taprio_calculate_gate_durations(q, new);
+       return 0;
+diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+index 8f12f00a4f572..557fb074acf0c 100644
+--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
++++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+@@ -154,6 +154,28 @@
+             "echo \"1\" > /sys/bus/netdevsim/del_device"
+         ]
+     },
++    {
++        "id": "831f",
++        "name": "Add taprio Qdisc with too short cycle-time",
++        "category": [
++            "qdisc",
++            "taprio"
++        ],
++        "plugins": {
++            "requires": "nsPlugin"
++        },
++        "setup": [
++            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
++        ],
++        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 2 queues 1@0 1@1 sched-entry S 01 200000 sched-entry S 02 200000 cycle-time 100 clockid CLOCK_TAI",
++        "expExitCode": "2",
++        "verifyCmd": "$TC qdisc show dev $ETH",
++        "matchPattern": "qdisc taprio 1: root refcnt",
++        "matchCount": "0",
++        "teardown": [
++            "echo \"1\" > /sys/bus/netdevsim/del_device"
++        ]
++    },
+     {
+         "id": "3e1e",
+         "name": "Add taprio Qdisc with an invalid cycle-time",
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-sched-taprio-make-q-picos_per_byte-available-to-.patch b/queue-6.9/net-sched-taprio-make-q-picos_per_byte-available-to-.patch
new file mode 100644 (file)
index 0000000..06a65f0
--- /dev/null
@@ -0,0 +1,95 @@
+From 46520deb1ab13d218b26a794784c9340dbfcc2fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 May 2024 18:39:54 +0300
+Subject: net/sched: taprio: make q->picos_per_byte available to
+ fill_sched_entry()
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit e634134180885574d1fe7aa162777ba41e7fcd5b ]
+
+In commit b5b73b26b3ca ("taprio: Fix allowing too small intervals"), a
+comparison of user input against length_to_duration(q, ETH_ZLEN) was
+introduced, to avoid RCU stalls due to frequent hrtimers.
+
+The implementation of length_to_duration() depends on q->picos_per_byte
+being set for the link speed. The blamed commit in the Fixes: tag has
+moved this too late, so the checks introduced above are ineffective.
+The q->picos_per_byte is zero at parse_taprio_schedule() ->
+parse_sched_list() -> parse_sched_entry() -> fill_sched_entry() time.
+
+Move the taprio_set_picos_per_byte() call as one of the first things in
+taprio_change(), before the bulk of the netlink attribute parsing is
+done. That's because it is needed there.
+
+Add a selftest to make sure the issue doesn't get reintroduced.
+
+Fixes: 09dbdf28f9f9 ("net/sched: taprio: fix calculation of maximum gate durations")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240527153955.553333-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_taprio.c                        |  4 +++-
+ .../tc-testing/tc-tests/qdiscs/taprio.json    | 22 +++++++++++++++++++
+ 2 files changed, 25 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
+index a0d54b422186f..631140c1f6e5f 100644
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -1851,6 +1851,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
+       }
+       q->flags = taprio_flags;
++      /* Needed for length_to_duration() during netlink attribute parsing */
++      taprio_set_picos_per_byte(dev, q);
++
+       err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
+       if (err < 0)
+               return err;
+@@ -1910,7 +1913,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
+       if (err < 0)
+               goto free_sched;
+-      taprio_set_picos_per_byte(dev, q);
+       taprio_update_queue_max_sdu(q, new_admin, stab);
+       if (FULL_OFFLOAD_IS_ENABLED(q->flags))
+diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+index 12da0a939e3e5..8f12f00a4f572 100644
+--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
++++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+@@ -132,6 +132,28 @@
+             "echo \"1\" > /sys/bus/netdevsim/del_device"
+         ]
+     },
++    {
++        "id": "6f62",
++        "name": "Add taprio Qdisc with too short interval",
++        "category": [
++            "qdisc",
++            "taprio"
++        ],
++        "plugins": {
++            "requires": "nsPlugin"
++        },
++        "setup": [
++            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
++        ],
++        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 2 queues 1@0 1@1 sched-entry S 01 300 sched-entry S 02 1700 clockid CLOCK_TAI",
++        "expExitCode": "2",
++        "verifyCmd": "$TC qdisc show dev $ETH",
++        "matchPattern": "qdisc taprio 1: root refcnt",
++        "matchCount": "0",
++        "teardown": [
++            "echo \"1\" > /sys/bus/netdevsim/del_device"
++        ]
++    },
+     {
+         "id": "3e1e",
+         "name": "Add taprio Qdisc with an invalid cycle-time",
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-ti-icssg-prueth-fix-start-counter-for-ft1-filter.patch b/queue-6.9/net-ti-icssg-prueth-fix-start-counter-for-ft1-filter.patch
new file mode 100644 (file)
index 0000000..7bd1963
--- /dev/null
@@ -0,0 +1,39 @@
+From a98ef76d6e6ef0519a947a691886b423971952f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 May 2024 12:00:15 +0530
+Subject: net: ti: icssg-prueth: Fix start counter for ft1 filter
+
+From: MD Danish Anwar <danishanwar@ti.com>
+
+[ Upstream commit 56a5cf538c3f2d935b0d81040a8303b6e7fc5fd8 ]
+
+The start counter for FT1 filter is wrongly set to 0 in the driver.
+FT1 is used for source address violation (SAV) check and source address
+starts at Byte 6 not Byte 0. Fix this by changing start counter to
+ETH_ALEN in icssg_ft1_set_mac_addr().
+
+Fixes: e9b4ece7d74b ("net: ti: icssg-prueth: Add Firmware config and classification APIs.")
+Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
+Link: https://lore.kernel.org/r/20240527063015.263748-1-danishanwar@ti.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/ti/icssg/icssg_classifier.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/ti/icssg/icssg_classifier.c b/drivers/net/ethernet/ti/icssg/icssg_classifier.c
+index 6df53ab17fbc5..902a2717785cb 100644
+--- a/drivers/net/ethernet/ti/icssg/icssg_classifier.c
++++ b/drivers/net/ethernet/ti/icssg/icssg_classifier.c
+@@ -360,7 +360,7 @@ void icssg_ft1_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac_addr)
+ {
+       const u8 mask_addr[] = { 0, 0, 0, 0, 0, 0, };
+-      rx_class_ft1_set_start_len(miig_rt, slice, 0, 6);
++      rx_class_ft1_set_start_len(miig_rt, slice, ETH_ALEN, ETH_ALEN);
+       rx_class_ft1_set_da(miig_rt, slice, 0, mac_addr);
+       rx_class_ft1_set_da_mask(miig_rt, slice, 0, mask_addr);
+       rx_class_ft1_cfg_set_type(miig_rt, slice, 0, FT1_CFG_TYPE_EQ);
+-- 
+2.43.0
+
diff --git a/queue-6.9/net-usb-smsc95xx-fix-changing-led_sel-bit-value-upda.patch b/queue-6.9/net-usb-smsc95xx-fix-changing-led_sel-bit-value-upda.patch
new file mode 100644 (file)
index 0000000..834bf59
--- /dev/null
@@ -0,0 +1,68 @@
+From 9ffa9a36e46468680f4f48cd79d7cd0d490518ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 14:23:14 +0530
+Subject: net: usb: smsc95xx: fix changing LED_SEL bit value updated from
+ EEPROM
+
+From: Parthiban Veerasooran <Parthiban.Veerasooran@microchip.com>
+
+[ Upstream commit 52a2f0608366a629d43dacd3191039c95fef74ba ]
+
+LED Select (LED_SEL) bit in the LED General Purpose IO Configuration
+register is used to determine the functionality of external LED pins
+(Speed Indicator, Link and Activity Indicator, Full Duplex Link
+Indicator). The default value for this bit is 0 when no EEPROM is
+present. If an EEPROM is present, the default value is the value of the
+LED Select bit in the Configuration Flags of the EEPROM. A USB Reset or
+Lite Reset (LRST) will cause this bit to be restored to the image value
+last loaded from EEPROM, or to be set to 0 if no EEPROM is present.
+
+While configuring the dual purpose GPIO/LED pins as LED outputs in the
+LED General Purpose IO Configuration register, the LED_SEL bit is set to
+0, so the value configured from the EEPROM is cleared. The issue is
+fixed by using a read-modify-write approach.
+
+Fixes: f293501c61c5 ("smsc95xx: configure LED outputs")
+Signed-off-by: Parthiban Veerasooran <Parthiban.Veerasooran@microchip.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Woojung Huh <woojung.huh@microchip.com>
+Link: https://lore.kernel.org/r/20240523085314.167650-1-Parthiban.Veerasooran@microchip.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/smsc95xx.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
+index cbea246664795..8e82184be5e7d 100644
+--- a/drivers/net/usb/smsc95xx.c
++++ b/drivers/net/usb/smsc95xx.c
+@@ -879,7 +879,7 @@ static int smsc95xx_start_rx_path(struct usbnet *dev)
+ static int smsc95xx_reset(struct usbnet *dev)
+ {
+       struct smsc95xx_priv *pdata = dev->driver_priv;
+-      u32 read_buf, write_buf, burst_cap;
++      u32 read_buf, burst_cap;
+       int ret = 0, timeout;
+       netif_dbg(dev, ifup, dev->net, "entering smsc95xx_reset\n");
+@@ -1003,10 +1003,13 @@ static int smsc95xx_reset(struct usbnet *dev)
+               return ret;
+       netif_dbg(dev, ifup, dev->net, "ID_REV = 0x%08x\n", read_buf);
++      ret = smsc95xx_read_reg(dev, LED_GPIO_CFG, &read_buf);
++      if (ret < 0)
++              return ret;
+       /* Configure GPIO pins as LED outputs */
+-      write_buf = LED_GPIO_CFG_SPD_LED | LED_GPIO_CFG_LNK_LED |
+-              LED_GPIO_CFG_FDX_LED;
+-      ret = smsc95xx_write_reg(dev, LED_GPIO_CFG, write_buf);
++      read_buf |= LED_GPIO_CFG_SPD_LED | LED_GPIO_CFG_LNK_LED |
++                  LED_GPIO_CFG_FDX_LED;
++      ret = smsc95xx_write_reg(dev, LED_GPIO_CFG, read_buf);
+       if (ret < 0)
+               return ret;
+-- 
+2.43.0
+
diff --git a/queue-6.9/netfilter-ipset-add-list-flush-to-cancel_gc.patch b/queue-6.9/netfilter-ipset-add-list-flush-to-cancel_gc.patch
new file mode 100644 (file)
index 0000000..87540f5
--- /dev/null
@@ -0,0 +1,39 @@
+From 1105f5db5c9f4b8fe5deb57b2a459c50449c3d32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Apr 2024 18:51:41 +0500
+Subject: netfilter: ipset: Add list flush to cancel_gc
+
+From: Alexander Maltsev <keltar.gw@gmail.com>
+
+[ Upstream commit c1193d9bbbd379defe9be3c6de566de684de8a6f ]
+
+Flushing list in cancel_gc drops references to other lists right away,
+without waiting for RCU to destroy list. Fixes race when referenced
+ipsets can't be destroyed while referring list is scheduled for destroy.
+
+Fixes: 97f7cf1cd80e ("netfilter: ipset: fix performance regression in swap operation")
+Signed-off-by: Alexander Maltsev <keltar.gw@gmail.com>
+Acked-by: Jozsef Kadlecsik <kadlec@netfilter.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/ipset/ip_set_list_set.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
+index 6c3f28bc59b32..54e2a1dd7f5f5 100644
+--- a/net/netfilter/ipset/ip_set_list_set.c
++++ b/net/netfilter/ipset/ip_set_list_set.c
+@@ -549,6 +549,9 @@ list_set_cancel_gc(struct ip_set *set)
+       if (SET_WITH_TIMEOUT(set))
+               timer_shutdown_sync(&map->gc);
++
++      /* Flush list to drop references to other ipsets */
++      list_set_flush(set);
+ }
+ static const struct ip_set_type_variant set_variant = {
+-- 
+2.43.0
+
diff --git a/queue-6.9/netfilter-nfnetlink_queue-acquire-rcu_read_lock-in-i.patch b/queue-6.9/netfilter-nfnetlink_queue-acquire-rcu_read_lock-in-i.patch
new file mode 100644 (file)
index 0000000..fd4ceb5
--- /dev/null
@@ -0,0 +1,79 @@
+From 4095d5d1a4071a2b1688652ddb4d1c8a4dca22b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 May 2024 13:23:39 +0000
+Subject: netfilter: nfnetlink_queue: acquire rcu_read_lock() in
+ instance_destroy_rcu()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit dc21c6cc3d6986d938efbf95de62473982c98dec ]
+
+syzbot reported that nf_reinject() could be called without rcu_read_lock() :
+
+WARNING: suspicious RCU usage
+6.9.0-rc7-syzkaller-02060-g5c1672705a1a #0 Not tainted
+
+net/netfilter/nfnetlink_queue.c:263 suspicious rcu_dereference_check() usage!
+
+other info that might help us debug this:
+
+rcu_scheduler_active = 2, debug_locks = 1
+2 locks held by syz-executor.4/13427:
+  #0: ffffffff8e334f60 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire include/linux/rcupdate.h:329 [inline]
+  #0: ffffffff8e334f60 (rcu_callback){....}-{0:0}, at: rcu_do_batch kernel/rcu/tree.c:2190 [inline]
+  #0: ffffffff8e334f60 (rcu_callback){....}-{0:0}, at: rcu_core+0xa86/0x1830 kernel/rcu/tree.c:2471
+  #1: ffff88801ca92958 (&inst->lock){+.-.}-{2:2}, at: spin_lock_bh include/linux/spinlock.h:356 [inline]
+  #1: ffff88801ca92958 (&inst->lock){+.-.}-{2:2}, at: nfqnl_flush net/netfilter/nfnetlink_queue.c:405 [inline]
+  #1: ffff88801ca92958 (&inst->lock){+.-.}-{2:2}, at: instance_destroy_rcu+0x30/0x220 net/netfilter/nfnetlink_queue.c:172
+
+stack backtrace:
+CPU: 0 PID: 13427 Comm: syz-executor.4 Not tainted 6.9.0-rc7-syzkaller-02060-g5c1672705a1a #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/02/2024
+Call Trace:
+ <IRQ>
+  __dump_stack lib/dump_stack.c:88 [inline]
+  dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114
+  lockdep_rcu_suspicious+0x221/0x340 kernel/locking/lockdep.c:6712
+  nf_reinject net/netfilter/nfnetlink_queue.c:323 [inline]
+  nfqnl_reinject+0x6ec/0x1120 net/netfilter/nfnetlink_queue.c:397
+  nfqnl_flush net/netfilter/nfnetlink_queue.c:410 [inline]
+  instance_destroy_rcu+0x1ae/0x220 net/netfilter/nfnetlink_queue.c:172
+  rcu_do_batch kernel/rcu/tree.c:2196 [inline]
+  rcu_core+0xafd/0x1830 kernel/rcu/tree.c:2471
+  handle_softirqs+0x2d6/0x990 kernel/softirq.c:554
+  __do_softirq kernel/softirq.c:588 [inline]
+  invoke_softirq kernel/softirq.c:428 [inline]
+  __irq_exit_rcu+0xf4/0x1c0 kernel/softirq.c:637
+  irq_exit_rcu+0x9/0x30 kernel/softirq.c:649
+  instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline]
+  sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1043
+ </IRQ>
+ <TASK>
+
+Fixes: 9872bec773c2 ("[NETFILTER]: nfnetlink: use RCU for queue instances hash")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nfnetlink_queue.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index 00f4bd21c59b4..f1c31757e4969 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -169,7 +169,9 @@ instance_destroy_rcu(struct rcu_head *head)
+       struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
+                                                  rcu);
++      rcu_read_lock();
+       nfqnl_flush(inst, NULL, 0);
++      rcu_read_unlock();
+       kfree(inst);
+       module_put(THIS_MODULE);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.9/netfilter-nft_fib-allow-from-forward-input-without-i.patch b/queue-6.9/netfilter-nft_fib-allow-from-forward-input-without-i.patch
new file mode 100644 (file)
index 0000000..999ef7c
--- /dev/null
@@ -0,0 +1,45 @@
+From 18328561edbd4db3d345d88a6003a3bea5acbe6d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 10:25:05 -0400
+Subject: netfilter: nft_fib: allow from forward/input without iif selector
+
+From: Eric Garver <eric@garver.life>
+
+[ Upstream commit e8ded22ef0f4831279c363c264cd41cd9d59ca9e ]
+
+This removes the restriction of needing iif selector in the
+forward/input hooks for fib lookups when requested result is
+oif/oifname.
+
+Removing this restriction allows "loose" lookups from the forward hooks.
+
+Fixes: be8be04e5ddb ("netfilter: nft_fib: reverse path filter for policy-based routing on iif")
+Signed-off-by: Eric Garver <eric@garver.life>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_fib.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
+index 37cfe6dd712d8..b58f62195ff3e 100644
+--- a/net/netfilter/nft_fib.c
++++ b/net/netfilter/nft_fib.c
+@@ -35,11 +35,9 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+       switch (priv->result) {
+       case NFT_FIB_RESULT_OIF:
+       case NFT_FIB_RESULT_OIFNAME:
+-              hooks = (1 << NF_INET_PRE_ROUTING);
+-              if (priv->flags & NFTA_FIB_F_IIF) {
+-                      hooks |= (1 << NF_INET_LOCAL_IN) |
+-                               (1 << NF_INET_FORWARD);
+-              }
++              hooks = (1 << NF_INET_PRE_ROUTING) |
++                      (1 << NF_INET_LOCAL_IN) |
++                      (1 << NF_INET_FORWARD);
+               break;
+       case NFT_FIB_RESULT_ADDRTYPE:
+               if (priv->flags & NFTA_FIB_F_IIF)
+-- 
+2.43.0
+
diff --git a/queue-6.9/netfilter-nft_payload-restore-vlan-q-in-q-match-supp.patch b/queue-6.9/netfilter-nft_payload-restore-vlan-q-in-q-match-supp.patch
new file mode 100644 (file)
index 0000000..a904a40
--- /dev/null
@@ -0,0 +1,74 @@
+From 3b47be354c3c17ef95892e1bdc2043e618403fbc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 May 2024 23:02:24 +0200
+Subject: netfilter: nft_payload: restore vlan q-in-q match support
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit aff5c01fa1284d606f8e7cbdaafeef2511bb46c1 ]
+
+Revert f6ae9f120dad ("netfilter: nft_payload: add C-VLAN support").
+
+f41f72d09ee1 ("netfilter: nft_payload: simplify vlan header handling")
+already allows matching on inner vlan tags by subtracting the size of the
+vlan header, which has been popped and stored in skbuff metadata fields,
+from the payload offset.
+
+Fixes: f6ae9f120dad ("netfilter: nft_payload: add C-VLAN support")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_payload.c | 23 +++++++----------------
+ 1 file changed, 7 insertions(+), 16 deletions(-)
+
+diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
+index 0a689c8e0295d..a3cb5dbcb362c 100644
+--- a/net/netfilter/nft_payload.c
++++ b/net/netfilter/nft_payload.c
+@@ -45,36 +45,27 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
+       int mac_off = skb_mac_header(skb) - skb->data;
+       u8 *vlanh, *dst_u8 = (u8 *) d;
+       struct vlan_ethhdr veth;
+-      u8 vlan_hlen = 0;
+-
+-      if ((skb->protocol == htons(ETH_P_8021AD) ||
+-           skb->protocol == htons(ETH_P_8021Q)) &&
+-          offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN)
+-              vlan_hlen += VLAN_HLEN;
+       vlanh = (u8 *) &veth;
+-      if (offset < VLAN_ETH_HLEN + vlan_hlen) {
++      if (offset < VLAN_ETH_HLEN) {
+               u8 ethlen = len;
+-              if (vlan_hlen &&
+-                  skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0)
+-                      return false;
+-              else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
++              if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
+                       return false;
+-              if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
+-                      ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
++              if (offset + len > VLAN_ETH_HLEN)
++                      ethlen -= offset + len - VLAN_ETH_HLEN;
+-              memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
++              memcpy(dst_u8, vlanh + offset, ethlen);
+               len -= ethlen;
+               if (len == 0)
+                       return true;
+               dst_u8 += ethlen;
+-              offset = ETH_HLEN + vlan_hlen;
++              offset = ETH_HLEN;
+       } else {
+-              offset -= VLAN_HLEN + vlan_hlen;
++              offset -= VLAN_HLEN;
+       }
+       return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
+-- 
+2.43.0
+
diff --git a/queue-6.9/netfilter-nft_payload-skbuff-vlan-metadata-mangle-su.patch b/queue-6.9/netfilter-nft_payload-skbuff-vlan-metadata-mangle-su.patch
new file mode 100644 (file)
index 0000000..9826dc3
--- /dev/null
@@ -0,0 +1,147 @@
+From d97ebbaa717f7a2cacd7070e4c2ec21ea867a3cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 May 2024 22:50:34 +0200
+Subject: netfilter: nft_payload: skbuff vlan metadata mangle support
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 33c563ebf8d3deed7d8addd20d77398ac737ef9a ]
+
+Userspace assumes vlan header is present at a given offset, but vlan
+offload allows to store this in metadata fields of the skbuff. Hence
+mangling vlan results in a garbled packet. Handle this transparently by
+adding a parser to the kernel.
+
+If vlan metadata is present and payload offset is over 12 bytes (source
+and destination mac address fields), then subtract vlan header present
+in vlan metadata, otherwise mangle vlan metadata based on offset and
+length, extracting data from the source register.
+
+This is similar to:
+
+  8cfd23e67401 ("netfilter: nft_payload: work around vlan header stripping")
+
+to deal with vlan payload mangling.
+
+Fixes: 7ec3f7b47b8d ("netfilter: nft_payload: add packet mangling support")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_payload.c | 72 +++++++++++++++++++++++++++++++++----
+ 1 file changed, 65 insertions(+), 7 deletions(-)
+
+diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
+index a3cb5dbcb362c..0c43d748e23ae 100644
+--- a/net/netfilter/nft_payload.c
++++ b/net/netfilter/nft_payload.c
+@@ -145,12 +145,12 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
+       return pkt->inneroff;
+ }
+-static bool nft_payload_need_vlan_copy(const struct nft_payload *priv)
++static bool nft_payload_need_vlan_adjust(u32 offset, u32 len)
+ {
+-      unsigned int len = priv->offset + priv->len;
++      unsigned int boundary = offset + len;
+       /* data past ether src/dst requested, copy needed */
+-      if (len > offsetof(struct ethhdr, h_proto))
++      if (boundary > offsetof(struct ethhdr, h_proto))
+               return true;
+       return false;
+@@ -174,7 +174,7 @@ void nft_payload_eval(const struct nft_expr *expr,
+                       goto err;
+               if (skb_vlan_tag_present(skb) &&
+-                  nft_payload_need_vlan_copy(priv)) {
++                  nft_payload_need_vlan_adjust(priv->offset, priv->len)) {
+                       if (!nft_payload_copy_vlan(dest, skb,
+                                                  priv->offset, priv->len))
+                               goto err;
+@@ -801,21 +801,79 @@ struct nft_payload_set {
+       u8                      csum_flags;
+ };
++/* This is not struct vlan_hdr. */
++struct nft_payload_vlan_hdr {
++      __be16                  h_vlan_proto;
++      __be16                  h_vlan_TCI;
++};
++
++static bool
++nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u8 offset, u8 len,
++                   int *vlan_hlen)
++{
++      struct nft_payload_vlan_hdr *vlanh;
++      __be16 vlan_proto;
++      u16 vlan_tci;
++
++      if (offset >= offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto)) {
++              *vlan_hlen = VLAN_HLEN;
++              return true;
++      }
++
++      switch (offset) {
++      case offsetof(struct vlan_ethhdr, h_vlan_proto):
++              if (len == 2) {
++                      vlan_proto = nft_reg_load_be16(src);
++                      skb->vlan_proto = vlan_proto;
++              } else if (len == 4) {
++                      vlanh = (struct nft_payload_vlan_hdr *)src;
++                      __vlan_hwaccel_put_tag(skb, vlanh->h_vlan_proto,
++                                             ntohs(vlanh->h_vlan_TCI));
++              } else {
++                      return false;
++              }
++              break;
++      case offsetof(struct vlan_ethhdr, h_vlan_TCI):
++              if (len != 2)
++                      return false;
++
++              vlan_tci = ntohs(nft_reg_load_be16(src));
++              skb->vlan_tci = vlan_tci;
++              break;
++      default:
++              return false;
++      }
++
++      return true;
++}
++
+ static void nft_payload_set_eval(const struct nft_expr *expr,
+                                struct nft_regs *regs,
+                                const struct nft_pktinfo *pkt)
+ {
+       const struct nft_payload_set *priv = nft_expr_priv(expr);
+-      struct sk_buff *skb = pkt->skb;
+       const u32 *src = &regs->data[priv->sreg];
+-      int offset, csum_offset;
++      int offset, csum_offset, vlan_hlen = 0;
++      struct sk_buff *skb = pkt->skb;
+       __wsum fsum, tsum;
+       switch (priv->base) {
+       case NFT_PAYLOAD_LL_HEADER:
+               if (!skb_mac_header_was_set(skb))
+                       goto err;
+-              offset = skb_mac_header(skb) - skb->data;
++
++              if (skb_vlan_tag_present(skb) &&
++                  nft_payload_need_vlan_adjust(priv->offset, priv->len)) {
++                      if (!nft_payload_set_vlan(src, skb,
++                                                priv->offset, priv->len,
++                                                &vlan_hlen))
++                              goto err;
++
++                      if (!vlan_hlen)
++                              return;
++              }
++
++              offset = skb_mac_header(skb) - skb->data - vlan_hlen;
+               break;
+       case NFT_PAYLOAD_NETWORK_HEADER:
+               offset = skb_network_offset(skb);
+-- 
+2.43.0
+
diff --git a/queue-6.9/netfilter-tproxy-bail-out-if-ip-has-been-disabled-on.patch b/queue-6.9/netfilter-tproxy-bail-out-if-ip-has-been-disabled-on.patch
new file mode 100644 (file)
index 0000000..110eabf
--- /dev/null
@@ -0,0 +1,45 @@
+From 6e53551b9fc0cbcab86abafaf47917cfe5f2270e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 May 2024 12:27:15 +0200
+Subject: netfilter: tproxy: bail out if IP has been disabled on the device
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 21a673bddc8fd4873c370caf9ae70ffc6d47e8d3 ]
+
+syzbot reports:
+general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN PTI
+KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f]
+[..]
+RIP: 0010:nf_tproxy_laddr4+0xb7/0x340 net/ipv4/netfilter/nf_tproxy_ipv4.c:62
+Call Trace:
+ nft_tproxy_eval_v4 net/netfilter/nft_tproxy.c:56 [inline]
+ nft_tproxy_eval+0xa9a/0x1a00 net/netfilter/nft_tproxy.c:168
+
+__in_dev_get_rcu() can return NULL, so check for this.
+
+Reported-and-tested-by: syzbot+b94a6818504ea90d7661@syzkaller.appspotmail.com
+Fixes: cc6eb4338569 ("tproxy: use the interface primary IP address as a default value for --on-ip")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
+index 69e3317996043..73e66a088e25e 100644
+--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
++++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
+@@ -58,6 +58,8 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
+       laddr = 0;
+       indev = __in_dev_get_rcu(skb->dev);
++      if (!indev)
++              return daddr;
+       in_dev_for_each_ifa_rcu(ifa, indev) {
+               if (ifa->ifa_flags & IFA_F_SECONDARY)
+-- 
+2.43.0
+
diff --git a/queue-6.9/netkit-fix-pkt_type-override-upon-netkit-pass-verdic.patch b/queue-6.9/netkit-fix-pkt_type-override-upon-netkit-pass-verdic.patch
new file mode 100644 (file)
index 0000000..f7fd5e5
--- /dev/null
@@ -0,0 +1,104 @@
+From 8e6ef14f6babd9f0a17518dd56e22f469623b71a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2024 18:36:17 +0200
+Subject: netkit: Fix pkt_type override upon netkit pass verdict
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit 3998d184267dfcff858aaa84d3de17429253629d ]
+
+When running Cilium connectivity test suite with netkit in L2 mode, we
+found that compared to tcx a few tests were failing which pushed traffic
+into an L7 proxy sitting in host namespace. The problem in particular is
+around the invocation of eth_type_trans() in netkit.
+
+In case of tcx, this is run before the tcx ingress is triggered inside
+host namespace and thus if the BPF program uses the bpf_skb_change_type()
+helper the newly set type is retained. However, in case of netkit, the
+late eth_type_trans() invocation overrides the earlier decision from the
+BPF program which eventually leads to the test failure.
+
+Instead of eth_type_trans(), split out the relevant parts, meaning, reset
+of mac header and call to eth_skb_pkt_type() before the BPF program is run
+in order to have the same behavior as with tcx, and refactor a small helper
+called eth_skb_pull_mac() which is run in case it's passed up the stack
+where the mac header must be pulled. With this all connectivity tests pass.
+
+Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://lore.kernel.org/r/20240524163619.26001-2-daniel@iogearbox.net
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/netkit.c        | 4 +++-
+ include/linux/etherdevice.h | 8 ++++++++
+ net/ethernet/eth.c          | 4 +---
+ 3 files changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
+index 272894053e2c4..16789cd446e9e 100644
+--- a/drivers/net/netkit.c
++++ b/drivers/net/netkit.c
+@@ -55,6 +55,7 @@ static void netkit_prep_forward(struct sk_buff *skb, bool xnet)
+       skb_scrub_packet(skb, xnet);
+       skb->priority = 0;
+       nf_skip_egress(skb, true);
++      skb_reset_mac_header(skb);
+ }
+ static struct netkit *netkit_priv(const struct net_device *dev)
+@@ -78,6 +79,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
+                    skb_orphan_frags(skb, GFP_ATOMIC)))
+               goto drop;
+       netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
++      eth_skb_pkt_type(skb, peer);
+       skb->dev = peer;
+       entry = rcu_dereference(nk->active);
+       if (entry)
+@@ -85,7 +87,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
+       switch (ret) {
+       case NETKIT_NEXT:
+       case NETKIT_PASS:
+-              skb->protocol = eth_type_trans(skb, skb->dev);
++              eth_skb_pull_mac(skb);
+               skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+               if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
+                       dev_sw_netstats_tx_add(dev, 1, len);
+diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
+index 297231854ada5..e44913a8200fd 100644
+--- a/include/linux/etherdevice.h
++++ b/include/linux/etherdevice.h
+@@ -632,6 +632,14 @@ static inline void eth_skb_pkt_type(struct sk_buff *skb,
+       }
+ }
++static inline struct ethhdr *eth_skb_pull_mac(struct sk_buff *skb)
++{
++      struct ethhdr *eth = (struct ethhdr *)skb->data;
++
++      skb_pull_inline(skb, ETH_HLEN);
++      return eth;
++}
++
+ /**
+  * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame
+  * @skb: Buffer to pad
+diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
+index 049c3adeb8504..4e3651101b866 100644
+--- a/net/ethernet/eth.c
++++ b/net/ethernet/eth.c
+@@ -161,9 +161,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
+       skb->dev = dev;
+       skb_reset_mac_header(skb);
+-      eth = (struct ethhdr *)skb->data;
+-      skb_pull_inline(skb, ETH_HLEN);
+-
++      eth = eth_skb_pull_mac(skb);
+       eth_skb_pkt_type(skb, dev);
+       /*
+-- 
+2.43.0
+
diff --git a/queue-6.9/netkit-fix-setting-mac-address-in-l2-mode.patch b/queue-6.9/netkit-fix-setting-mac-address-in-l2-mode.patch
new file mode 100644 (file)
index 0000000..2633128
--- /dev/null
@@ -0,0 +1,101 @@
+From 432efb247cc1ea8a60cb92cf637286f875227710 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2024 18:36:16 +0200
+Subject: netkit: Fix setting mac address in l2 mode
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit d6fe532b7499e4575f9647879b7a34625817fe7f ]
+
+When running Cilium connectivity test suite with netkit in L2 mode, we
+found that it is expected to be able to specify a custom MAC address for
+the devices, in particular, cilium-cni obtains the specified MAC address
+by querying the endpoint and sets the MAC address of the interface inside
+the Pod. Thus, fix the missing support in netkit for L2 mode.
+
+Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Link: https://lore.kernel.org/r/20240524163619.26001-1-daniel@iogearbox.net
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/netkit.c | 26 +++++++++++++++++++++-----
+ 1 file changed, 21 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
+index a4d2e76a8d587..272894053e2c4 100644
+--- a/drivers/net/netkit.c
++++ b/drivers/net/netkit.c
+@@ -155,6 +155,16 @@ static void netkit_set_multicast(struct net_device *dev)
+       /* Nothing to do, we receive whatever gets pushed to us! */
+ }
++static int netkit_set_macaddr(struct net_device *dev, void *sa)
++{
++      struct netkit *nk = netkit_priv(dev);
++
++      if (nk->mode != NETKIT_L2)
++              return -EOPNOTSUPP;
++
++      return eth_mac_addr(dev, sa);
++}
++
+ static void netkit_set_headroom(struct net_device *dev, int headroom)
+ {
+       struct netkit *nk = netkit_priv(dev), *nk2;
+@@ -198,6 +208,7 @@ static const struct net_device_ops netkit_netdev_ops = {
+       .ndo_start_xmit         = netkit_xmit,
+       .ndo_set_rx_mode        = netkit_set_multicast,
+       .ndo_set_rx_headroom    = netkit_set_headroom,
++      .ndo_set_mac_address    = netkit_set_macaddr,
+       .ndo_get_iflink         = netkit_get_iflink,
+       .ndo_get_peer_dev       = netkit_peer_dev,
+       .ndo_get_stats64        = netkit_get_stats,
+@@ -300,9 +311,11 @@ static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
+       if (!attr)
+               return 0;
+-      NL_SET_ERR_MSG_ATTR(extack, attr,
+-                          "Setting Ethernet address is not supported");
+-      return -EOPNOTSUPP;
++      if (nla_len(attr) != ETH_ALEN)
++              return -EINVAL;
++      if (!is_valid_ether_addr(nla_data(attr)))
++              return -EADDRNOTAVAIL;
++      return 0;
+ }
+ static struct rtnl_link_ops netkit_link_ops;
+@@ -365,6 +378,9 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
+               strscpy(ifname, "nk%d", IFNAMSIZ);
+               ifname_assign_type = NET_NAME_ENUM;
+       }
++      if (mode != NETKIT_L2 &&
++          (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS]))
++              return -EOPNOTSUPP;
+       net = rtnl_link_get_net(src_net, tbp);
+       if (IS_ERR(net))
+@@ -379,7 +395,7 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
+       netif_inherit_tso_max(peer, dev);
+-      if (mode == NETKIT_L2)
++      if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
+               eth_hw_addr_random(peer);
+       if (ifmp && dev->ifindex)
+               peer->ifindex = ifmp->ifi_index;
+@@ -402,7 +418,7 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
+       if (err < 0)
+               goto err_configure_peer;
+-      if (mode == NETKIT_L2)
++      if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS])
+               eth_hw_addr_random(dev);
+       if (tb[IFLA_IFNAME])
+               nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
+-- 
+2.43.0
+
diff --git a/queue-6.9/null_blk-fix-return-value-of-nullb_device_power_stor.patch b/queue-6.9/null_blk-fix-return-value-of-nullb_device_power_stor.patch
new file mode 100644 (file)
index 0000000..12095f4
--- /dev/null
@@ -0,0 +1,41 @@
+From f38c4778214d3cd99a034c43a4226b3f87bc49d6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 May 2024 13:34:45 +0900
+Subject: null_blk: Fix return value of nullb_device_power_store()
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+[ Upstream commit d9ff882b54f99f96787fa3df7cd938966843c418 ]
+
+When powering on a null_blk device that is not already on, the return
+value ret that is initialized to be count is reused to check the return
+value of null_add_dev(), leading to nullb_device_power_store() to return
+null_add_dev() return value (0 on success) instead of "count".
+So make sure to set ret to be equal to count when there are no errors.
+
+Fixes: a2db328b0839 ("null_blk: fix null-ptr-dereference while configuring 'power' and 'submit_queues'")
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
+Link: https://lore.kernel.org/r/20240527043445.235267-1-dlemoal@kernel.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/block/null_blk/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
+index 3b3fd093b0044..620679a0ac381 100644
+--- a/drivers/block/null_blk/main.c
++++ b/drivers/block/null_blk/main.c
+@@ -483,6 +483,7 @@ static ssize_t nullb_device_power_store(struct config_item *item,
+               set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
+               dev->power = newp;
++              ret = count;
+       } else if (dev->power && !newp) {
+               if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
+                       dev->power = newp;
+-- 
+2.43.0
+
diff --git a/queue-6.9/nvme-fix-multipath-batched-completion-accounting.patch b/queue-6.9/nvme-fix-multipath-batched-completion-accounting.patch
new file mode 100644 (file)
index 0000000..656dab3
--- /dev/null
@@ -0,0 +1,67 @@
+From 68576d722789909fcc6afd269576c6c14afbb967 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 09:50:47 -0700
+Subject: nvme: fix multipath batched completion accounting
+
+From: Keith Busch <kbusch@kernel.org>
+
+[ Upstream commit 2fe7b422460d14b33027d8770f7be8d26bcb2639 ]
+
+Batched completions were missing the io stats accounting and bio trace
+events. Move the common code to a helper and call it from the batched
+and non-batched functions.
+
+Fixes: d4d957b53d91ee ("nvme-multipath: support io stats on the mpath device")
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/core.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 095f59e7aa937..5008964f3ebe8 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -414,6 +414,14 @@ static inline void nvme_end_req_zoned(struct request *req)
+       }
+ }
++static inline void __nvme_end_req(struct request *req)
++{
++      nvme_end_req_zoned(req);
++      nvme_trace_bio_complete(req);
++      if (req->cmd_flags & REQ_NVME_MPATH)
++              nvme_mpath_end_request(req);
++}
++
+ static inline void nvme_end_req(struct request *req)
+ {
+       blk_status_t status = nvme_error_status(nvme_req(req)->status);
+@@ -424,10 +432,7 @@ static inline void nvme_end_req(struct request *req)
+               else
+                       nvme_log_error(req);
+       }
+-      nvme_end_req_zoned(req);
+-      nvme_trace_bio_complete(req);
+-      if (req->cmd_flags & REQ_NVME_MPATH)
+-              nvme_mpath_end_request(req);
++      __nvme_end_req(req);
+       blk_mq_end_request(req, status);
+ }
+@@ -476,7 +481,7 @@ void nvme_complete_batch_req(struct request *req)
+ {
+       trace_nvme_complete_rq(req);
+       nvme_cleanup_cmd(req);
+-      nvme_end_req_zoned(req);
++      __nvme_end_req(req);
+ }
+ EXPORT_SYMBOL_GPL(nvme_complete_batch_req);
+-- 
+2.43.0
+
diff --git a/queue-6.9/nvme-multipath-fix-io-accounting-on-failover.patch b/queue-6.9/nvme-multipath-fix-io-accounting-on-failover.patch
new file mode 100644 (file)
index 0000000..9b5db6a
--- /dev/null
@@ -0,0 +1,66 @@
+From 558f0d0f1e56d0c06dce1ae19a9c867df777ed08 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 11:02:28 -0700
+Subject: nvme-multipath: fix io accounting on failover
+
+From: Keith Busch <kbusch@kernel.org>
+
+[ Upstream commit a2e4c5f5f68dbd206f132bc709b98dea64afc3b8 ]
+
+There is io stats accounting that needs to be handled, so don't call
+blk_mq_end_request() directly. Use the existing nvme_end_req() helper
+that already handles everything.
+
+Fixes: d4d957b53d91ee ("nvme-multipath: support io stats on the mpath device")
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/core.c      | 2 +-
+ drivers/nvme/host/multipath.c | 3 ++-
+ drivers/nvme/host/nvme.h      | 1 +
+ 3 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 5008964f3ebe8..d513fd27589df 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -422,7 +422,7 @@ static inline void __nvme_end_req(struct request *req)
+               nvme_mpath_end_request(req);
+ }
+-static inline void nvme_end_req(struct request *req)
++void nvme_end_req(struct request *req)
+ {
+       blk_status_t status = nvme_error_status(nvme_req(req)->status);
+diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
+index d16e976ae1a47..a4e46eb20be63 100644
+--- a/drivers/nvme/host/multipath.c
++++ b/drivers/nvme/host/multipath.c
+@@ -118,7 +118,8 @@ void nvme_failover_req(struct request *req)
+       blk_steal_bios(&ns->head->requeue_list, req);
+       spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
+-      blk_mq_end_request(req, 0);
++      nvme_req(req)->status = 0;
++      nvme_end_req(req);
+       kblockd_schedule_work(&ns->head->requeue_work);
+ }
+diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
+index 05532c2811774..d7bcc6d51e84e 100644
+--- a/drivers/nvme/host/nvme.h
++++ b/drivers/nvme/host/nvme.h
+@@ -767,6 +767,7 @@ static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl)
+       }
+ }
++void nvme_end_req(struct request *req);
+ void nvme_complete_rq(struct request *req);
+ void nvme_complete_batch_req(struct request *req);
+-- 
+2.43.0
+
diff --git a/queue-6.9/nvmet-fix-ns-enable-disable-possible-hang.patch b/queue-6.9/nvmet-fix-ns-enable-disable-possible-hang.patch
new file mode 100644 (file)
index 0000000..c066555
--- /dev/null
@@ -0,0 +1,59 @@
+From e8177c68e3f56c19670d170e9aacd1054402e315 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 23:20:28 +0300
+Subject: nvmet: fix ns enable/disable possible hang
+
+From: Sagi Grimberg <sagi@grimberg.me>
+
+[ Upstream commit f97914e35fd98b2b18fb8a092e0a0799f73afdfe ]
+
+When disabling an nvmet namespace, there is a period where the
+subsys->lock is released, as the ns disable waits for backend IO to
+complete, and the ns percpu ref to be properly killed. The original
+intent was to avoid taking the subsystem lock for a prolonged period as
+other processes may need to acquire it (for example new incoming
+connections).
+
+However, it opens up a window where another process may come in and
+enable the ns, (re)initializing the ns percpu_ref, causing the disable
+sequence to hang.
+
+Solve this by taking the global nvmet_config_sem over the entire configfs
+enable/disable sequence.
+
+Fixes: a07b4970f464 ("nvmet: add a generic NVMe target")
+Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/target/configfs.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
+index 7fda69395c1ef..dfdff6aba6953 100644
+--- a/drivers/nvme/target/configfs.c
++++ b/drivers/nvme/target/configfs.c
+@@ -676,10 +676,18 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item,
+       if (kstrtobool(page, &enable))
+               return -EINVAL;
++      /*
++       * take a global nvmet_config_sem because the disable routine has a
++       * window where it releases the subsys-lock, giving a chance to
++       * a parallel enable to concurrently execute causing the disable to
++       * have a misaccounting of the ns percpu_ref.
++       */
++      down_write(&nvmet_config_sem);
+       if (enable)
+               ret = nvmet_ns_enable(ns);
+       else
+               nvmet_ns_disable(ns);
++      up_write(&nvmet_config_sem);
+       return ret ? ret : count;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.9/octeontx2-pf-free-send-queue-buffers-incase-of-leaf-.patch b/queue-6.9/octeontx2-pf-free-send-queue-buffers-incase-of-leaf-.patch
new file mode 100644 (file)
index 0000000..bc6d6d9
--- /dev/null
@@ -0,0 +1,63 @@
+From 2833314e3636eaedf95eedb42839ae705652e6f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 13:06:26 +0530
+Subject: Octeontx2-pf: Free send queue buffers incase of leaf to inner
+
+From: Hariprasad Kelam <hkelam@marvell.com>
+
+[ Upstream commit 1684842147677a1279bcff95f8adb6de9a656e30 ]
+
+There are two types of classes. "Leaf classes" are at the
+bottom of the class hierarchy. "Inner classes" are neither
+the root class nor leaf classes. QoS rules can only specify leaf
+classes as targets for traffic.
+
+                        Root
+                       /  \
+                      /    \
+                      1      2
+                             /\
+                            /  \
+                           4    5
+               classes 1,4 and 5 are leaf classes.
+               class 2 is a inner class.
+
+When a leaf class is made inner, or vice versa, resources associated
+with send queue (send queue buffers and transmit schedulers) are not
+getting freed.
+
+Fixes: 5e6808b4c68d ("octeontx2-pf: Add support for HTB offload")
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Link: https://lore.kernel.org/r/20240523073626.4114-1-hkelam@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/nic/qos.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+index 1723e9912ae07..6cddb4da85b71 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+@@ -1407,7 +1407,10 @@ static int otx2_qos_leaf_to_inner(struct otx2_nic *pfvf, u16 classid,
+       otx2_qos_read_txschq_cfg(pfvf, node, old_cfg);
+       /* delete the txschq nodes allocated for this node */
++      otx2_qos_disable_sq(pfvf, qid);
++      otx2_qos_free_hw_node_schq(pfvf, node);
+       otx2_qos_free_sw_node_schq(pfvf, node);
++      pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ;
+       /* mark this node as htb inner node */
+       WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER);
+@@ -1554,6 +1557,7 @@ static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force
+               dwrr_del_node = true;
+       /* destroy the leaf node */
++      otx2_qos_disable_sq(pfvf, qid);
+       otx2_qos_destroy_node(pfvf, node);
+       pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ;
+-- 
+2.43.0
+
diff --git a/queue-6.9/powerpc-pseries-lparcfg-drop-error-message-from-gues.patch b/queue-6.9/powerpc-pseries-lparcfg-drop-error-message-from-gues.patch
new file mode 100644 (file)
index 0000000..bc7b53c
--- /dev/null
@@ -0,0 +1,41 @@
+From 6988182d567877a6897bc91b393d5ce7f1d0899e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2024 14:29:54 -0500
+Subject: powerpc/pseries/lparcfg: drop error message from guest name lookup
+
+From: Nathan Lynch <nathanl@linux.ibm.com>
+
+[ Upstream commit 12870ae3818e39ea65bf710f645972277b634f72 ]
+
+It's not an error or exceptional situation when the hosting
+environment does not expose a name for the LP/guest via RTAS or the
+device tree. This happens with qemu when run without the '-name'
+option. The message also lacks a newline. Remove it.
+
+Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
+Fixes: eddaa9a40275 ("powerpc/pseries: read the lpar name from the firmware")
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20240524-lparcfg-updates-v2-1-62e2e9d28724@linux.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/platforms/pseries/lparcfg.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
+index fb0189af2dea1..0ed56e56271fe 100644
+--- a/arch/powerpc/platforms/pseries/lparcfg.c
++++ b/arch/powerpc/platforms/pseries/lparcfg.c
+@@ -361,8 +361,8 @@ static int read_dt_lpar_name(struct seq_file *m)
+ static void read_lpar_name(struct seq_file *m)
+ {
+-      if (read_rtas_lpar_name(m) && read_dt_lpar_name(m))
+-              pr_err_once("Error can't get the LPAR name");
++      if (read_rtas_lpar_name(m))
++              read_dt_lpar_name(m);
+ }
+ #define SPLPAR_MAXLENGTH 1026*(sizeof(char))
+-- 
+2.43.0
+
diff --git a/queue-6.9/powerpc-uaccess-use-yz-asm-constraint-for-ld.patch b/queue-6.9/powerpc-uaccess-use-yz-asm-constraint-for-ld.patch
new file mode 100644 (file)
index 0000000..5f7b296
--- /dev/null
@@ -0,0 +1,64 @@
+From d97895b6c6abbeef07703c35fe3a17a323bd1464 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 May 2024 22:30:29 +1000
+Subject: powerpc/uaccess: Use YZ asm constraint for ld
+
+From: Michael Ellerman <mpe@ellerman.id.au>
+
+[ Upstream commit 50934945d54238d2d6d8db4b7c1d4c90d2696c57 ]
+
+The 'ld' instruction requires a 4-byte aligned displacement because it
+is a DS-form instruction. But the "m" asm constraint doesn't enforce
+that.
+
+Add a special case of __get_user_asm2_goto() so that the "YZ" constraint
+can be used for "ld".
+
+The "Z" constraint is documented in the GCC manual PowerPC machine
+constraints, and specifies a "memory operand accessed with indexed or
+indirect addressing". "Y" is not documented in the manual but specifies
+a "memory operand for a DS-form instruction". Using both allows the
+compiler to generate a DS-form "ld" or X-form "ldx" as appropriate.
+
+The change has to be conditional on CONFIG_PPC_KERNEL_PREFIXED because
+the "Y" constraint does not guarantee 4-byte alignment when prefixed
+instructions are enabled.
+
+No build errors have been reported due to this, but the possibility is
+there depending on compiler code generation decisions.
+
+Fixes: c20beffeec3c ("powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()")
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20240529123029.146953-2-mpe@ellerman.id.au
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/uaccess.h | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
+index de10437fd2065..ac4f6e4ae5810 100644
+--- a/arch/powerpc/include/asm/uaccess.h
++++ b/arch/powerpc/include/asm/uaccess.h
+@@ -165,8 +165,19 @@ do {                                                              \
+ #endif
+ #ifdef __powerpc64__
++#ifdef CONFIG_PPC_KERNEL_PREFIXED
+ #define __get_user_asm2_goto(x, addr, label)                  \
+       __get_user_asm_goto(x, addr, label, "ld")
++#else
++#define __get_user_asm2_goto(x, addr, label)                  \
++      asm_goto_output(                                        \
++              "1:     ld%U1%X1 %0, %1 # get_user\n"           \
++              EX_TABLE(1b, %l2)                               \
++              : "=r" (x)                                      \
++              : DS_FORM_CONSTRAINT (*addr)                    \
++              :                                               \
++              : label)
++#endif // CONFIG_PPC_KERNEL_PREFIXED
+ #else /* __powerpc64__ */
+ #define __get_user_asm2_goto(x, addr, label)                  \
+       asm_goto_output(                                        \
+-- 
+2.43.0
+
diff --git a/queue-6.9/riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch b/queue-6.9/riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch
new file mode 100644 (file)
index 0000000..55c79d6
--- /dev/null
@@ -0,0 +1,63 @@
+From d0f949d571562a449f986643135af662dad8b3a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 11:43:23 +0300
+Subject: riscv: prevent pt_regs corruption for secondary idle threads
+
+From: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
+
+[ Upstream commit a638b0461b58aa3205cd9d5f14d6f703d795b4af ]
+
+Top of the kernel thread stack should be reserved for pt_regs. However
+this is not the case for the idle threads of the secondary boot harts.
+Their stacks overlap with their pt_regs, so both may get corrupted.
+
+A similar issue has been fixed for the primary hart, see c7cdd96eca28
+("riscv: prevent stack corruption by reserving task_pt_regs(p) early").
+However that fix was not propagated to the secondary harts. The problem
+has been noticed in some CPU hotplug tests with V enabled. The function
+smp_callin stored several registers on stack, corrupting top of pt_regs
+structure including status field. As a result, kernel attempted to save
+or restore inexistent V context.
+
+Fixes: 9a2451f18663 ("RISC-V: Avoid using per cpu array for ordered booting")
+Fixes: 2875fe056156 ("RISC-V: Add cpu_ops and modify default booting method")
+Signed-off-by: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Link: https://lore.kernel.org/r/20240523084327.2013211-1-geomatsi@gmail.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/cpu_ops_sbi.c      | 2 +-
+ arch/riscv/kernel/cpu_ops_spinwait.c | 3 +--
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
+index 1cc7df740eddc..e6fbaaf549562 100644
+--- a/arch/riscv/kernel/cpu_ops_sbi.c
++++ b/arch/riscv/kernel/cpu_ops_sbi.c
+@@ -72,7 +72,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
+       /* Make sure tidle is updated */
+       smp_mb();
+       bdata->task_ptr = tidle;
+-      bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
++      bdata->stack_ptr = task_pt_regs(tidle);
+       /* Make sure boot data is updated */
+       smp_mb();
+       hsm_data = __pa(bdata);
+diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c
+index 613872b0a21ac..24869eb889085 100644
+--- a/arch/riscv/kernel/cpu_ops_spinwait.c
++++ b/arch/riscv/kernel/cpu_ops_spinwait.c
+@@ -34,8 +34,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid,
+       /* Make sure tidle is updated */
+       smp_mb();
+-      WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
+-                 task_stack_page(tidle) + THREAD_SIZE);
++      WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], task_pt_regs(tidle));
+       WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.9/sd-also-set-max_user_sectors-when-setting-max_sector.patch b/queue-6.9/sd-also-set-max_user_sectors-when-setting-max_sector.patch
new file mode 100644 (file)
index 0000000..4db7938
--- /dev/null
@@ -0,0 +1,47 @@
+From 7a49f90b77513ee926ee9762ebe7f01ef8acc1d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 20:26:13 +0200
+Subject: sd: also set max_user_sectors when setting max_sectors
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit bafea1c58b24be594d97841ced1b7ae0347bf6e3 ]
+
+sd can set a max_sectors value that is lower than the max_hw_sectors
+limit based on the block limits VPD page.   While this is rather unusual,
+it used to work until the max_user_sectors field was split out to cleanly
+deal with conflicting hardware and user limits when the hardware limit
+changes.  Also set max_user_sectors to ensure the limit can properly be
+stacked.
+
+Fixes: 4f563a64732d ("block: add a max_user_discard_sectors queue limit")
+Reported-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Mike Snitzer <snitzer@kernel.org>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Link: https://lore.kernel.org/r/20240523182618.602003-2-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/sd.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
+index 65cdc8b77e358..caac482fff2ff 100644
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -3707,8 +3707,10 @@ static int sd_revalidate_disk(struct gendisk *disk)
+        */
+       if (sdkp->first_scan ||
+           q->limits.max_sectors > q->limits.max_dev_sectors ||
+-          q->limits.max_sectors > q->limits.max_hw_sectors)
++          q->limits.max_sectors > q->limits.max_hw_sectors) {
+               q->limits.max_sectors = rw_max;
++              q->limits.max_user_sectors = rw_max;
++      }
+       sdkp->first_scan = 0;
+-- 
+2.43.0
+
diff --git a/queue-6.9/selftests-mptcp-add-ms-units-for-tc-netem-delay.patch b/queue-6.9/selftests-mptcp-add-ms-units-for-tc-netem-delay.patch
new file mode 100644 (file)
index 0000000..a83981f
--- /dev/null
@@ -0,0 +1,66 @@
+From ea8cda72de0494e0a0e280edda69ad20060d6f10 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Apr 2024 12:52:06 +0200
+Subject: selftests: mptcp: add ms units for tc-netem delay
+
+From: Geliang Tang <tanggeliang@kylinos.cn>
+
+[ Upstream commit 9109853a388b7b2b934f56f4ddb250d72e486555 ]
+
+'delay 1' in tc-netem is confusing, not sure if it's a delay of 1 second or
+1 millisecond. This patch explicitly adds millisecond units to make these
+commands clearer.
+
+Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 38af56e6668b ("selftests: mptcp: join: mark 'fail' tests as flaky")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_join.sh   | 6 +++---
+ tools/testing/selftests/net/mptcp/simult_flows.sh | 4 ++--
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index 908ef799b13a0..8d16f37cd67f8 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -125,8 +125,8 @@ init_shapers()
+ {
+       local i
+       for i in $(seq 1 4); do
+-              tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
+-              tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
++              tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1ms
++              tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1ms
+       done
+ }
+@@ -3218,7 +3218,7 @@ fail_tests()
+       # multiple subflows
+       if reset_with_fail "MP_FAIL MP_RST" 2; then
+-              tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5
++              tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms
+               pm_nl_set_limits $ns1 0 1
+               pm_nl_set_limits $ns2 0 1
+               pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
+index dfdb7031c187a..7322e1e4e5db6 100755
+--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
+@@ -216,8 +216,8 @@ run_test()
+       shift 4
+       local msg=$*
+-      [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1=""
+-      [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2=""
++      [ $delay1 -gt 0 ] && delay1="delay ${delay1}ms" || delay1=""
++      [ $delay2 -gt 0 ] && delay2="delay ${delay2}ms" || delay2=""
+       for dev in ns1eth1 ns1eth2; do
+               tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1
+-- 
+2.43.0
+
diff --git a/queue-6.9/selftests-mptcp-join-mark-fail-tests-as-flaky.patch b/queue-6.9/selftests-mptcp-join-mark-fail-tests-as-flaky.patch
new file mode 100644 (file)
index 0000000..cd54c6d
--- /dev/null
@@ -0,0 +1,51 @@
+From a0c8fcd249700f5c74dbcdfea6adca812add0ffb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2024 18:30:59 +0200
+Subject: selftests: mptcp: join: mark 'fail' tests as flaky
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+[ Upstream commit 38af56e6668b455f7dd0a8e2d9afe74100068e17 ]
+
+These tests are rarely unstable. It depends on the CI running the tests,
+especially if it is also busy doing other tasks in parallel, and if a
+debug kernel config is being used.
+
+It looks like this issue is sometimes present with the NetDev CI. While
+this is being investigated, the tests are marked as flaky not to create
+noises on such CIs.
+
+Fixes: b6e074e171bc ("selftests: mptcp: add infinite map testcase")
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/491
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://lore.kernel.org/r/20240524-upstream-net-20240524-selftests-mptcp-flaky-v1-4-a352362f3f8e@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index 8d16f37cd67f8..1b5722e6166e5 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -3210,6 +3210,7 @@ fail_tests()
+ {
+       # single subflow
+       if reset_with_fail "Infinite map" 1; then
++              MPTCP_LIB_SUBTEST_FLAKY=1
+               test_linkfail=128 \
+                       run_tests $ns1 $ns2 10.0.1.1
+               chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
+@@ -3218,6 +3219,7 @@ fail_tests()
+       # multiple subflows
+       if reset_with_fail "MP_FAIL MP_RST" 2; then
++              MPTCP_LIB_SUBTEST_FLAKY=1
+               tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms
+               pm_nl_set_limits $ns1 0 1
+               pm_nl_set_limits $ns2 0 1
+-- 
+2.43.0
+
diff --git a/queue-6.9/selftests-mptcp-join-mark-fastclose-tests-as-flaky.patch b/queue-6.9/selftests-mptcp-join-mark-fastclose-tests-as-flaky.patch
new file mode 100644 (file)
index 0000000..32b6077
--- /dev/null
@@ -0,0 +1,72 @@
+From 0452d7a4ae1aab096183c0c4383dbd1a5bb8a6e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2024 18:30:58 +0200
+Subject: selftests: mptcp: join: mark 'fastclose' tests as flaky
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+[ Upstream commit 8c06ac2178a9dee887929232226e35a5cdda1793 ]
+
+These tests are flaky since their introduction. This might be less or
+not visible depending on the CI running the tests, especially if it is
+also busy doing other tasks in parallel, and if a debug kernel config is
+being used.
+
+It looks like this issue is often present with the NetDev CI. While this
+is being investigated, the tests are marked as flaky not to create
+noises on such CIs.
+
+Fixes: 01542c9bf9ab ("selftests: mptcp: add fastclose testcase")
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/324
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://lore.kernel.org/r/20240524-upstream-net-20240524-selftests-mptcp-flaky-v1-3-a352362f3f8e@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_join.sh | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index e4403236f6554..908ef799b13a0 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -262,6 +262,8 @@ reset()
+       TEST_NAME="${1}"
++      MPTCP_LIB_SUBTEST_FLAKY=0 # reset if modified
++
+       if skip_test; then
+               MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1))
+               last_test_ignored=1
+@@ -449,7 +451,9 @@ reset_with_tcp_filter()
+ # $1: err msg
+ fail_test()
+ {
+-      ret=${KSFT_FAIL}
++      if ! mptcp_lib_subtest_is_flaky; then
++              ret=${KSFT_FAIL}
++      fi
+       if [ ${#} -gt 0 ]; then
+               print_fail "${@}"
+@@ -3178,6 +3182,7 @@ fullmesh_tests()
+ fastclose_tests()
+ {
+       if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
++              MPTCP_LIB_SUBTEST_FLAKY=1
+               test_linkfail=1024 fastclose=client \
+                       run_tests $ns1 $ns2 10.0.1.1
+               chk_join_nr 0 0 0
+@@ -3186,6 +3191,7 @@ fastclose_tests()
+       fi
+       if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
++              MPTCP_LIB_SUBTEST_FLAKY=1
+               test_linkfail=1024 fastclose=server \
+                       run_tests $ns1 $ns2 10.0.1.1
+               chk_join_nr 0 0 0 0 0 0 1
+-- 
+2.43.0
+
diff --git a/queue-6.9/selftests-mptcp-simult-flows-mark-unbalanced-tests-a.patch b/queue-6.9/selftests-mptcp-simult-flows-mark-unbalanced-tests-a.patch
new file mode 100644 (file)
index 0000000..b149ffc
--- /dev/null
@@ -0,0 +1,68 @@
+From 4f69529a85f657c84c6cee2098cd28b086ca9a6e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 May 2024 18:30:57 +0200
+Subject: selftests: mptcp: simult flows: mark 'unbalanced' tests as flaky
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+[ Upstream commit cc73a6577ae64247898269d138dee6b73ff710cc ]
+
+These tests have been flaky since their introduction. This might be less or
+not visible depending on the CI running the tests, especially if it is
+also busy doing other tasks in parallel.
+
+A first analysis showed that the transfer can be slowed down when there
+are some re-injections at the MPTCP level. Such re-injections can of
+course happen, and disturb the transfer, but it looks strange to have
+them in this lab. That could be caused by the kernel having access to
+less CPU cycles -- e.g. when other activities are executed in parallel
+-- or by a misinterpretation on the MPTCP packet scheduler side.
+
+While this is being investigated, the tests are marked as flaky so as
+not to create noise in other CIs.
+
+Fixes: 219d04992b68 ("mptcp: push pending frames when subflow has free space")
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/475
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://lore.kernel.org/r/20240524-upstream-net-20240524-selftests-mptcp-flaky-v1-2-a352362f3f8e@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/simult_flows.sh | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
+index 1b23662203881..dfdb7031c187a 100755
+--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
+@@ -243,7 +243,7 @@ run_test()
+       do_transfer $small $large $time
+       lret=$?
+       mptcp_lib_result_code "${lret}" "${msg}"
+-      if [ $lret -ne 0 ]; then
++      if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
+               ret=$lret
+               [ $bail -eq 0 ] || exit $ret
+       fi
+@@ -253,7 +253,7 @@ run_test()
+       do_transfer $large $small $time
+       lret=$?
+       mptcp_lib_result_code "${lret}" "${msg}"
+-      if [ $lret -ne 0 ]; then
++      if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
+               ret=$lret
+               [ $bail -eq 0 ] || exit $ret
+       fi
+@@ -286,7 +286,7 @@ run_test 10 10 0 0 "balanced bwidth"
+ run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
+ # we still need some additional infrastructure to pass the following test-cases
+-run_test 10 3 0 0 "unbalanced bwidth"
++MPTCP_LIB_SUBTEST_FLAKY=1 run_test 10 3 0 0 "unbalanced bwidth"
+ run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
+ run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
+-- 
+2.43.0
+
index f91396973c27b8a93cc6b4ba0a60d63ee87d7295..0d40b67f49899c734920fc19c7cd446fc8cfd4fe 100644 (file)
@@ -280,3 +280,86 @@ cifs-fix-missing-set-of-remote_i_size.patch
 tracing-probes-fix-error-check-in-parse_btf_field.patch
 tpm_tis_spi-account-for-spi-header-when-allocating-t.patch
 tcp-reduce-accepted-window-in-new_syn_recv-state.patch
+netfilter-nfnetlink_queue-acquire-rcu_read_lock-in-i.patch
+netfilter-ipset-add-list-flush-to-cancel_gc.patch
+netfilter-nft_payload-restore-vlan-q-in-q-match-supp.patch
+spi-don-t-mark-message-dma-mapped-when-no-transfer-i.patch
+spi-stm32-revert-change-that-enabled-controller-befo.patch
+dma-mapping-benchmark-fix-up-kthread-related-error-h.patch
+dma-mapping-benchmark-fix-node-id-validation.patch
+dma-mapping-benchmark-handle-numa_no_node-correctly.patch
+nvme-fix-multipath-batched-completion-accounting.patch
+nvme-multipath-fix-io-accounting-on-failover.patch
+nvmet-fix-ns-enable-disable-possible-hang.patch
+drm-amd-display-enable-colorspace-property-for-mst-c.patch
+net-phy-micrel-set-soft_reset-callback-to-genphy_sof.patch
+net-mlx5-lag-do-bond-only-if-slaves-agree-on-roce-st.patch
+net-mlx5-do-not-query-mpir-on-embedded-cpu-function.patch
+net-mlx5-fix-mtmp-register-capability-offset-in-mcam.patch
+net-mlx5-use-mlx5_ipsec_rx_status_destroy-to-correct.patch
+net-mlx5e-fix-ipsec-tunnel-mode-offload-feature-chec.patch
+net-mlx5e-use-rx_missed_errors-instead-of-rx_dropped.patch
+net-mlx5e-fix-udp-gso-for-encapsulated-packets.patch
+dma-buf-sw-sync-don-t-enable-irq-from-sync_print_obj.patch
+bpf-fix-potential-integer-overflow-in-resolve_btfids.patch
+netkit-fix-setting-mac-address-in-l2-mode.patch
+netkit-fix-pkt_type-override-upon-netkit-pass-verdic.patch
+alsa-core-remove-debugfs-at-disconnection.patch
+alsa-hda-realtek-adjust-g814jzr-to-use-spi-init-for-.patch
+enic-validate-length-of-nl-attributes-in-enic_set_vf.patch
+af_unix-annotate-data-race-around-unix_sk-sk-addr.patch
+af_unix-read-sk-sk_hash-under-bindlock-during-bind.patch
+octeontx2-pf-free-send-queue-buffers-incase-of-leaf-.patch
+net-usb-smsc95xx-fix-changing-led_sel-bit-value-upda.patch
+asoc-cs42l43-only-restrict-44.1khz-for-the-asp.patch
+bpf-allow-delete-from-sockmap-sockhash-only-if-updat.patch
+null_blk-fix-return-value-of-nullb_device_power_stor.patch
+ipv4-fix-address-dump-when-ipv4-is-disabled-on-an-in.patch
+tcp-reduce-accepted-window-in-new_syn_recv-state.patch-1230
+net-fec-add-fec_enet_deinit.patch
+net-micrel-fix-lan8841_config_intr-after-getting-out.patch
+idpf-don-t-enable-napi-and-interrupts-prior-to-alloc.patch
+ice-fix-accounting-if-a-vlan-already-exists.patch
+selftests-mptcp-simult-flows-mark-unbalanced-tests-a.patch
+selftests-mptcp-join-mark-fastclose-tests-as-flaky.patch
+selftests-mptcp-add-ms-units-for-tc-netem-delay.patch
+selftests-mptcp-join-mark-fail-tests-as-flaky.patch
+drm-xe-add-dbg-messages-on-the-suspend-resume-functi.patch
+drm-xe-check-pcode-init-status-only-on-root-gt-of-ro.patch
+drm-xe-change-pcode-timeout-to-50msec-while-polling-.patch
+drm-xe-only-use-reserved-bcs-instances-for-usm-migra.patch
+alsa-seq-fix-missing-bank-setup-between-midi1-midi2-.patch
+alsa-seq-don-t-clear-bank-selection-at-event-ump-mid.patch
+sd-also-set-max_user_sectors-when-setting-max_sector.patch
+block-stack-max_user_sectors.patch
+net-ti-icssg-prueth-fix-start-counter-for-ft1-filter.patch
+netfilter-nft_payload-skbuff-vlan-metadata-mangle-su.patch
+netfilter-tproxy-bail-out-if-ip-has-been-disabled-on.patch
+netfilter-nft_fib-allow-from-forward-input-without-i.patch
+net-sched-taprio-make-q-picos_per_byte-available-to-.patch
+net-sched-taprio-extend-minimum-interval-restriction.patch
+kconfig-fix-comparison-to-constant-symbols-m-n.patch
+drm-i915-guc-avoid-field_prep-warning.patch
+drm-i915-gt-fix-ccs-id-s-calculation-for-ccs-mode-se.patch
+kheaders-use-command-v-to-test-for-existence-of-cpio.patch
+spi-stm32-don-t-warn-about-spurious-interrupts.patch
+drm-amdgpu-adjust-logic-in-amdgpu_device_partner_ban.patch
+ipv6-introduce-dst_rt6_info-helper.patch
+inet-introduce-dst_rtable-helper.patch
+net-fix-__dst_negative_advice-race.patch
+ipv4-correctly-iterate-over-the-target-netns-in-inet.patch
+net-dsa-microchip-fix-rgmii-error-in-ksz-dsa-driver.patch
+e1000e-move-force-smbus-near-the-end-of-enable_ulp-f.patch
+ice-fix-200g-phy-types-to-link-speed-mapping.patch
+net-ena-fix-redundant-device-numa-node-override.patch
+ipvlan-dont-use-skb-sk-in-ipvlan_process_v-4-6-_outb.patch
+alsa-seq-fix-yet-another-spot-for-system-message-con.patch
+powerpc-pseries-lparcfg-drop-error-message-from-gues.patch
+powerpc-uaccess-use-yz-asm-constraint-for-ld.patch
+drm-panel-sitronix-st7789v-fix-timing-for-jt240mhqs_.patch
+drm-panel-sitronix-st7789v-tweak-timing-for-jt240mhq.patch
+drm-panel-sitronix-st7789v-fix-display-size-for-jt24.patch
+hwmon-intel-m10-bmc-hwmon-fix-multiplier-for-n6000-b.patch
+hwmon-shtc1-fix-property-misspelling.patch
+riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch
+alsa-seq-ump-fix-swapped-song-position-pointer-data.patch
diff --git a/queue-6.9/spi-don-t-mark-message-dma-mapped-when-no-transfer-i.patch b/queue-6.9/spi-don-t-mark-message-dma-mapped-when-no-transfer-i.patch
new file mode 100644 (file)
index 0000000..a703787
--- /dev/null
@@ -0,0 +1,48 @@
+From 004fc4b0eab78a92024c79e152bf1423ea96c3e4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 May 2024 20:09:49 +0300
+Subject: spi: Don't mark message DMA mapped when no transfer in it is
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit 9f788ba457b45b0ce422943fcec9fa35c4587764 ]
+
+There is no need to set the DMA mapped flag of the message if it has
+no mapped transfers. Moreover, it may give the code a chance to take
+the wrong paths, i.e. to exercise DMA related APIs on unmapped data.
+Make __spi_map_msg() bail out earlier in the above-mentioned cases.
+
+Fixes: 99adef310f68 ("spi: Provide core support for DMA mapping transfers")
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://msgid.link/r/20240522171018.3362521-2-andriy.shevchenko@linux.intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
+index a2c467d9e92f5..2cea7aeb10f95 100644
+--- a/drivers/spi/spi.c
++++ b/drivers/spi/spi.c
+@@ -1242,6 +1242,7 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
+       else
+               rx_dev = ctlr->dev.parent;
++      ret = -ENOMSG;
+       list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+               /* The sync is done before each transfer. */
+               unsigned long attrs = DMA_ATTR_SKIP_CPU_SYNC;
+@@ -1271,6 +1272,9 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
+                       }
+               }
+       }
++      /* No transfer has been mapped, bail out with success */
++      if (ret)
++              return 0;
+       ctlr->cur_rx_dma_dev = rx_dev;
+       ctlr->cur_tx_dma_dev = tx_dev;
+-- 
+2.43.0
+
diff --git a/queue-6.9/spi-stm32-don-t-warn-about-spurious-interrupts.patch b/queue-6.9/spi-stm32-don-t-warn-about-spurious-interrupts.patch
new file mode 100644 (file)
index 0000000..7cfb967
--- /dev/null
@@ -0,0 +1,43 @@
+From 54f4a07a973a0c2121a99823bc7cd0cb07b7b31e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 May 2024 12:52:42 +0200
+Subject: spi: stm32: Don't warn about spurious interrupts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+
+[ Upstream commit 95d7c452a26564ef0c427f2806761b857106d8c4 ]
+
+The dev_warn to notify about a spurious interrupt was introduced with
+the reasoning that these are unexpected. However spurious interrupts
+tend to trigger continuously, and the error message on the serial console
+prevents the core's detection of spurious interrupts from kicking in
+(which would disable the irq) and just floods the console.
+
+Fixes: c64e7efe46b7 ("spi: stm32: make spurious and overrun interrupts visible")
+Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Link: https://msgid.link/r/20240521105241.62400-2-u.kleine-koenig@pengutronix.de
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-stm32.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
+index e4e7ddb7524a9..4c4ff074e3f6f 100644
+--- a/drivers/spi/spi-stm32.c
++++ b/drivers/spi/spi-stm32.c
+@@ -1057,7 +1057,7 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
+               mask |= STM32H7_SPI_SR_TXP | STM32H7_SPI_SR_RXP;
+       if (!(sr & mask)) {
+-              dev_warn(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n",
++              dev_vdbg(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n",
+                        sr, ier);
+               spin_unlock_irqrestore(&spi->lock, flags);
+               return IRQ_NONE;
+-- 
+2.43.0
+
diff --git a/queue-6.9/spi-stm32-revert-change-that-enabled-controller-befo.patch b/queue-6.9/spi-stm32-revert-change-that-enabled-controller-befo.patch
new file mode 100644 (file)
index 0000000..73b497e
--- /dev/null
@@ -0,0 +1,108 @@
+From 27343412d1e6a6a47b03f4510e023796752f2ee7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 12:33:25 +0200
+Subject: spi: stm32: Revert change that enabled controller before asserting CS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+
+[ Upstream commit a827ad9b3c2fc243e058595533f91ce41a312527 ]
+
+On stm32mp157 enabling the controller before asserting CS makes the
+hardware trigger spurious interrupts in a tight loop and the transfers
+fail. Revert the commit that swapped the order of enable and CS. This
+reintroduces the problem that swapping was supposed to fix, which
+however is less grave.
+
+Reported-by: Leonard Göhrs <l.goehrs@pengutronix.de>
+Link: https://lore.kernel.org/all/39033ed7-3e57-4339-80b4-fc8919e26aa7@pengutronix.de/
+Fixes: 52b62e7a5d4f ("spi: stm32: enable controller before asserting CS")
+Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Link: https://msgid.link/r/20240523103326.792907-2-u.kleine-koenig@pengutronix.de
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-stm32.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
+index 4a68abcdcc353..e4e7ddb7524a9 100644
+--- a/drivers/spi/spi-stm32.c
++++ b/drivers/spi/spi-stm32.c
+@@ -1016,8 +1016,10 @@ static irqreturn_t stm32fx_spi_irq_event(int irq, void *dev_id)
+ static irqreturn_t stm32fx_spi_irq_thread(int irq, void *dev_id)
+ {
+       struct spi_controller *ctrl = dev_id;
++      struct stm32_spi *spi = spi_controller_get_devdata(ctrl);
+       spi_finalize_current_transfer(ctrl);
++      stm32fx_spi_disable(spi);
+       return IRQ_HANDLED;
+ }
+@@ -1185,8 +1187,6 @@ static int stm32_spi_prepare_msg(struct spi_controller *ctrl,
+                        ~clrb) | setb,
+                       spi->base + spi->cfg->regs->cpol.reg);
+-      stm32_spi_enable(spi);
+-
+       spin_unlock_irqrestore(&spi->lock, flags);
+       return 0;
+@@ -1204,6 +1204,7 @@ static void stm32fx_spi_dma_tx_cb(void *data)
+       if (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX) {
+               spi_finalize_current_transfer(spi->ctrl);
++              stm32fx_spi_disable(spi);
+       }
+ }
+@@ -1218,6 +1219,7 @@ static void stm32_spi_dma_rx_cb(void *data)
+       struct stm32_spi *spi = data;
+       spi_finalize_current_transfer(spi->ctrl);
++      spi->cfg->disable(spi);
+ }
+ /**
+@@ -1305,6 +1307,8 @@ static int stm32fx_spi_transfer_one_irq(struct stm32_spi *spi)
+       stm32_spi_set_bits(spi, STM32FX_SPI_CR2, cr2);
++      stm32_spi_enable(spi);
++
+       /* starting data transfer when buffer is loaded */
+       if (spi->tx_buf)
+               spi->cfg->write_tx(spi);
+@@ -1341,6 +1345,8 @@ static int stm32h7_spi_transfer_one_irq(struct stm32_spi *spi)
+       spin_lock_irqsave(&spi->lock, flags);
++      stm32_spi_enable(spi);
++
+       /* Be sure to have data in fifo before starting data transfer */
+       if (spi->tx_buf)
+               stm32h7_spi_write_txfifo(spi);
+@@ -1372,6 +1378,8 @@ static void stm32fx_spi_transfer_one_dma_start(struct stm32_spi *spi)
+                */
+               stm32_spi_set_bits(spi, STM32FX_SPI_CR2, STM32FX_SPI_CR2_ERRIE);
+       }
++
++      stm32_spi_enable(spi);
+ }
+ /**
+@@ -1405,6 +1413,8 @@ static void stm32h7_spi_transfer_one_dma_start(struct stm32_spi *spi)
+       stm32_spi_set_bits(spi, STM32H7_SPI_IER, ier);
++      stm32_spi_enable(spi);
++
+       if (STM32_SPI_HOST_MODE(spi))
+               stm32_spi_set_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_CSTART);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.9/tcp-reduce-accepted-window-in-new_syn_recv-state.patch-1230 b/queue-6.9/tcp-reduce-accepted-window-in-new_syn_recv-state.patch-1230
new file mode 100644 (file)
index 0000000..ddd1786
--- /dev/null
@@ -0,0 +1,121 @@
+From 920adf577d87672913e2ec3db89b029ce9e9b050 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 May 2024 13:05:27 +0000
+Subject: tcp: reduce accepted window in NEW_SYN_RECV state
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit f4dca95fc0f6350918f2e6727e35b41f7f86fcce ]
+
+Jason's commit made checks against the ACK sequence less strict,
+which can be exploited by attackers to establish spoofed flows
+with fewer probes.
+
+Innocent users might use tcp_rmem[1] == 1,000,000,000,
+or something more reasonable.
+
+An attacker can use a regular TCP connection to learn the server
+initial tp->rcv_wnd, and use it to optimize the attack.
+
+If we make sure that only the announced window (smaller than 65535)
+is used for ACK validation, we force an attacker to use
+65537 packets to complete the 3WHS (assuming server ISN is unknown)
+
+Fixes: 378979e94e95 ("tcp: remove 64 KByte limit for initial tp->rcv_wnd value")
+Link: https://datatracker.ietf.org/meeting/119/materials/slides-119-tcpm-ghost-acks-00
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
+Link: https://lore.kernel.org/r/20240523130528.60376-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/request_sock.h | 12 ++++++++++++
+ net/ipv4/tcp_ipv4.c        |  7 +------
+ net/ipv4/tcp_minisocks.c   |  7 +++++--
+ net/ipv6/tcp_ipv6.c        |  7 +------
+ 4 files changed, 19 insertions(+), 14 deletions(-)
+
+diff --git a/include/net/request_sock.h b/include/net/request_sock.h
+index 004e651e6067e..aea7743af3d49 100644
+--- a/include/net/request_sock.h
++++ b/include/net/request_sock.h
+@@ -282,4 +282,16 @@ static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
+       return atomic_read(&queue->young);
+ }
++/* RFC 7323 2.3 Using the Window Scale Option
++ *  The window field (SEG.WND) of every outgoing segment, with the
++ *  exception of <SYN> segments, MUST be right-shifted by
++ *  Rcv.Wind.Shift bits.
++ *
++ * This means the SEG.WND carried in SYNACK can not exceed 65535.
++ * We use this property to harden TCP stack while in NEW_SYN_RECV state.
++ */
++static inline u32 tcp_synack_window(const struct request_sock *req)
++{
++      return min(req->rsk_rcv_wnd, 65535U);
++}
+ #endif /* _REQUEST_SOCK_H */
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index 92511b7fd5249..0c90369bac4fc 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1143,14 +1143,9 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ #endif
+       }
+-      /* RFC 7323 2.3
+-       * The window field (SEG.WND) of every outgoing segment, with the
+-       * exception of <SYN> segments, MUST be right-shifted by
+-       * Rcv.Wind.Shift bits:
+-       */
+       tcp_v4_send_ack(sk, skb, seq,
+                       tcp_rsk(req)->rcv_nxt,
+-                      req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
++                      tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
+                       tcp_rsk_tsval(tcp_rsk(req)),
+                       READ_ONCE(req->ts_recent),
+                       0, &key,
+diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+index f0761f060a837..7abc80b9eccb8 100644
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -783,8 +783,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
+       /* RFC793: "first check sequence number". */
+-      if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
+-                                        tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) {
++      if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq,
++                                        TCP_SKB_CB(skb)->end_seq,
++                                        tcp_rsk(req)->rcv_nxt,
++                                        tcp_rsk(req)->rcv_nxt +
++                                        tcp_synack_window(req))) {
+               /* Out of window: send ACK and drop. */
+               if (!(flg & TCP_FLAG_RST) &&
+                   !tcp_oow_rate_limited(sock_net(sk), skb,
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 3f4cba49e9ee6..b149f54120682 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1268,15 +1268,10 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+       /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+        * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+        */
+-      /* RFC 7323 2.3
+-       * The window field (SEG.WND) of every outgoing segment, with the
+-       * exception of <SYN> segments, MUST be right-shifted by
+-       * Rcv.Wind.Shift bits:
+-       */
+       tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
+                       tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+                       tcp_rsk(req)->rcv_nxt,
+-                      req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
++                      tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
+                       tcp_rsk_tsval(tcp_rsk(req)),
+                       READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
+                       &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+-- 
+2.43.0
+