From: Sasha Levin Date: Wed, 5 Jun 2024 12:09:29 +0000 (-0400) Subject: Fixes for 6.8 X-Git-Tag: v6.1.93~45 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4377fd48f2efeca1e8f59c051b2fe78b55a889ec;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.8 Signed-off-by: Sasha Levin --- diff --git a/queue-6.8/af_unix-annotate-data-race-around-unix_sk-sk-addr.patch b/queue-6.8/af_unix-annotate-data-race-around-unix_sk-sk-addr.patch new file mode 100644 index 00000000000..3499bcaa144 --- /dev/null +++ b/queue-6.8/af_unix-annotate-data-race-around-unix_sk-sk-addr.patch @@ -0,0 +1,119 @@ +From 15fe393f9b9d38e2cd69c2e1a206a7ddbe6907de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 May 2024 00:40:02 +0900 +Subject: af_unix: Annotate data-race around unix_sk(sk)->addr. + +From: Kuniyuki Iwashima + +[ Upstream commit 97e1db06c7bb948da10ba85acad8030b56886593 ] + +Once unix_sk(sk)->addr is assigned under net->unx.table.locks and +unix_sk(sk)->bindlock, *(unix_sk(sk)->addr) and unix_sk(sk)->path are +fully set up, and unix_sk(sk)->addr is never changed. + +unix_getname() and unix_copy_addr() access the two fields locklessly, +and commit ae3b564179bf ("missing barriers in some of unix_sock ->addr +and ->path accesses") added smp_store_release() and smp_load_acquire() +pairs. + +In other functions, we still read unix_sk(sk)->addr locklessly to check +if the socket is bound, and KCSAN complains about it. [0] + +Given these functions have no dependency for *(unix_sk(sk)->addr) and +unix_sk(sk)->path, READ_ONCE() is enough to annotate the data-race. + +Note that it is safe to access unix_sk(sk)->addr locklessly if the socket +is found in the hash table. For example, the lockless read of otheru->addr +in unix_stream_connect() is safe. 
+ +Note also that newu->addr there is of the child socket that is still not +accessible from userspace, and smp_store_release() publishes the address +in case the socket is accept()ed and unix_getname() / unix_copy_addr() +is called. + +[0]: +BUG: KCSAN: data-race in unix_bind / unix_listen + +write (marked) to 0xffff88805f8d1840 of 8 bytes by task 13723 on cpu 0: + __unix_set_addr_hash net/unix/af_unix.c:329 [inline] + unix_bind_bsd net/unix/af_unix.c:1241 [inline] + unix_bind+0x881/0x1000 net/unix/af_unix.c:1319 + __sys_bind+0x194/0x1e0 net/socket.c:1847 + __do_sys_bind net/socket.c:1858 [inline] + __se_sys_bind net/socket.c:1856 [inline] + __x64_sys_bind+0x40/0x50 net/socket.c:1856 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x46/0x4e + +read to 0xffff88805f8d1840 of 8 bytes by task 13724 on cpu 1: + unix_listen+0x72/0x180 net/unix/af_unix.c:734 + __sys_listen+0xdc/0x160 net/socket.c:1881 + __do_sys_listen net/socket.c:1890 [inline] + __se_sys_listen net/socket.c:1888 [inline] + __x64_sys_listen+0x2e/0x40 net/socket.c:1888 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x46/0x4e + +value changed: 0x0000000000000000 -> 0xffff88807b5b1b40 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 13724 Comm: syz-executor.4 Not tainted 6.8.0-12822-gcd51db110a7e #12 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: syzkaller +Signed-off-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20240522154002.77857-1-kuniyu@amazon.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/unix/af_unix.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 
6afa6d24818fb..085a1fc4934e4 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -733,7 +733,7 @@ static int unix_listen(struct socket *sock, int backlog) + if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) + goto out; /* Only stream/seqpacket sockets accept */ + err = -EINVAL; +- if (!u->addr) ++ if (!READ_ONCE(u->addr)) + goto out; /* No listens on an unbound socket */ + unix_state_lock(sk); + if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) +@@ -1371,7 +1371,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, + + if ((test_bit(SOCK_PASSCRED, &sock->flags) || + test_bit(SOCK_PASSPIDFD, &sock->flags)) && +- !unix_sk(sk)->addr) { ++ !READ_ONCE(unix_sk(sk)->addr)) { + err = unix_autobind(sk); + if (err) + goto out; +@@ -1483,7 +1483,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, + goto out; + + if ((test_bit(SOCK_PASSCRED, &sock->flags) || +- test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { ++ test_bit(SOCK_PASSPIDFD, &sock->flags)) && ++ !READ_ONCE(u->addr)) { + err = unix_autobind(sk); + if (err) + goto out; +@@ -1937,7 +1938,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + } + + if ((test_bit(SOCK_PASSCRED, &sock->flags) || +- test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { ++ test_bit(SOCK_PASSPIDFD, &sock->flags)) && ++ !READ_ONCE(u->addr)) { + err = unix_autobind(sk); + if (err) + goto out; +-- +2.43.0 + diff --git a/queue-6.8/af_unix-read-sk-sk_hash-under-bindlock-during-bind.patch b/queue-6.8/af_unix-read-sk-sk_hash-under-bindlock-during-bind.patch new file mode 100644 index 00000000000..675bd9c504b --- /dev/null +++ b/queue-6.8/af_unix-read-sk-sk_hash-under-bindlock-during-bind.patch @@ -0,0 +1,132 @@ +From 240b3f424e06927aab75ce8ac20939a6f660c755 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 May 2024 00:42:18 +0900 +Subject: af_unix: Read sk->sk_hash under bindlock during bind(). 
+ +From: Kuniyuki Iwashima + +[ Upstream commit 51d1b25a720982324871338b1a36b197ec9bd6f0 ] + +syzkaller reported data-race of sk->sk_hash in unix_autobind() [0], +and the same ones exist in unix_bind_bsd() and unix_bind_abstract(). + +The three bind() functions prefetch sk->sk_hash locklessly and +use it later after validating that unix_sk(sk)->addr is NULL under +unix_sk(sk)->bindlock. + +The prefetched sk->sk_hash is the hash value of unbound socket set +in unix_create1() and does not change until bind() completes. + +There could be a chance that sk->sk_hash changes after the lockless +read. However, in such a case, non-NULL unix_sk(sk)->addr is visible +under unix_sk(sk)->bindlock, and bind() returns -EINVAL without using +the prefetched value. + +The KCSAN splat is false-positive, but let's silence it by reading +sk->sk_hash under unix_sk(sk)->bindlock. + +[0]: +BUG: KCSAN: data-race in unix_autobind / unix_autobind + +write to 0xffff888034a9fb88 of 4 bytes by task 4468 on cpu 0: + __unix_set_addr_hash net/unix/af_unix.c:331 [inline] + unix_autobind+0x47a/0x7d0 net/unix/af_unix.c:1185 + unix_dgram_connect+0x7e3/0x890 net/unix/af_unix.c:1373 + __sys_connect_file+0xd7/0xe0 net/socket.c:2048 + __sys_connect+0x114/0x140 net/socket.c:2065 + __do_sys_connect net/socket.c:2075 [inline] + __se_sys_connect net/socket.c:2072 [inline] + __x64_sys_connect+0x40/0x50 net/socket.c:2072 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x46/0x4e + +read to 0xffff888034a9fb88 of 4 bytes by task 4465 on cpu 1: + unix_autobind+0x28/0x7d0 net/unix/af_unix.c:1134 + unix_dgram_connect+0x7e3/0x890 net/unix/af_unix.c:1373 + __sys_connect_file+0xd7/0xe0 net/socket.c:2048 + __sys_connect+0x114/0x140 net/socket.c:2065 + __do_sys_connect net/socket.c:2075 [inline] + __se_sys_connect net/socket.c:2072 [inline] + __x64_sys_connect+0x40/0x50 net/socket.c:2072 + do_syscall_x64 arch/x86/entry/common.c:52 
[inline] + do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x46/0x4e + +value changed: 0x000000e4 -> 0x000001e3 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 4465 Comm: syz-executor.0 Not tainted 6.8.0-12822-gcd51db110a7e #12 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 + +Fixes: afd20b9290e1 ("af_unix: Replace the big lock with small locks.") +Reported-by: syzkaller +Signed-off-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20240522154218.78088-1-kuniyu@amazon.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/unix/af_unix.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 085a1fc4934e4..51891b3db560d 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -1133,8 +1133,8 @@ static struct sock *unix_find_other(struct net *net, + + static int unix_autobind(struct sock *sk) + { +- unsigned int new_hash, old_hash = sk->sk_hash; + struct unix_sock *u = unix_sk(sk); ++ unsigned int new_hash, old_hash; + struct net *net = sock_net(sk); + struct unix_address *addr; + u32 lastnum, ordernum; +@@ -1157,6 +1157,7 @@ static int unix_autobind(struct sock *sk) + addr->name->sun_family = AF_UNIX; + refcount_set(&addr->refcnt, 1); + ++ old_hash = sk->sk_hash; + ordernum = get_random_u32(); + lastnum = ordernum & 0xFFFFF; + retry: +@@ -1197,8 +1198,8 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, + { + umode_t mode = S_IFSOCK | + (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); +- unsigned int new_hash, old_hash = sk->sk_hash; + struct unix_sock *u = unix_sk(sk); ++ unsigned int new_hash, old_hash; + struct net *net = sock_net(sk); + struct mnt_idmap *idmap; + struct unix_address *addr; +@@ -1236,6 +1237,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, + if (u->addr) + goto out_unlock; + 
++ old_hash = sk->sk_hash; + new_hash = unix_bsd_hash(d_backing_inode(dentry)); + unix_table_double_lock(net, old_hash, new_hash); + u->path.mnt = mntget(parent.mnt); +@@ -1263,8 +1265,8 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, + static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, + int addr_len) + { +- unsigned int new_hash, old_hash = sk->sk_hash; + struct unix_sock *u = unix_sk(sk); ++ unsigned int new_hash, old_hash; + struct net *net = sock_net(sk); + struct unix_address *addr; + int err; +@@ -1282,6 +1284,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, + goto out_mutex; + } + ++ old_hash = sk->sk_hash; + new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); + unix_table_double_lock(net, old_hash, new_hash); + +-- +2.43.0 + diff --git a/queue-6.8/alsa-core-remove-debugfs-at-disconnection.patch b/queue-6.8/alsa-core-remove-debugfs-at-disconnection.patch new file mode 100644 index 00000000000..8858986f8a1 --- /dev/null +++ b/queue-6.8/alsa-core-remove-debugfs-at-disconnection.patch @@ -0,0 +1,134 @@ +From 9cc8fd8135fb684480c7b0bb723401f1e693922c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 May 2024 17:11:46 +0200 +Subject: ALSA: core: Remove debugfs at disconnection + +From: Takashi Iwai + +[ Upstream commit 495000a38634e640e2fd02f7e4f1512ccc92d770 ] + +The card-specific debugfs entries are removed at the last stage of +card free phase, and it's performed after synchronization of the +closes of all opened fds. This works fine for most cases, but it can +be potentially problematic for a hotplug device like USB-audio. Due +to the nature of snd_card_free_when_closed(), the card free isn't +called immediately after the driver removal for a hotplug device, but +it's left until the last fd is closed. It implies that the card +debugfs entries also remain. 
Meanwhile, when a new device is inserted +before the last close and the very same card slot is assigned, the +driver tries to create the card debugfs root again on the very same +path. This conflicts with the remaining entry, and results in the +kernel warning such as: + debugfs: Directory 'card0' with parent 'sound' already present! +with the missing debugfs entry afterwards. + +For avoiding such conflicts, remove debugfs entries at the device +disconnection phase instead. The jack kctl debugfs entries get +removed in snd_jack_dev_disconnect() instead of each kctl +private_free. + +Fixes: 2d670ea2bd53 ("ALSA: jack: implement software jack injection via debugfs") +Link: https://lore.kernel.org/r/20240524151256.32521-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/core/init.c | 9 +++++---- + sound/core/jack.c | 21 ++++++++++++++------- + 2 files changed, 19 insertions(+), 11 deletions(-) + +diff --git a/sound/core/init.c b/sound/core/init.c +index d97b8af897ee4..b2b7e50ff4cc3 100644 +--- a/sound/core/init.c ++++ b/sound/core/init.c +@@ -546,6 +546,11 @@ void snd_card_disconnect(struct snd_card *card) + synchronize_irq(card->sync_irq); + + snd_info_card_disconnect(card); ++#ifdef CONFIG_SND_DEBUG ++ debugfs_remove(card->debugfs_root); ++ card->debugfs_root = NULL; ++#endif ++ + if (card->registered) { + device_del(&card->card_dev); + card->registered = false; +@@ -598,10 +603,6 @@ static int snd_card_do_free(struct snd_card *card) + dev_warn(card->dev, "unable to free card info\n"); + /* Not fatal error */ + } +-#ifdef CONFIG_SND_DEBUG +- debugfs_remove(card->debugfs_root); +- card->debugfs_root = NULL; +-#endif + if (card->release_completion) + complete(card->release_completion); + if (!card->managed) +diff --git a/sound/core/jack.c b/sound/core/jack.c +index e08b2c4fbd1a5..e4bcecdf89b7e 100644 +--- a/sound/core/jack.c ++++ b/sound/core/jack.c +@@ -37,11 +37,15 @@ static const int jack_switch_types[SND_JACK_SWITCH_TYPES] = { + 
}; + #endif /* CONFIG_SND_JACK_INPUT_DEV */ + ++static void snd_jack_remove_debugfs(struct snd_jack *jack); ++ + static int snd_jack_dev_disconnect(struct snd_device *device) + { +-#ifdef CONFIG_SND_JACK_INPUT_DEV + struct snd_jack *jack = device->device_data; + ++ snd_jack_remove_debugfs(jack); ++ ++#ifdef CONFIG_SND_JACK_INPUT_DEV + guard(mutex)(&jack->input_dev_lock); + if (!jack->input_dev) + return 0; +@@ -381,10 +385,14 @@ static int snd_jack_debugfs_add_inject_node(struct snd_jack *jack, + return 0; + } + +-static void snd_jack_debugfs_clear_inject_node(struct snd_jack_kctl *jack_kctl) ++static void snd_jack_remove_debugfs(struct snd_jack *jack) + { +- debugfs_remove(jack_kctl->jack_debugfs_root); +- jack_kctl->jack_debugfs_root = NULL; ++ struct snd_jack_kctl *jack_kctl; ++ ++ list_for_each_entry(jack_kctl, &jack->kctl_list, list) { ++ debugfs_remove(jack_kctl->jack_debugfs_root); ++ jack_kctl->jack_debugfs_root = NULL; ++ } + } + #else /* CONFIG_SND_JACK_INJECTION_DEBUG */ + static int snd_jack_debugfs_add_inject_node(struct snd_jack *jack, +@@ -393,7 +401,7 @@ static int snd_jack_debugfs_add_inject_node(struct snd_jack *jack, + return 0; + } + +-static void snd_jack_debugfs_clear_inject_node(struct snd_jack_kctl *jack_kctl) ++static void snd_jack_remove_debugfs(struct snd_jack *jack) + { + } + #endif /* CONFIG_SND_JACK_INJECTION_DEBUG */ +@@ -404,7 +412,6 @@ static void snd_jack_kctl_private_free(struct snd_kcontrol *kctl) + + jack_kctl = kctl->private_data; + if (jack_kctl) { +- snd_jack_debugfs_clear_inject_node(jack_kctl); + list_del(&jack_kctl->list); + kfree(jack_kctl); + } +@@ -497,8 +504,8 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, + .dev_free = snd_jack_dev_free, + #ifdef CONFIG_SND_JACK_INPUT_DEV + .dev_register = snd_jack_dev_register, +- .dev_disconnect = snd_jack_dev_disconnect, + #endif /* CONFIG_SND_JACK_INPUT_DEV */ ++ .dev_disconnect = snd_jack_dev_disconnect, + }; + + if (initial_kctl) { +-- +2.43.0 + diff --git 
a/queue-6.8/alsa-hda-realtek-adjust-g814jzr-to-use-spi-init-for-.patch b/queue-6.8/alsa-hda-realtek-adjust-g814jzr-to-use-spi-init-for-.patch new file mode 100644 index 00000000000..490d5287e3f --- /dev/null +++ b/queue-6.8/alsa-hda-realtek-adjust-g814jzr-to-use-spi-init-for-.patch @@ -0,0 +1,37 @@ +From c669b215ac50b3e67e6631bd5f22b5455f5d2b8e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 26 May 2024 21:10:32 +1200 +Subject: ALSA: hda/realtek: Adjust G814JZR to use SPI init for amp + +From: Luke D. Jones + +[ Upstream commit 2be46155d792d629e8fe3188c2cde176833afe36 ] + +The 2024 ASUS ROG G814J model is much the same as the 2023 model +and the 2023 16" version. We can use the same Cirrus Amp quirk. + +Fixes: 811dd426a9b1 ("ALSA: hda/realtek: Add quirks for Asus ROG 2024 laptops using CS35L41") +Signed-off-by: Luke D. Jones +Link: https://lore.kernel.org/r/20240526091032.114545-1-luke@ljones.dev +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/pci/hda/patch_realtek.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index c0e12e6746922..9a2cc048668ca 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -10196,7 +10196,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), + SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2), +- SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC245_FIXUP_CS35L41_SPI_2), ++ SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), + SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), + SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), +-- +2.43.0 
+ diff --git a/queue-6.8/alsa-jack-use-guard-for-locking.patch b/queue-6.8/alsa-jack-use-guard-for-locking.patch new file mode 100644 index 00000000000..188214bc441 --- /dev/null +++ b/queue-6.8/alsa-jack-use-guard-for-locking.patch @@ -0,0 +1,91 @@ +From 3bc83b4c289f55adc4baea93404971ca99b7dbd6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 27 Feb 2024 09:52:52 +0100 +Subject: ALSA: jack: Use guard() for locking + +From: Takashi Iwai + +[ Upstream commit 7234795b59f7b0b14569ec46dce56300a4988067 ] + +We can simplify the code gracefully with new guard() macro and co for +automatic cleanup of locks. + +Only the code refactoring, and no functional changes. + +Signed-off-by: Takashi Iwai +Link: https://lore.kernel.org/r/20240227085306.9764-11-tiwai@suse.de +Stable-dep-of: 495000a38634 ("ALSA: core: Remove debugfs at disconnection") +Signed-off-by: Sasha Levin +--- + sound/core/jack.c | 25 +++++++------------------ + 1 file changed, 7 insertions(+), 18 deletions(-) + +diff --git a/sound/core/jack.c b/sound/core/jack.c +index e0f034e7275cd..e08b2c4fbd1a5 100644 +--- a/sound/core/jack.c ++++ b/sound/core/jack.c +@@ -42,11 +42,9 @@ static int snd_jack_dev_disconnect(struct snd_device *device) + #ifdef CONFIG_SND_JACK_INPUT_DEV + struct snd_jack *jack = device->device_data; + +- mutex_lock(&jack->input_dev_lock); +- if (!jack->input_dev) { +- mutex_unlock(&jack->input_dev_lock); ++ guard(mutex)(&jack->input_dev_lock); ++ if (!jack->input_dev) + return 0; +- } + + /* If the input device is registered with the input subsystem + * then we need to use a different deallocator. 
*/ +@@ -55,7 +53,6 @@ static int snd_jack_dev_disconnect(struct snd_device *device) + else + input_free_device(jack->input_dev); + jack->input_dev = NULL; +- mutex_unlock(&jack->input_dev_lock); + #endif /* CONFIG_SND_JACK_INPUT_DEV */ + return 0; + } +@@ -92,11 +89,9 @@ static int snd_jack_dev_register(struct snd_device *device) + snprintf(jack->name, sizeof(jack->name), "%s %s", + card->shortname, jack->id); + +- mutex_lock(&jack->input_dev_lock); +- if (!jack->input_dev) { +- mutex_unlock(&jack->input_dev_lock); ++ guard(mutex)(&jack->input_dev_lock); ++ if (!jack->input_dev) + return 0; +- } + + jack->input_dev->name = jack->name; + +@@ -121,7 +116,6 @@ static int snd_jack_dev_register(struct snd_device *device) + if (err == 0) + jack->registered = 1; + +- mutex_unlock(&jack->input_dev_lock); + return err; + } + #endif /* CONFIG_SND_JACK_INPUT_DEV */ +@@ -586,14 +580,9 @@ EXPORT_SYMBOL(snd_jack_new); + void snd_jack_set_parent(struct snd_jack *jack, struct device *parent) + { + WARN_ON(jack->registered); +- mutex_lock(&jack->input_dev_lock); +- if (!jack->input_dev) { +- mutex_unlock(&jack->input_dev_lock); +- return; +- } +- +- jack->input_dev->dev.parent = parent; +- mutex_unlock(&jack->input_dev_lock); ++ guard(mutex)(&jack->input_dev_lock); ++ if (jack->input_dev) ++ jack->input_dev->dev.parent = parent; + } + EXPORT_SYMBOL(snd_jack_set_parent); + +-- +2.43.0 + diff --git a/queue-6.8/alsa-seq-don-t-clear-bank-selection-at-event-ump-mid.patch b/queue-6.8/alsa-seq-don-t-clear-bank-selection-at-event-ump-mid.patch new file mode 100644 index 00000000000..054772fbe34 --- /dev/null +++ b/queue-6.8/alsa-seq-don-t-clear-bank-selection-at-event-ump-mid.patch @@ -0,0 +1,40 @@ +From 350881d0dd7c9cd258a4d04bd863d1d89478d1c0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 May 2024 17:18:50 +0200 +Subject: ALSA: seq: Don't clear bank selection at event -> UMP MIDI2 + conversion + +From: Takashi Iwai + +[ Upstream commit a200df7deb3186cd7b55abb77ab96dfefb8a4f09 
] + +The current code to convert from a legacy sequencer event to UMP MIDI2 +clears the bank selection at each time the program change is +submitted. This is confusing and may lead to incorrect bank values +transmitted to the destination in the end. + +Drop the line to clear the bank info and keep the provided values. + +Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events") +Link: https://lore.kernel.org/r/20240527151852.29036-2-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/core/seq/seq_ump_convert.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c +index c21be87f5da9e..f5d22dd008426 100644 +--- a/sound/core/seq/seq_ump_convert.c ++++ b/sound/core/seq/seq_ump_convert.c +@@ -892,7 +892,6 @@ static int pgm_ev_to_ump_midi2(const struct snd_seq_event *event, + data->pg.bank_msb = cc->cc_bank_msb; + data->pg.bank_lsb = cc->cc_bank_lsb; + cc->bank_set = 0; +- cc->cc_bank_msb = cc->cc_bank_lsb = 0; + } + return 1; + } +-- +2.43.0 + diff --git a/queue-6.8/alsa-seq-fix-missing-bank-setup-between-midi1-midi2-.patch b/queue-6.8/alsa-seq-fix-missing-bank-setup-between-midi1-midi2-.patch new file mode 100644 index 00000000000..ddd4bd2c133 --- /dev/null +++ b/queue-6.8/alsa-seq-fix-missing-bank-setup-between-midi1-midi2-.patch @@ -0,0 +1,104 @@ +From 16765adcdc88f2e63273e1419f45e801971c5a07 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 May 2024 17:18:49 +0200 +Subject: ALSA: seq: Fix missing bank setup between MIDI1/MIDI2 UMP conversion + +From: Takashi Iwai + +[ Upstream commit 8a42886cae307663f3f999846926bd6e64392000 ] + +When a UMP packet is converted between MIDI1 and MIDI2 protocols, the +bank selection may be lost. The conversion from MIDI1 to MIDI2 needs +the encoding of the bank into UMP_MSG_STATUS_PROGRAM bits, while the +conversion from MIDI2 to MIDI1 needs the extraction from that +instead. 
+ +This patch implements the missing bank selection mechanism in those +conversions. + +Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events") +Link: https://lore.kernel.org/r/20240527151852.29036-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/core/seq/seq_ump_convert.c | 38 ++++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c +index ee6ac649df836..c21be87f5da9e 100644 +--- a/sound/core/seq/seq_ump_convert.c ++++ b/sound/core/seq/seq_ump_convert.c +@@ -368,6 +368,7 @@ static int cvt_ump_midi1_to_midi2(struct snd_seq_client *dest, + struct snd_seq_ump_event ev_cvt; + const union snd_ump_midi1_msg *midi1 = (const union snd_ump_midi1_msg *)event->ump; + union snd_ump_midi2_msg *midi2 = (union snd_ump_midi2_msg *)ev_cvt.ump; ++ struct snd_seq_ump_midi2_bank *cc; + + ev_cvt = *event; + memset(&ev_cvt.ump, 0, sizeof(ev_cvt.ump)); +@@ -387,11 +388,29 @@ static int cvt_ump_midi1_to_midi2(struct snd_seq_client *dest, + midi2->paf.data = upscale_7_to_32bit(midi1->paf.data); + break; + case UMP_MSG_STATUS_CC: ++ cc = &dest_port->midi2_bank[midi1->note.channel]; ++ switch (midi1->cc.index) { ++ case UMP_CC_BANK_SELECT: ++ cc->bank_set = 1; ++ cc->cc_bank_msb = midi1->cc.data; ++ return 0; // skip ++ case UMP_CC_BANK_SELECT_LSB: ++ cc->bank_set = 1; ++ cc->cc_bank_lsb = midi1->cc.data; ++ return 0; // skip ++ } + midi2->cc.index = midi1->cc.index; + midi2->cc.data = upscale_7_to_32bit(midi1->cc.data); + break; + case UMP_MSG_STATUS_PROGRAM: + midi2->pg.program = midi1->pg.program; ++ cc = &dest_port->midi2_bank[midi1->note.channel]; ++ if (cc->bank_set) { ++ midi2->pg.bank_valid = 1; ++ midi2->pg.bank_msb = cc->cc_bank_msb; ++ midi2->pg.bank_lsb = cc->cc_bank_lsb; ++ cc->bank_set = 0; ++ } + break; + case UMP_MSG_STATUS_CHANNEL_PRESSURE: + midi2->caf.data = upscale_7_to_32bit(midi1->caf.data); +@@ -419,6 +438,7 @@ static int 
cvt_ump_midi2_to_midi1(struct snd_seq_client *dest, + struct snd_seq_ump_event ev_cvt; + union snd_ump_midi1_msg *midi1 = (union snd_ump_midi1_msg *)ev_cvt.ump; + const union snd_ump_midi2_msg *midi2 = (const union snd_ump_midi2_msg *)event->ump; ++ int err; + u16 v; + + ev_cvt = *event; +@@ -443,6 +463,24 @@ static int cvt_ump_midi2_to_midi1(struct snd_seq_client *dest, + midi1->cc.data = downscale_32_to_7bit(midi2->cc.data); + break; + case UMP_MSG_STATUS_PROGRAM: ++ if (midi2->pg.bank_valid) { ++ midi1->cc.status = UMP_MSG_STATUS_CC; ++ midi1->cc.index = UMP_CC_BANK_SELECT; ++ midi1->cc.data = midi2->pg.bank_msb; ++ err = __snd_seq_deliver_single_event(dest, dest_port, ++ (struct snd_seq_event *)&ev_cvt, ++ atomic, hop); ++ if (err < 0) ++ return err; ++ midi1->cc.index = UMP_CC_BANK_SELECT_LSB; ++ midi1->cc.data = midi2->pg.bank_lsb; ++ err = __snd_seq_deliver_single_event(dest, dest_port, ++ (struct snd_seq_event *)&ev_cvt, ++ atomic, hop); ++ if (err < 0) ++ return err; ++ midi1->note.status = midi2->note.status; ++ } + midi1->pg.program = midi2->pg.program; + break; + case UMP_MSG_STATUS_CHANNEL_PRESSURE: +-- +2.43.0 + diff --git a/queue-6.8/alsa-seq-fix-yet-another-spot-for-system-message-con.patch b/queue-6.8/alsa-seq-fix-yet-another-spot-for-system-message-con.patch new file mode 100644 index 00000000000..de9c73d27dd --- /dev/null +++ b/queue-6.8/alsa-seq-fix-yet-another-spot-for-system-message-con.patch @@ -0,0 +1,37 @@ +From 4f5b91e48a720e347339b98e2e1b98ab49b81e34 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 May 2024 12:10:43 +0200 +Subject: ALSA: seq: Fix yet another spot for system message conversion + +From: Takashi Iwai + +[ Upstream commit 700fe6fd093d08c6da2bda8efe00479b0e617327 ] + +We fixed the incorrect UMP type for system messages in the recent +commit, but it missed one place in system_ev_to_ump_midi1(). +Fix it now. 
+ +Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events") +Fixes: c2bb79613fed ("ALSA: seq: Fix incorrect UMP type for system messages") +Link: https://lore.kernel.org/r/20240530101044.17524-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/core/seq/seq_ump_convert.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c +index f5d22dd008426..903a644b80e25 100644 +--- a/sound/core/seq/seq_ump_convert.c ++++ b/sound/core/seq/seq_ump_convert.c +@@ -729,6 +729,7 @@ static int system_ev_to_ump_midi1(const struct snd_seq_event *event, + union snd_ump_midi1_msg *data, + unsigned char status) + { ++ data->system.type = UMP_MSG_TYPE_SYSTEM; // override + data->system.status = status; + return 1; + } +-- +2.43.0 + diff --git a/queue-6.8/alsa-seq-ump-fix-swapped-song-position-pointer-data.patch b/queue-6.8/alsa-seq-ump-fix-swapped-song-position-pointer-data.patch new file mode 100644 index 00000000000..ef374d25853 --- /dev/null +++ b/queue-6.8/alsa-seq-ump-fix-swapped-song-position-pointer-data.patch @@ -0,0 +1,49 @@ +From 26726de0a606dad99e9e07b172b491b924b3d9f0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 31 May 2024 09:51:07 +0200 +Subject: ALSA: seq: ump: Fix swapped song position pointer data + +From: Takashi Iwai + +[ Upstream commit 310fa3ec2859f1c094e6e9b5d2e1ca51738c409a ] + +At converting between the legacy event and UMP, the parameters for +MIDI Song Position Pointer are incorrectly stored. It should have +been LSB -> MSB order while it stored in MSB -> LSB order. +This patch corrects the ordering. 
+ +Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events") +Link: https://lore.kernel.org/r/20240531075110.3250-1-tiwai@suse.de +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/core/seq/seq_ump_convert.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c +index 903a644b80e25..9bfba69b2a709 100644 +--- a/sound/core/seq/seq_ump_convert.c ++++ b/sound/core/seq/seq_ump_convert.c +@@ -157,7 +157,7 @@ static void ump_system_to_one_param_ev(const union snd_ump_midi1_msg *val, + static void ump_system_to_songpos_ev(const union snd_ump_midi1_msg *val, + struct snd_seq_event *ev) + { +- ev->data.control.value = (val->system.parm1 << 7) | val->system.parm2; ++ ev->data.control.value = (val->system.parm2 << 7) | val->system.parm1; + } + + /* Encoders for 0xf0 - 0xff */ +@@ -752,8 +752,8 @@ static int system_2p_ev_to_ump_midi1(const struct snd_seq_event *event, + unsigned char status) + { + data->system.status = status; +- data->system.parm1 = (event->data.control.value >> 7) & 0x7f; +- data->system.parm2 = event->data.control.value & 0x7f; ++ data->system.parm1 = event->data.control.value & 0x7f; ++ data->system.parm2 = (event->data.control.value >> 7) & 0x7f; + return 1; + } + +-- +2.43.0 + diff --git a/queue-6.8/asoc-cs42l43-only-restrict-44.1khz-for-the-asp.patch b/queue-6.8/asoc-cs42l43-only-restrict-44.1khz-for-the-asp.patch new file mode 100644 index 00000000000..9e228310a95 --- /dev/null +++ b/queue-6.8/asoc-cs42l43-only-restrict-44.1khz-for-the-asp.patch @@ -0,0 +1,42 @@ +From ff837e9a4db95c85ba7601bd82d54182084beded Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 May 2024 11:08:40 +0100 +Subject: ASoC: cs42l43: Only restrict 44.1kHz for the ASP + +From: Charles Keepax + +[ Upstream commit 797c525e85d1e44cf0e6f338890e8e0c661f524a ] + +The SoundWire interface can always support 44.1kHz using flow controlled +mode, and whether the 
ASP is in master mode should obviously only affect +the ASP. Update cs42l43_startup() to only restrict the rates for the ASP +DAI. + +Fixes: fc918cbe874e ("ASoC: cs42l43: Add support for the cs42l43") +Signed-off-by: Charles Keepax +Link: https://msgid.link/r/20240527100840.439832-1-ckeepax@opensource.cirrus.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/codecs/cs42l43.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c +index a24b52c9dda6b..193ef7543930a 100644 +--- a/sound/soc/codecs/cs42l43.c ++++ b/sound/soc/codecs/cs42l43.c +@@ -300,8 +300,9 @@ static int cs42l43_startup(struct snd_pcm_substream *substream, struct snd_soc_d + struct snd_soc_component *component = dai->component; + struct cs42l43_codec *priv = snd_soc_component_get_drvdata(component); + struct cs42l43 *cs42l43 = priv->core; +- int provider = !!regmap_test_bits(cs42l43->regmap, CS42L43_ASP_CLK_CONFIG2, +- CS42L43_ASP_MASTER_MODE_MASK); ++ int provider = !dai->id || !!regmap_test_bits(cs42l43->regmap, ++ CS42L43_ASP_CLK_CONFIG2, ++ CS42L43_ASP_MASTER_MODE_MASK); + + if (provider) + priv->constraint.mask = CS42L43_PROVIDER_RATE_MASK; +-- +2.43.0 + diff --git a/queue-6.8/bpf-allow-delete-from-sockmap-sockhash-only-if-updat.patch b/queue-6.8/bpf-allow-delete-from-sockmap-sockhash-only-if-updat.patch new file mode 100644 index 00000000000..891e1159022 --- /dev/null +++ b/queue-6.8/bpf-allow-delete-from-sockmap-sockhash-only-if-updat.patch @@ -0,0 +1,79 @@ +From 38afc7dbb3d6607c93930624defd68c0e3907989 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 May 2024 13:20:07 +0200 +Subject: bpf: Allow delete from sockmap/sockhash only if update is allowed + +From: Jakub Sitnicki + +[ Upstream commit 98e948fb60d41447fd8d2d0c3b8637fc6b6dc26d ] + +We have seen an influx of syzkaller reports where a BPF program attached to +a tracepoint triggers a locking rule violation by performing a 
map_delete +on a sockmap/sockhash. + +We don't intend to support this artificial use scenario. Extend the +existing verifier allowed-program-type check for updating sockmap/sockhash +to also cover deleting from a map. + +From now on only BPF programs which were previously allowed to update +sockmap/sockhash can delete from these map types. + +Fixes: ff9105993240 ("bpf, sockmap: Prevent lock inversion deadlock in map delete elem") +Reported-by: Tetsuo Handa +Reported-by: syzbot+ec941d6e24f633a59172@syzkaller.appspotmail.com +Signed-off-by: Jakub Sitnicki +Signed-off-by: Daniel Borkmann +Tested-by: syzbot+ec941d6e24f633a59172@syzkaller.appspotmail.com +Acked-by: John Fastabend +Closes: https://syzkaller.appspot.com/bug?extid=ec941d6e24f633a59172 +Link: https://lore.kernel.org/bpf/20240527-sockmap-verify-deletes-v1-1-944b372f2101@cloudflare.com +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 6edfcc3375082..f3b36f500a693 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -8756,7 +8756,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) + enum bpf_attach_type eatype = env->prog->expected_attach_type; + enum bpf_prog_type type = resolve_prog_type(env->prog); + +- if (func_id != BPF_FUNC_map_update_elem) ++ if (func_id != BPF_FUNC_map_update_elem && ++ func_id != BPF_FUNC_map_delete_elem) + return false; + + /* It's not possible to get access to a locked struct sock in these +@@ -8767,6 +8768,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) + if (eatype == BPF_TRACE_ITER) + return true; + break; ++ case BPF_PROG_TYPE_SOCK_OPS: ++ /* map_update allowed only via dedicated helpers with event type checks */ ++ if (func_id == BPF_FUNC_map_delete_elem) ++ return true; ++ break; + case BPF_PROG_TYPE_SOCKET_FILTER: + case BPF_PROG_TYPE_SCHED_CLS: + case 
BPF_PROG_TYPE_SCHED_ACT: +@@ -8862,7 +8868,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, + case BPF_MAP_TYPE_SOCKMAP: + if (func_id != BPF_FUNC_sk_redirect_map && + func_id != BPF_FUNC_sock_map_update && +- func_id != BPF_FUNC_map_delete_elem && + func_id != BPF_FUNC_msg_redirect_map && + func_id != BPF_FUNC_sk_select_reuseport && + func_id != BPF_FUNC_map_lookup_elem && +@@ -8872,7 +8877,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, + case BPF_MAP_TYPE_SOCKHASH: + if (func_id != BPF_FUNC_sk_redirect_hash && + func_id != BPF_FUNC_sock_hash_update && +- func_id != BPF_FUNC_map_delete_elem && + func_id != BPF_FUNC_msg_redirect_hash && + func_id != BPF_FUNC_sk_select_reuseport && + func_id != BPF_FUNC_map_lookup_elem && +-- +2.43.0 + diff --git a/queue-6.8/bpf-fix-potential-integer-overflow-in-resolve_btfids.patch b/queue-6.8/bpf-fix-potential-integer-overflow-in-resolve_btfids.patch new file mode 100644 index 00000000000..998e3dc4263 --- /dev/null +++ b/queue-6.8/bpf-fix-potential-integer-overflow-in-resolve_btfids.patch @@ -0,0 +1,41 @@ +From d3faaae38c2a16246344d8376a96ae251659ecf6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 May 2024 09:09:31 +0200 +Subject: bpf: Fix potential integer overflow in resolve_btfids + +From: Friedrich Vock + +[ Upstream commit 44382b3ed6b2787710c8ade06c0e97f5970a47c8 ] + +err is a 32-bit integer, but elf_update returns an off_t, which is 64-bit +at least on 64-bit platforms. If symbols_patch is called on a binary between +2-4GB in size, the result will be negative when cast to a 32-bit integer, +which the code assumes means an error occurred. This can wrongly trigger +build failures when building very large kernel images. 
+ +Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object") +Signed-off-by: Friedrich Vock +Signed-off-by: Daniel Borkmann +Acked-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20240514070931.199694-1-friedrich.vock@gmx.de +Signed-off-by: Sasha Levin +--- + tools/bpf/resolve_btfids/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c +index d9520cb826b31..af393c7dee1f1 100644 +--- a/tools/bpf/resolve_btfids/main.c ++++ b/tools/bpf/resolve_btfids/main.c +@@ -728,7 +728,7 @@ static int sets_patch(struct object *obj) + + static int symbols_patch(struct object *obj) + { +- int err; ++ off_t err; + + if (__symbols_patch(obj, &obj->structs) || + __symbols_patch(obj, &obj->unions) || +-- +2.43.0 + diff --git a/queue-6.8/dma-buf-sw-sync-don-t-enable-irq-from-sync_print_obj.patch b/queue-6.8/dma-buf-sw-sync-don-t-enable-irq-from-sync_print_obj.patch new file mode 100644 index 00000000000..d0fcf731c90 --- /dev/null +++ b/queue-6.8/dma-buf-sw-sync-don-t-enable-irq-from-sync_print_obj.patch @@ -0,0 +1,55 @@ +From 1a75fd61311d3573d3556c1ae10e0afc4b034403 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 5 May 2024 23:08:31 +0900 +Subject: dma-buf/sw-sync: don't enable IRQ from sync_print_obj() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tetsuo Handa + +[ Upstream commit b794918961516f667b0c745aebdfebbb8a98df39 ] + +Since commit a6aa8fca4d79 ("dma-buf/sw-sync: Reduce irqsave/irqrestore from +known context") by error replaced spin_unlock_irqrestore() with +spin_unlock_irq() for both sync_debugfs_show() and sync_print_obj() despite +sync_print_obj() is called from sync_debugfs_show(), lockdep complains +inconsistent lock state warning. + +Use plain spin_{lock,unlock}() for sync_print_obj(), for +sync_debugfs_show() is already using spin_{lock,unlock}_irq(). 
+ +Reported-by: syzbot +Closes: https://syzkaller.appspot.com/bug?extid=a225ee3df7e7f9372dbe +Fixes: a6aa8fca4d79 ("dma-buf/sw-sync: Reduce irqsave/irqrestore from known context") +Signed-off-by: Tetsuo Handa +Reviewed-by: Christian König +Link: https://patchwork.freedesktop.org/patch/msgid/c2e46020-aaa6-4e06-bf73-f05823f913f0@I-love.SAKURA.ne.jp +Signed-off-by: Christian König +Signed-off-by: Sasha Levin +--- + drivers/dma-buf/sync_debug.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c +index 101394f16930f..237bce21d1e72 100644 +--- a/drivers/dma-buf/sync_debug.c ++++ b/drivers/dma-buf/sync_debug.c +@@ -110,12 +110,12 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) + + seq_printf(s, "%s: %d\n", obj->name, obj->value); + +- spin_lock_irq(&obj->lock); ++ spin_lock(&obj->lock); /* Caller already disabled IRQ. */ + list_for_each(pos, &obj->pt_list) { + struct sync_pt *pt = container_of(pos, struct sync_pt, link); + sync_print_fence(s, &pt->base, false); + } +- spin_unlock_irq(&obj->lock); ++ spin_unlock(&obj->lock); + } + + static void sync_print_sync_file(struct seq_file *s, +-- +2.43.0 + diff --git a/queue-6.8/dma-mapping-benchmark-fix-node-id-validation.patch b/queue-6.8/dma-mapping-benchmark-fix-node-id-validation.patch new file mode 100644 index 00000000000..781033b5a25 --- /dev/null +++ b/queue-6.8/dma-mapping-benchmark-fix-node-id-validation.patch @@ -0,0 +1,64 @@ +From 751a523adf2e95ff053229e62f5890fb7edb4bf0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 4 May 2024 14:47:03 +0300 +Subject: dma-mapping: benchmark: fix node id validation + +From: Fedor Pchelkin + +[ Upstream commit 1ff05e723f7ca30644b8ec3fb093f16312e408ad ] + +While validating node ids in map_benchmark_ioctl(), node_possible() may +be provided with invalid argument outside of [0,MAX_NUMNODES-1] range +leading to: + +BUG: KASAN: wild-memory-access in map_benchmark_ioctl 
(kernel/dma/map_benchmark.c:214) +Read of size 8 at addr 1fffffff8ccb6398 by task dma_map_benchma/971 +CPU: 7 PID: 971 Comm: dma_map_benchma Not tainted 6.9.0-rc6 #37 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) +Call Trace: + +dump_stack_lvl (lib/dump_stack.c:117) +kasan_report (mm/kasan/report.c:603) +kasan_check_range (mm/kasan/generic.c:189) +variable_test_bit (arch/x86/include/asm/bitops.h:227) [inline] +arch_test_bit (arch/x86/include/asm/bitops.h:239) [inline] +_test_bit at (include/asm-generic/bitops/instrumented-non-atomic.h:142) [inline] +node_state (include/linux/nodemask.h:423) [inline] +map_benchmark_ioctl (kernel/dma/map_benchmark.c:214) +full_proxy_unlocked_ioctl (fs/debugfs/file.c:333) +__x64_sys_ioctl (fs/ioctl.c:890) +do_syscall_64 (arch/x86/entry/common.c:83) +entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) + +Compare node ids with sane bounds first. NUMA_NO_NODE is considered a +special valid case meaning that benchmarking kthreads won't be bound to a +cpuset of a given node. + +Found by Linux Verification Center (linuxtesting.org). 
+ +Fixes: 65789daa8087 ("dma-mapping: add benchmark support for streaming DMA APIs") +Signed-off-by: Fedor Pchelkin +Reviewed-by: Robin Murphy +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + kernel/dma/map_benchmark.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c +index 2478957cf9f83..59fb3f849b351 100644 +--- a/kernel/dma/map_benchmark.c ++++ b/kernel/dma/map_benchmark.c +@@ -212,7 +212,8 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, + } + + if (map->bparam.node != NUMA_NO_NODE && +- !node_possible(map->bparam.node)) { ++ (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES || ++ !node_possible(map->bparam.node))) { + pr_err("invalid numa node\n"); + return -EINVAL; + } +-- +2.43.0 + diff --git a/queue-6.8/dma-mapping-benchmark-fix-up-kthread-related-error-h.patch b/queue-6.8/dma-mapping-benchmark-fix-up-kthread-related-error-h.patch new file mode 100644 index 00000000000..ca38e1780b4 --- /dev/null +++ b/queue-6.8/dma-mapping-benchmark-fix-up-kthread-related-error-h.patch @@ -0,0 +1,78 @@ +From 43254f21bc6b9616746e41b9460cdc9707d80867 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 4 May 2024 14:47:01 +0300 +Subject: dma-mapping: benchmark: fix up kthread-related error handling + +From: Fedor Pchelkin + +[ Upstream commit bb9025f4432f8c158322cf2c04c2b492f23eb511 ] + +kthread creation failure is invalidly handled inside do_map_benchmark(). +The put_task_struct() calls on the error path are supposed to balance the +get_task_struct() calls which only happen after all the kthreads are +successfully created. Rollback using kthread_stop() for already created +kthreads in case of such failure. + +In normal situation call kthread_stop_put() to gracefully stop kthreads +and put their task refcounts. This should be done for all started +kthreads. + +Found by Linux Verification Center (linuxtesting.org). 
+ +Fixes: 65789daa8087 ("dma-mapping: add benchmark support for streaming DMA APIs") +Suggested-by: Robin Murphy +Signed-off-by: Fedor Pchelkin +Reviewed-by: Robin Murphy +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + kernel/dma/map_benchmark.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c +index 02205ab53b7e9..2478957cf9f83 100644 +--- a/kernel/dma/map_benchmark.c ++++ b/kernel/dma/map_benchmark.c +@@ -118,6 +118,8 @@ static int do_map_benchmark(struct map_benchmark_data *map) + if (IS_ERR(tsk[i])) { + pr_err("create dma_map thread failed\n"); + ret = PTR_ERR(tsk[i]); ++ while (--i >= 0) ++ kthread_stop(tsk[i]); + goto out; + } + +@@ -139,13 +141,17 @@ static int do_map_benchmark(struct map_benchmark_data *map) + + msleep_interruptible(map->bparam.seconds * 1000); + +- /* wait for the completion of benchmark threads */ ++ /* wait for the completion of all started benchmark threads */ + for (i = 0; i < threads; i++) { +- ret = kthread_stop(tsk[i]); +- if (ret) +- goto out; ++ int kthread_ret = kthread_stop_put(tsk[i]); ++ ++ if (kthread_ret) ++ ret = kthread_ret; + } + ++ if (ret) ++ goto out; ++ + loops = atomic64_read(&map->loops); + if (likely(loops > 0)) { + u64 map_variance, unmap_variance; +@@ -170,8 +176,6 @@ static int do_map_benchmark(struct map_benchmark_data *map) + } + + out: +- for (i = 0; i < threads; i++) +- put_task_struct(tsk[i]); + put_device(map->dev); + kfree(tsk); + return ret; +-- +2.43.0 + diff --git a/queue-6.8/dma-mapping-benchmark-handle-numa_no_node-correctly.patch b/queue-6.8/dma-mapping-benchmark-handle-numa_no_node-correctly.patch new file mode 100644 index 00000000000..5b4dd3ba3c6 --- /dev/null +++ b/queue-6.8/dma-mapping-benchmark-handle-numa_no_node-correctly.patch @@ -0,0 +1,70 @@ +From 57d7b930ea1d8e75a696370504d439715014d0b9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 4 May 2024 14:47:04 +0300 
+Subject: dma-mapping: benchmark: handle NUMA_NO_NODE correctly + +From: Fedor Pchelkin + +[ Upstream commit e64746e74f717961250a155e14c156616fcd981f ] + +cpumask_of_node() can be called for NUMA_NO_NODE inside do_map_benchmark() +resulting in the following sanitizer report: + +UBSAN: array-index-out-of-bounds in ./arch/x86/include/asm/topology.h:72:28 +index -1 is out of range for type 'cpumask [64][1]' +CPU: 1 PID: 990 Comm: dma_map_benchma Not tainted 6.9.0-rc6 #29 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) +Call Trace: + +dump_stack_lvl (lib/dump_stack.c:117) +ubsan_epilogue (lib/ubsan.c:232) +__ubsan_handle_out_of_bounds (lib/ubsan.c:429) +cpumask_of_node (arch/x86/include/asm/topology.h:72) [inline] +do_map_benchmark (kernel/dma/map_benchmark.c:104) +map_benchmark_ioctl (kernel/dma/map_benchmark.c:246) +full_proxy_unlocked_ioctl (fs/debugfs/file.c:333) +__x64_sys_ioctl (fs/ioctl.c:890) +do_syscall_64 (arch/x86/entry/common.c:83) +entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) + +Use cpumask_of_node() in place when binding a kernel thread to a cpuset +of a particular node. + +Note that the provided node id is checked inside map_benchmark_ioctl(). +It's just a NUMA_NO_NODE case which is not handled properly later. + +Found by Linux Verification Center (linuxtesting.org). 
+ +Fixes: 65789daa8087 ("dma-mapping: add benchmark support for streaming DMA APIs") +Signed-off-by: Fedor Pchelkin +Acked-by: Barry Song +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + kernel/dma/map_benchmark.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c +index 59fb3f849b351..f7f3d14fa69a7 100644 +--- a/kernel/dma/map_benchmark.c ++++ b/kernel/dma/map_benchmark.c +@@ -101,7 +101,6 @@ static int do_map_benchmark(struct map_benchmark_data *map) + struct task_struct **tsk; + int threads = map->bparam.threads; + int node = map->bparam.node; +- const cpumask_t *cpu_mask = cpumask_of_node(node); + u64 loops; + int ret = 0; + int i; +@@ -124,7 +123,7 @@ static int do_map_benchmark(struct map_benchmark_data *map) + } + + if (node != NUMA_NO_NODE) +- kthread_bind_mask(tsk[i], cpu_mask); ++ kthread_bind_mask(tsk[i], cpumask_of_node(node)); + } + + /* clear the old value in the previous benchmark */ +-- +2.43.0 + diff --git a/queue-6.8/drm-amd-display-enable-colorspace-property-for-mst-c.patch b/queue-6.8/drm-amd-display-enable-colorspace-property-for-mst-c.patch new file mode 100644 index 00000000000..58208736327 --- /dev/null +++ b/queue-6.8/drm-amd-display-enable-colorspace-property-for-mst-c.patch @@ -0,0 +1,45 @@ +From 08299aae3ab9b587d8a4db8cc95f2550e96fc305 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 May 2024 16:45:35 -0500 +Subject: drm/amd/display: Enable colorspace property for MST connectors +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mario Limonciello + +[ Upstream commit 8195979d2dd995d60c2663adf54c69c1bf4eadd1 ] + +MST colorspace property support was disabled due to a series of warnings +that came up when the device was plugged in since the properties weren't +made at device creation. Create the properties in advance instead. 
+ +Suggested-by: Ville Syrjälä +Fixes: 69a959610229 ("drm/amd/display: Temporary Disable MST DP Colorspace Property"). +Reported-and-tested-by: Tyler Schneider +Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3353 +Reviewed-by: Harry Wentland +Signed-off-by: Mario Limonciello +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +index 941e96f100f4e..12b036d511d0c 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +@@ -613,6 +613,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, + &connector->base, + dev->mode_config.tile_property, + 0); ++ connector->colorspace_property = master->base.colorspace_property; ++ if (connector->colorspace_property) ++ drm_connector_attach_colorspace_property(connector); + + drm_connector_set_path_property(connector, pathprop); + +-- +2.43.0 + diff --git a/queue-6.8/drm-amdgpu-adjust-logic-in-amdgpu_device_partner_ban.patch b/queue-6.8/drm-amdgpu-adjust-logic-in-amdgpu_device_partner_ban.patch new file mode 100644 index 00000000000..e7ed385203e --- /dev/null +++ b/queue-6.8/drm-amdgpu-adjust-logic-in-amdgpu_device_partner_ban.patch @@ -0,0 +1,57 @@ +From cf90abab51068037e57770b9c7dce1ee462cdbe1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 May 2024 11:25:49 -0400 +Subject: drm/amdgpu: Adjust logic in amdgpu_device_partner_bandwidth() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alex Deucher + +[ Upstream commit ba46b3bda296c4f82b061ac40b90f49d2a00a380 ] + +Use current speed/width on devices which don't support +dynamic PCIe switching. 
+ +Fixes: 466a7d115326 ("drm/amd: Use the first non-dGPU PCI device for BW limits") +Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3289 +Acked-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index 14d878675586a..33f07e6cbc1d0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -5792,13 +5792,18 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, + *speed = PCI_SPEED_UNKNOWN; + *width = PCIE_LNK_WIDTH_UNKNOWN; + +- while ((parent = pci_upstream_bridge(parent))) { +- /* skip upstream/downstream switches internal to dGPU*/ +- if (parent->vendor == PCI_VENDOR_ID_ATI) +- continue; +- *speed = pcie_get_speed_cap(parent); +- *width = pcie_get_width_cap(parent); +- break; ++ if (amdgpu_device_pcie_dynamic_switching_supported(adev)) { ++ while ((parent = pci_upstream_bridge(parent))) { ++ /* skip upstream/downstream switches internal to dGPU*/ ++ if (parent->vendor == PCI_VENDOR_ID_ATI) ++ continue; ++ *speed = pcie_get_speed_cap(parent); ++ *width = pcie_get_width_cap(parent); ++ break; ++ } ++ } else { ++ /* use the current speeds rather than max if switching is not supported */ ++ pcie_bandwidth_available(adev->pdev, NULL, speed, width); + } + } + +-- +2.43.0 + diff --git a/queue-6.8/drm-i915-gt-fix-ccs-id-s-calculation-for-ccs-mode-se.patch b/queue-6.8/drm-i915-gt-fix-ccs-id-s-calculation-for-ccs-mode-se.patch new file mode 100644 index 00000000000..a2135c629aa --- /dev/null +++ b/queue-6.8/drm-i915-gt-fix-ccs-id-s-calculation-for-ccs-mode-se.patch @@ -0,0 +1,98 @@ +From 6c8edde802f7e98521f153e27a1448e67a98b8b0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 May 2024 11:06:16 +0200 +Subject: drm/i915/gt: Fix CCS 
id's calculation for CCS mode setting + +From: Andi Shyti + +[ Upstream commit ee01b6a386eaf9984b58a2476e8f531149679da9 ] + +The whole point of the previous fixes has been to change the CCS +hardware configuration to generate only one stream available to +the compute users. We did this by changing the info.engine_mask +that is set during device probe, reset during the detection of +the fused engines, and finally reset again when choosing the CCS +mode. + +We can't use the engine_mask variable anymore, as with the +current configuration, it imposes only one CCS no matter what the +hardware configuration is. + +Before changing the engine_mask for the third time, save it and +use it for calculating the CCS mode. + +After the previous changes, the user reported a performance drop +to around 1/4. We have tested that the compute operations, with +the current patch, have improved by the same factor. + +Fixes: 6db31251bb26 ("drm/i915/gt: Enable only one CCS for compute workload") +Signed-off-by: Andi Shyti +Cc: Chris Wilson +Cc: Gnattu OC +Cc: Joonas Lahtinen +Cc: Matt Roper +Tested-by: Jian Ye +Reviewed-by: Umesh Nerlige Ramappa +Tested-by: Gnattu OC +Link: https://patchwork.freedesktop.org/patch/msgid/20240517090616.242529-1-andi.shyti@linux.intel.com +(cherry picked from commit a09d2327a9ba8e3f5be238bc1b7ca2809255b464) +Signed-off-by: Jani Nikula +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 ++++++ + drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 2 +- + drivers/gpu/drm/i915/gt/intel_gt_types.h | 8 ++++++++ + 3 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c +index 84be97f959faa..e379eabeaa662 100644 +--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c ++++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c +@@ -919,6 +919,12 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) + if (IS_DG2(gt->i915)) { + u8 first_ccs = 
__ffs(CCS_MASK(gt)); + ++ /* ++ * Store the number of active cslices before ++ * changing the CCS engine configuration ++ */ ++ gt->ccs.cslices = CCS_MASK(gt); ++ + /* Mask off all the CCS engine */ + info->engine_mask &= ~GENMASK(CCS3, CCS0); + /* Put back in the first CCS engine */ +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c +index 99b71bb7da0a6..3c62a44e9106c 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c ++++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c +@@ -19,7 +19,7 @@ unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt) + + /* Build the value for the fixed CCS load balancing */ + for (cslice = 0; cslice < I915_MAX_CCS; cslice++) { +- if (CCS_MASK(gt) & BIT(cslice)) ++ if (gt->ccs.cslices & BIT(cslice)) + /* + * If available, assign the cslice + * to the first available engine... +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h +index def7dd0eb6f19..cfdd2ad5e9549 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h ++++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h +@@ -207,6 +207,14 @@ struct intel_gt { + [MAX_ENGINE_INSTANCE + 1]; + enum intel_submission_method submission_method; + ++ struct { ++ /* ++ * Mask of the non fused CCS slices ++ * to be used for the load balancing ++ */ ++ intel_engine_mask_t cslices; ++ } ccs; ++ + /* + * Default address space (either GGTT or ppGTT depending on arch). 
+ * +-- +2.43.0 + diff --git a/queue-6.8/drm-i915-guc-avoid-field_prep-warning.patch b/queue-6.8/drm-i915-guc-avoid-field_prep-warning.patch new file mode 100644 index 00000000000..0ffbc6b95ad --- /dev/null +++ b/queue-6.8/drm-i915-guc-avoid-field_prep-warning.patch @@ -0,0 +1,58 @@ +From 8a573027f373c2cbb70a647287e935c3feba7b73 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 Apr 2024 09:48:09 -0700 +Subject: drm/i915/guc: avoid FIELD_PREP warning + +From: Arnd Bergmann + +[ Upstream commit d4f36db62396b73bed383c0b6e48d36278cafa78 ] + +With gcc-7 and earlier, there are lots of warnings like + +In file included from :0:0: +In function '__guc_context_policy_add_priority.isra.66', + inlined from '__guc_context_set_prio.isra.67' at drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3292:3, + inlined from 'guc_context_set_prio' at drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3320:2: +include/linux/compiler_types.h:399:38: error: call to '__compiletime_assert_631' declared with attribute error: FIELD_PREP: mask is not constant + _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) + ^ +... +drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:2422:3: note: in expansion of macro 'FIELD_PREP' + FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ + ^~~~~~~~~~ + +Make sure that GUC_KLV_0_KEY is an unsigned value to avoid the warning. 
+ +Fixes: 77b6f79df66e ("drm/i915/guc: Update to GuC version 69.0.3") +Signed-off-by: Arnd Bergmann +Reviewed-by: Michal Wajdeczko +Signed-off-by: Julia Filipchuk +Signed-off-by: John Harrison +Link: https://patchwork.freedesktop.org/patch/msgid/20240430164809.482131-1-julia.filipchuk@intel.com +(cherry picked from commit 364e039827ef628c650c21c1afe1c54d9c3296d9) +Signed-off-by: Jani Nikula +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +index 58012edd4eb0e..4f4f53c42a9c5 100644 +--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h ++++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +@@ -29,9 +29,9 @@ + */ + + #define GUC_KLV_LEN_MIN 1u +-#define GUC_KLV_0_KEY (0xffff << 16) +-#define GUC_KLV_0_LEN (0xffff << 0) +-#define GUC_KLV_n_VALUE (0xffffffff << 0) ++#define GUC_KLV_0_KEY (0xffffu << 16) ++#define GUC_KLV_0_LEN (0xffffu << 0) ++#define GUC_KLV_n_VALUE (0xffffffffu << 0) + + /** + * DOC: GuC Self Config KLVs +-- +2.43.0 + diff --git a/queue-6.8/drm-panel-sitronix-st7789v-fix-display-size-for-jt24.patch b/queue-6.8/drm-panel-sitronix-st7789v-fix-display-size-for-jt24.patch new file mode 100644 index 00000000000..4ff79d68f13 --- /dev/null +++ b/queue-6.8/drm-panel-sitronix-st7789v-fix-display-size-for-jt24.patch @@ -0,0 +1,41 @@ +From 1a11817ac504ef732a7e85f2119241a2a18f6a3d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 May 2024 16:42:47 +0200 +Subject: drm/panel: sitronix-st7789v: fix display size for jt240mhqs_hwt_ek_e3 + panel + +From: Gerald Loacker + +[ Upstream commit b62c150c3bae72ac1910dcc588f360159eb0744a ] + +This is a portrait mode display. Change the dimensions accordingly. 
+ +Fixes: 0fbbe96bfa08 ("drm/panel: sitronix-st7789v: add jasonic jt240mhqs-hwt-ek-e3 support") +Signed-off-by: Gerald Loacker +Acked-by: Jessica Zhang +Link: https://lore.kernel.org/r/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-3-e4821802443d@wolfvision.net +Signed-off-by: Neil Armstrong +Link: https://patchwork.freedesktop.org/patch/msgid/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-3-e4821802443d@wolfvision.net +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/panel/panel-sitronix-st7789v.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +index c7e3f1280404d..e8f385b9c6182 100644 +--- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c ++++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +@@ -289,8 +289,8 @@ static const struct drm_display_mode jt240mhqs_hwt_ek_e3_mode = { + .vsync_start = 280 + 48, + .vsync_end = 280 + 48 + 4, + .vtotal = 280 + 48 + 4 + 4, +- .width_mm = 43, +- .height_mm = 37, ++ .width_mm = 37, ++ .height_mm = 43, + .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC, + }; + +-- +2.43.0 + diff --git a/queue-6.8/drm-panel-sitronix-st7789v-fix-timing-for-jt240mhqs_.patch b/queue-6.8/drm-panel-sitronix-st7789v-fix-timing-for-jt240mhqs_.patch new file mode 100644 index 00000000000..2dbedbb6229 --- /dev/null +++ b/queue-6.8/drm-panel-sitronix-st7789v-fix-timing-for-jt240mhqs_.patch @@ -0,0 +1,44 @@ +From da12ce0ec6616d4f40130fe83eb2978c793ac054 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 May 2024 16:42:45 +0200 +Subject: drm/panel: sitronix-st7789v: fix timing for jt240mhqs_hwt_ek_e3 panel + +From: Gerald Loacker + +[ Upstream commit 0e5895ff7fab0fc05ec17daf9a568368828fa6ea ] + +Flickering was observed when using partial mode. Moving the vsync to the +same position as used by the default sitronix-st7789v timing resolves this +issue. 
+ +Fixes: 0fbbe96bfa08 ("drm/panel: sitronix-st7789v: add jasonic jt240mhqs-hwt-ek-e3 support") +Acked-by: Jessica Zhang +Signed-off-by: Gerald Loacker +Link: https://lore.kernel.org/r/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-1-e4821802443d@wolfvision.net +Signed-off-by: Neil Armstrong +Link: https://patchwork.freedesktop.org/patch/msgid/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-1-e4821802443d@wolfvision.net +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/panel/panel-sitronix-st7789v.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +index 88e80fe98112d..32e5c03480381 100644 +--- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c ++++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +@@ -286,9 +286,9 @@ static const struct drm_display_mode jt240mhqs_hwt_ek_e3_mode = { + .hsync_end = 240 + 28 + 10, + .htotal = 240 + 28 + 10 + 10, + .vdisplay = 280, +- .vsync_start = 280 + 8, +- .vsync_end = 280 + 8 + 4, +- .vtotal = 280 + 8 + 4 + 4, ++ .vsync_start = 280 + 48, ++ .vsync_end = 280 + 48 + 4, ++ .vtotal = 280 + 48 + 4 + 4, + .width_mm = 43, + .height_mm = 37, + .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC, +-- +2.43.0 + diff --git a/queue-6.8/drm-panel-sitronix-st7789v-tweak-timing-for-jt240mhq.patch b/queue-6.8/drm-panel-sitronix-st7789v-tweak-timing-for-jt240mhq.patch new file mode 100644 index 00000000000..166074c7c17 --- /dev/null +++ b/queue-6.8/drm-panel-sitronix-st7789v-tweak-timing-for-jt240mhq.patch @@ -0,0 +1,44 @@ +From 2d1dad3e69369ea5e4ce985fa1a60f1c374a2959 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 May 2024 16:42:46 +0200 +Subject: drm/panel: sitronix-st7789v: tweak timing for jt240mhqs_hwt_ek_e3 + panel + +From: Gerald Loacker + +[ Upstream commit 2ba50582634d0bfe3a333ab7575a7f0122a7cde8 ] + +Use the default timing parameters to get a refresh rate of about 60 Hz for +a clock of 6 MHz. 
+ +Fixes: 0fbbe96bfa08 ("drm/panel: sitronix-st7789v: add jasonic jt240mhqs-hwt-ek-e3 support") +Signed-off-by: Gerald Loacker +Acked-by: Jessica Zhang +Link: https://lore.kernel.org/r/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-2-e4821802443d@wolfvision.net +Signed-off-by: Neil Armstrong +Link: https://patchwork.freedesktop.org/patch/msgid/20240409-bugfix-jt240mhqs_hwt_ek_e3-timing-v2-2-e4821802443d@wolfvision.net +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/panel/panel-sitronix-st7789v.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +index 32e5c03480381..c7e3f1280404d 100644 +--- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c ++++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +@@ -282,9 +282,9 @@ static const struct drm_display_mode et028013dma_mode = { + static const struct drm_display_mode jt240mhqs_hwt_ek_e3_mode = { + .clock = 6000, + .hdisplay = 240, +- .hsync_start = 240 + 28, +- .hsync_end = 240 + 28 + 10, +- .htotal = 240 + 28 + 10 + 10, ++ .hsync_start = 240 + 38, ++ .hsync_end = 240 + 38 + 10, ++ .htotal = 240 + 38 + 10 + 10, + .vdisplay = 280, + .vsync_start = 280 + 48, + .vsync_end = 280 + 48 + 4, +-- +2.43.0 + diff --git a/queue-6.8/drm-xe-add-dbg-messages-on-the-suspend-resume-functi.patch b/queue-6.8/drm-xe-add-dbg-messages-on-the-suspend-resume-functi.patch new file mode 100644 index 00000000000..11862ca1558 --- /dev/null +++ b/queue-6.8/drm-xe-add-dbg-messages-on-the-suspend-resume-functi.patch @@ -0,0 +1,108 @@ +From f024e7f975f54b5ea7fb420c73bdb2b431d1baa8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 18 Mar 2024 14:01:41 -0400 +Subject: drm/xe: Add dbg messages on the suspend resume functions. 
+ +From: Rodrigo Vivi + +[ Upstream commit f7f24b7950af4b1548ad5075ddb13eeb333bb782 ] + +In case of the suspend/resume flow getting locked up we +can get reports with some useful hints on where it might +get locked and if that has failed. + +Reviewed-by: Matthew Auld +Link: https://patchwork.freedesktop.org/patch/msgid/20240318180141.267458-2-rodrigo.vivi@intel.com +Signed-off-by: Rodrigo Vivi +Stable-dep-of: 77b79df0268b ("drm/xe: Change pcode timeout to 50msec while polling again") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_pm.c | 22 +++++++++++++++++----- + 1 file changed, 17 insertions(+), 5 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c +index b429c2876a764..bcf748036dce9 100644 +--- a/drivers/gpu/drm/xe/xe_pm.c ++++ b/drivers/gpu/drm/xe/xe_pm.c +@@ -54,13 +54,15 @@ int xe_pm_suspend(struct xe_device *xe) + u8 id; + int err; + ++ drm_dbg(&xe->drm, "Suspending device\n"); ++ + for_each_gt(gt, xe, id) + xe_gt_suspend_prepare(gt); + + /* FIXME: Super racey... 
*/ + err = xe_bo_evict_all(xe); + if (err) +- return err; ++ goto err; + + xe_display_pm_suspend(xe); + +@@ -68,7 +70,7 @@ int xe_pm_suspend(struct xe_device *xe) + err = xe_gt_suspend(gt); + if (err) { + xe_display_pm_resume(xe); +- return err; ++ goto err; + } + } + +@@ -76,7 +78,11 @@ int xe_pm_suspend(struct xe_device *xe) + + xe_display_pm_suspend_late(xe); + ++ drm_dbg(&xe->drm, "Device suspended\n"); + return 0; ++err: ++ drm_dbg(&xe->drm, "Device suspend failed %d\n", err); ++ return err; + } + + /** +@@ -92,13 +98,15 @@ int xe_pm_resume(struct xe_device *xe) + u8 id; + int err; + ++ drm_dbg(&xe->drm, "Resuming device\n"); ++ + for_each_tile(tile, xe, id) + xe_wa_apply_tile_workarounds(tile); + + for_each_gt(gt, xe, id) { + err = xe_pcode_init(gt); + if (err) +- return err; ++ goto err; + } + + xe_display_pm_resume_early(xe); +@@ -109,7 +117,7 @@ int xe_pm_resume(struct xe_device *xe) + */ + err = xe_bo_restore_kernel(xe); + if (err) +- return err; ++ goto err; + + xe_irq_resume(xe); + +@@ -120,9 +128,13 @@ int xe_pm_resume(struct xe_device *xe) + + err = xe_bo_restore_user(xe); + if (err) +- return err; ++ goto err; + ++ drm_dbg(&xe->drm, "Device resumed\n"); + return 0; ++err: ++ drm_dbg(&xe->drm, "Device resume failed %d\n", err); ++ return err; + } + + static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev) +-- +2.43.0 + diff --git a/queue-6.8/drm-xe-change-pcode-timeout-to-50msec-while-polling-.patch b/queue-6.8/drm-xe-change-pcode-timeout-to-50msec-while-polling-.patch new file mode 100644 index 00000000000..d0a304824c0 --- /dev/null +++ b/queue-6.8/drm-xe-change-pcode-timeout-to-50msec-while-polling-.patch @@ -0,0 +1,52 @@ +From a25768366eb6765373fa51c8b785efd98777b5d3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 May 2024 20:52:15 +0530 +Subject: drm/xe: Change pcode timeout to 50msec while polling again +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Himal Prasad Ghimiray + +[ 
Upstream commit 77b79df0268bee3ef38fd5e76e86a076ce02995d ] + +Polling is initially attempted with timeout_base_ms enabled for +preemption, and if it exceeds this timeframe, another attempt is made +without preemption, allowing an additional 50 ms before timing out. + +v2 +- Rebase + +v3 +- Move warnings to separate patch (Lucas) + +Cc: Lucas De Marchi +Cc: Rodrigo Vivi +Signed-off-by: Himal Prasad Ghimiray +Fixes: 7dc9b92dcfef ("drm/xe: Remove i915_utils dependency from xe_pcode.") +Reviewed-by: Lucas De Marchi +Link: https://patchwork.freedesktop.org/patch/msgid/20240508152216.3263109-2-himal.prasad.ghimiray@intel.com +Signed-off-by: Rodrigo Vivi +(cherry picked from commit c81858eb52266b3d6ba28ca4f62a198231a10cdc) +Signed-off-by: Thomas Hellström +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_pcode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c +index c674c87c7f40b..81f4ae2ea08f3 100644 +--- a/drivers/gpu/drm/xe/xe_pcode.c ++++ b/drivers/gpu/drm/xe/xe_pcode.c +@@ -191,7 +191,7 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, + drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1); + preempt_disable(); + ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, +- true, timeout_base_ms * 1000, true); ++ true, 50 * 1000, true); + preempt_enable(); + + out: +-- +2.43.0 + diff --git a/queue-6.8/drm-xe-check-pcode-init-status-only-on-root-gt-of-ro.patch b/queue-6.8/drm-xe-check-pcode-init-status-only-on-root-gt-of-ro.patch new file mode 100644 index 00000000000..be3e7eaf702 --- /dev/null +++ b/queue-6.8/drm-xe-check-pcode-init-status-only-on-root-gt-of-ro.patch @@ -0,0 +1,348 @@ +From 53f036fe10e0b9c5948145284a8db62522f8259d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 Apr 2024 14:20:04 +0530 +Subject: drm/xe: check pcode init status only on root gt of root tile + +From: Riana Tauro + +[
933fd5ffaf87a60a019992d48e3a96b5c3403d9f ] + +The root tile indicates the pcode initialization is complete +when all tiles have completed their initialization. +So the mailbox can be polled only on the root tile. +Check pcode init status only on root tile and move it to +device probe early as root tile is initialized there. +Also make similar changes in resume paths. + +v2: add lock/unlocked version of pcode_mailbox_rw + to allow pcode init to be called in device + early probe (Rodrigo) + +v3: add code description about using root tile + change function names to xe_pcode_probe_early + and xe_pcode_init (Rodrigo) + +Signed-off-by: Riana Tauro +Reviewed-by: Rodrigo Vivi +Reviewed-by: Himal Prasad Ghimiray +Link: https://patchwork.freedesktop.org/patch/msgid/20240410085005.1126343-2-riana.tauro@intel.com +Signed-off-by: Rodrigo Vivi +Stable-dep-of: 77b79df0268b ("drm/xe: Change pcode timeout to 50msec while polling again") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_device.c | 21 ++++-- + drivers/gpu/drm/xe/xe_pcode.c | 115 ++++++++++++++++++++------------- + drivers/gpu/drm/xe/xe_pcode.h | 6 +- + drivers/gpu/drm/xe/xe_pm.c | 16 ++--- + 4 files changed, 94 insertions(+), 64 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c +index 5176c27e4b6a4..1554ba3cf3ce2 100644 +--- a/drivers/gpu/drm/xe/xe_device.c ++++ b/drivers/gpu/drm/xe/xe_device.c +@@ -377,8 +377,14 @@ static int xe_set_dma_info(struct xe_device *xe) + return err; + } + +-/* +- * Initialize MMIO resources that don't require any knowledge about tile count. ++/** ++ * xe_device_probe_early: Device early probe ++ * @xe: xe device instance ++ * ++ * Initialize MMIO resources that don't require any ++ * knowledge about tile count. 
Also initialize pcode ++ * ++ * Return: 0 on success, error code on failure + */ + int xe_device_probe_early(struct xe_device *xe) + { +@@ -392,6 +398,10 @@ int xe_device_probe_early(struct xe_device *xe) + if (err) + return err; + ++ err = xe_pcode_probe_early(xe); ++ if (err) ++ return err; ++ + return 0; + } + +@@ -454,11 +464,8 @@ int xe_device_probe(struct xe_device *xe) + if (err) + return err; + +- for_each_gt(gt, xe, id) { +- err = xe_pcode_probe(gt); +- if (err) +- return err; +- } ++ for_each_gt(gt, xe, id) ++ xe_pcode_init(gt); + + err = xe_display_init_noirq(xe); + if (err) +diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c +index b324dc2a5debe..c674c87c7f40b 100644 +--- a/drivers/gpu/drm/xe/xe_pcode.c ++++ b/drivers/gpu/drm/xe/xe_pcode.c +@@ -10,6 +10,7 @@ + + #include + ++#include "xe_device.h" + #include "xe_gt.h" + #include "xe_mmio.h" + #include "xe_pcode_api.h" +@@ -43,8 +44,6 @@ static int pcode_mailbox_status(struct xe_gt *gt) + [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, + }; + +- lockdep_assert_held(&gt->pcode.lock); +- + err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK; + if (err) { + drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err, +@@ -55,17 +54,15 @@ static int pcode_mailbox_status(struct xe_gt *gt) + return 0; + } + +-static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, +- unsigned int timeout_ms, bool return_data, +- bool atomic) ++static int __pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, ++ unsigned int timeout_ms, bool return_data, ++ bool atomic) + { + int err; + + if (gt_to_xe(gt)->info.skip_pcode) + return 0; + +- lockdep_assert_held(&gt->pcode.lock); +- + if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0) + return -EAGAIN; + +@@ -87,6 +84,18 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, + return pcode_mailbox_status(gt); + } + ++static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32
*data0, u32 *data1, ++ unsigned int timeout_ms, bool return_data, ++ bool atomic) ++{ ++ if (gt_to_xe(gt)->info.skip_pcode) ++ return 0; ++ ++ lockdep_assert_held(&gt->pcode.lock); ++ ++ return __pcode_mailbox_rw(gt, mbox, data0, data1, timeout_ms, return_data, atomic); ++} ++ + int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout) + { + int err; +@@ -109,15 +118,19 @@ int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1) + return err; + } + +-static int xe_pcode_try_request(struct xe_gt *gt, u32 mbox, +- u32 request, u32 reply_mask, u32 reply, +- u32 *status, bool atomic, int timeout_us) ++static int pcode_try_request(struct xe_gt *gt, u32 mbox, ++ u32 request, u32 reply_mask, u32 reply, ++ u32 *status, bool atomic, int timeout_us, bool locked) + { + int slept, wait = 10; + + for (slept = 0; slept < timeout_us; slept += wait) { +- *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, +- atomic); ++ if (locked) ++ *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, ++ atomic); ++ else ++ *status = __pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, ++ atomic); + if ((*status == 0) && ((request & reply_mask) == reply)) + return 0; + +@@ -158,8 +171,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, + + mutex_lock(&gt->pcode.lock); + +- ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, +- false, timeout_base_ms * 1000); ++ ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, ++ false, timeout_base_ms * 1000, true); + if (!ret) + goto out; + +@@ -177,8 +190,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, + "PCODE timeout, retrying with preemption disabled\n"); + drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1); + preempt_disable(); +- ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, +- true, timeout_base_ms * 1000); ++ ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, ++ true,
timeout_base_ms * 1000, true); + preempt_enable(); + + out: +@@ -238,59 +251,71 @@ int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, + } + + /** +- * xe_pcode_init - Ensure PCODE is initialized +- * @gt: gt instance ++ * xe_pcode_ready - Ensure PCODE is initialized ++ * @xe: xe instance ++ * @locked: true if lock held, false otherwise + * +- * This function ensures that PCODE is properly initialized. To be called during +- * probe and resume paths. ++ * PCODE init mailbox is polled only on root gt of root tile ++ * as the root tile provides the initialization is complete only ++ * after all the tiles have completed the initialization. ++ * Called only on early probe without locks and with locks in ++ * resume path. + * +- * It returns 0 on success, and -error number on failure. ++ * Returns 0 on success, and -error number on failure. + */ +-int xe_pcode_init(struct xe_gt *gt) ++int xe_pcode_ready(struct xe_device *xe, bool locked) + { + u32 status, request = DGFX_GET_INIT_STATUS; ++ struct xe_gt *gt = xe_root_mmio_gt(xe); + int timeout_us = 180000000; /* 3 min */ + int ret; + +- if (gt_to_xe(gt)->info.skip_pcode) ++ if (xe->info.skip_pcode) + return 0; + +- if (!IS_DGFX(gt_to_xe(gt))) ++ if (!IS_DGFX(xe)) + return 0; + +- mutex_lock(&gt->pcode.lock); +- ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, request, +- DGFX_INIT_STATUS_COMPLETE, +- DGFX_INIT_STATUS_COMPLETE, +- &status, false, timeout_us); +- mutex_unlock(&gt->pcode.lock); ++ if (locked) ++ mutex_lock(&gt->pcode.lock); ++ ++ ret = pcode_try_request(gt, DGFX_PCODE_STATUS, request, ++ DGFX_INIT_STATUS_COMPLETE, ++ DGFX_INIT_STATUS_COMPLETE, ++ &status, false, timeout_us, locked); ++ ++ if (locked) ++ mutex_unlock(&gt->pcode.lock); + + if (ret) +- drm_err(&gt_to_xe(gt)->drm, ++ drm_err(&xe->drm, + "PCODE initialization timedout after: 3 min\n"); + + return ret; + } + + /** +- * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized.
++ * xe_pcode_init: initialize components of PCODE + * @gt: gt instance + * +- * This function initializes the xe_pcode component, and when needed, it ensures +- * that PCODE has properly performed its initialization and it is really ready +- * to go. To be called once only during probe. +- * +- * It returns 0 on success, and -error number on failure. ++ * This function initializes the xe_pcode component. ++ * To be called once only during probe. + */ +-int xe_pcode_probe(struct xe_gt *gt) ++void xe_pcode_init(struct xe_gt *gt) + { + drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->pcode.lock); ++} + +- if (gt_to_xe(gt)->info.skip_pcode) +- return 0; +- +- if (!IS_DGFX(gt_to_xe(gt))) +- return 0; +- +- return xe_pcode_init(gt); ++/** ++ * xe_pcode_probe_early: initializes PCODE ++ * @xe: xe instance ++ * ++ * This function checks the initialization status of PCODE ++ * To be called once only during early probe without locks. ++ * ++ * Returns 0 on success, error code otherwise ++ */ ++int xe_pcode_probe_early(struct xe_device *xe) ++{ ++ return xe_pcode_ready(xe, false); + } +diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h +index 08cb1d047cba2..3f54c6d2a57d2 100644 +--- a/drivers/gpu/drm/xe/xe_pcode.h ++++ b/drivers/gpu/drm/xe/xe_pcode.h +@@ -8,9 +8,11 @@ + + #include + struct xe_gt; ++struct xe_device; + +-int xe_pcode_probe(struct xe_gt *gt); +-int xe_pcode_init(struct xe_gt *gt); ++void xe_pcode_init(struct xe_gt *gt); ++int xe_pcode_probe_early(struct xe_device *xe); ++int xe_pcode_ready(struct xe_device *xe, bool locked); + int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, + u32 max_gt_freq); + int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1); +diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c +index bcf748036dce9..343b71216b797 100644 +--- a/drivers/gpu/drm/xe/xe_pm.c ++++ b/drivers/gpu/drm/xe/xe_pm.c +@@ -103,11 +103,9 @@ int xe_pm_resume(struct xe_device *xe) + for_each_tile(tile, xe, id)
+ xe_wa_apply_tile_workarounds(tile); + +- for_each_gt(gt, xe, id) { +- err = xe_pcode_init(gt); +- if (err) +- goto err; +- } ++ err = xe_pcode_ready(xe, true); ++ if (err) ++ return err; + + xe_display_pm_resume_early(xe); + +@@ -298,11 +296,9 @@ int xe_pm_runtime_resume(struct xe_device *xe) + xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc); + + if (xe->d3cold.allowed && xe->d3cold.power_lost) { +- for_each_gt(gt, xe, id) { +- err = xe_pcode_init(gt); +- if (err) +- goto out; +- } ++ err = xe_pcode_ready(xe, true); ++ if (err) ++ goto out; + + /* + * This only restores pinned memory which is the memory +-- +2.43.0 + diff --git a/queue-6.8/drm-xe-only-use-reserved-bcs-instances-for-usm-migra.patch b/queue-6.8/drm-xe-only-use-reserved-bcs-instances-for-usm-migra.patch new file mode 100644 index 00000000000..e2a94bf7458 --- /dev/null +++ b/queue-6.8/drm-xe-only-use-reserved-bcs-instances-for-usm-migra.patch @@ -0,0 +1,78 @@ +From 2da6594eb5a33a254705a6c17ffcb6665e2e268c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 15 Apr 2024 12:04:53 -0700 +Subject: drm/xe: Only use reserved BCS instances for usm migrate exec queue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Matthew Brost + +[ Upstream commit c8ea2c31f5ea437199b239d76ad5db27343edb0c ] + +The GuC context scheduling queue is 2 entries deep, thus it is possible +for a migration job to be stuck behind a fault if migration exec queue +shares engines with user jobs. This can deadlock as the migrate exec +queue is required to service page faults. Avoid deadlock by only using +reserved BCS instances for usm migrate exec queue.
+ +Fixes: a043fbab7af5 ("drm/xe/pvc: Use fast copy engines as migrate engine on PVC") +Cc: Matt Roper +Cc: Niranjana Vishwanathapura +Signed-off-by: Matthew Brost +Link: https://patchwork.freedesktop.org/patch/msgid/20240415190453.696553-2-matthew.brost@intel.com +Reviewed-by: Brian Welty +(cherry picked from commit 04f4a70a183a688a60fe3882d6e4236ea02cfc67) +Signed-off-by: Thomas Hellström +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/xe/xe_migrate.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c +index 7a6ad3469d748..0f77305c47c88 100644 +--- a/drivers/gpu/drm/xe/xe_migrate.c ++++ b/drivers/gpu/drm/xe/xe_migrate.c +@@ -32,7 +32,6 @@ + #include "xe_sync.h" + #include "xe_trace.h" + #include "xe_vm.h" +-#include "xe_wa.h" + + /** + * struct xe_migrate - migrate context. +@@ -288,10 +287,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, + } + + /* +- * Due to workaround 16017236439, odd instance hardware copy engines are +- * faster than even instance ones. +- * This function returns the mask involving all fast copy engines and the +- * reserved copy engine to be used as logical mask for migrate engine. + * Including the reserved copy engine is required to avoid deadlocks due to + * migrate jobs servicing the faults gets stuck behind the job that faulted. + */ +@@ -305,8 +300,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt) + if (hwe->class != XE_ENGINE_CLASS_COPY) + continue; + +- if (!XE_WA(gt, 16017236439) || +- xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1) ++ if (xe_gt_is_usm_hwe(gt, hwe)) + logical_mask |= BIT(hwe->logical_instance); + } + +@@ -357,6 +351,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) + if (!hwe || !logical_mask) + return ERR_PTR(-EINVAL); + ++ /* ++ * XXX: Currently only reserving 1 (likely slow) BCS instance on ++ * PVC, may want to revisit if performance is needed. 
++ */ + m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, + EXEC_QUEUE_FLAG_KERNEL | + EXEC_QUEUE_FLAG_PERMANENT | +-- +2.43.0 + diff --git a/queue-6.8/e1000e-move-force-smbus-near-the-end-of-enable_ulp-f.patch b/queue-6.8/e1000e-move-force-smbus-near-the-end-of-enable_ulp-f.patch new file mode 100644 index 00000000000..172add8b9ec --- /dev/null +++ b/queue-6.8/e1000e-move-force-smbus-near-the-end-of-enable_ulp-f.patch @@ -0,0 +1,126 @@ +From 6a330dc17966a9bdd1199ef8175a606f1354c9f5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 May 2024 15:06:04 -0700 +Subject: e1000e: move force SMBUS near the end of enable_ulp function + +From: Hui Wang + +[ Upstream commit bfd546a552e140b0a4c8a21527c39d6d21addb28 ] + +The commit 861e8086029e ("e1000e: move force SMBUS from enable ulp +function to avoid PHY loss issue") introduces a regression on +PCH_MTP_I219_LM18 (PCIID: 0x8086550A). Without the referred commit, the +ethernet works well after suspend and resume, but after applying the +commit, the ethernet couldn't work anymore after the resume and the +dmesg shows that the NIC link changes to 10Mbps (1000Mbps originally): + + [ 43.305084] e1000e 0000:00:1f.6 enp0s31f6: NIC Link is Up 10 Mbps Full Duplex, Flow Control: Rx/Tx + +Without the commit, the force SMBUS code will not be executed if +"return 0" or "goto out" is executed in the enable_ulp(), and in my +case, the "goto out" is executed since FWSM_FW_VALID is set. But after +applying the commit, the force SMBUS code will be ran unconditionally. + +Here move the force SMBUS code back to enable_ulp() and put it +immediately ahead of hw->phy.ops.release(hw), this could allow the +longest settling time as possible for interface in this function and +doesn't change the original code logic. + +The issue was found on a Lenovo laptop with the ethernet hw as below: +00:1f.6 Ethernet controller [0200]: Intel Corporation Device [8086:550a] +(rev 20). 
+ +And this patch is verified (cable plug and unplug, system suspend +and resume) on Lenovo laptops with ethernet hw: [8086:550a], +[8086:550b], [8086:15bb], [8086:15be], [8086:1a1f], [8086:1a1c] and +[8086:0dc7]. + +Fixes: 861e8086029e ("e1000e: move force SMBUS from enable ulp function to avoid PHY loss issue") +Signed-off-by: Hui Wang +Acked-by: Vitaly Lifshits +Tested-by: Naama Meir +Reviewed-by: Simon Horman +Reviewed-by: Paul Menzel +Signed-off-by: Tony Nguyen +Tested-by: Zhang Rui +Signed-off-by: Jacob Keller +Link: https://lore.kernel.org/r/20240528-net-2024-05-28-intel-net-fixes-v1-1-dc8593d2bbc6@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/e1000e/ich8lan.c | 22 +++++++++++++++++++++ + drivers/net/ethernet/intel/e1000e/netdev.c | 18 ----------------- + 2 files changed, 22 insertions(+), 18 deletions(-) + +diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c +index f9e94be36e97f..2e98a2a0bead9 100644 +--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c ++++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c +@@ -1225,6 +1225,28 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) + } + + release: ++ /* Switching PHY interface always returns MDI error ++ * so disable retry mechanism to avoid wasting time ++ */ ++ e1000e_disable_phy_retry(hw); ++ ++ /* Force SMBus mode in PHY */ ++ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); ++ if (ret_val) { ++ e1000e_enable_phy_retry(hw); ++ hw->phy.ops.release(hw); ++ goto out; ++ } ++ phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; ++ e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); ++ ++ e1000e_enable_phy_retry(hw); ++ ++ /* Force SMBus mode in MAC */ ++ mac_reg = er32(CTRL_EXT); ++ mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; ++ ew32(CTRL_EXT, mac_reg); ++ + hw->phy.ops.release(hw); + out: + if (ret_val) +diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c +index 3692fce201959..cc8c531ec3dff 100644 +--- a/drivers/net/ethernet/intel/e1000e/netdev.c ++++ b/drivers/net/ethernet/intel/e1000e/netdev.c +@@ -6623,7 +6623,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) + struct e1000_hw *hw = &adapter->hw; + u32 ctrl, ctrl_ext, rctl, status, wufc; + int retval = 0; +- u16 smb_ctrl; + + /* Runtime suspend should only enable wakeup for link changes */ + if (runtime) +@@ -6697,23 +6696,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) + if (retval) + return retval; + } +- +- /* Force SMBUS to allow WOL */ +- /* Switching PHY interface always returns MDI error +- * so disable retry mechanism to avoid wasting time +- */ +- e1000e_disable_phy_retry(hw); +- +- e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl); +- smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS; +- e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl); +- +- e1000e_enable_phy_retry(hw); +- +- /* Force SMBus mode in MAC */ +- ctrl_ext = er32(CTRL_EXT); +- ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS; +- ew32(CTRL_EXT, ctrl_ext); + } + + /* Ensure that the appropriate bits are set in LPI_CTRL +-- +2.43.0 + diff --git a/queue-6.8/enic-validate-length-of-nl-attributes-in-enic_set_vf.patch b/queue-6.8/enic-validate-length-of-nl-attributes-in-enic_set_vf.patch new file mode 100644 index 00000000000..70013787c91 --- /dev/null +++ b/queue-6.8/enic-validate-length-of-nl-attributes-in-enic_set_vf.patch @@ -0,0 +1,69 @@ +From 5e93316f3b6564d6b4a1bc3b45cce5255bb1b2ea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 10:30:44 +0300 +Subject: enic: Validate length of nl attributes in enic_set_vf_port + +From: Roded Zats + +[ Upstream commit e8021b94b0412c37bcc79027c2e382086b6ce449 ] + +enic_set_vf_port assumes that the nl attribute IFLA_PORT_PROFILE +is of length PORT_PROFILE_MAX and that the nl attributes +IFLA_PORT_INSTANCE_UUID, IFLA_PORT_HOST_UUID are of length PORT_UUID_MAX. 
+These attributes are validated (in the function do_setlink in rtnetlink.c) +using the nla_policy ifla_port_policy. The policy defines IFLA_PORT_PROFILE +as NLA_STRING, IFLA_PORT_INSTANCE_UUID as NLA_BINARY and +IFLA_PORT_HOST_UUID as NLA_STRING. That means that the length validation +using the policy is for the max size of the attributes and not on exact +size so the length of these attributes might be less than the sizes that +enic_set_vf_port expects. This might cause an out of bounds +read access in the memcpys of the data of these +attributes in enic_set_vf_port. + +Fixes: f8bd909183ac ("net: Add ndo_{set|get}_vf_port support for enic dynamic vnics") +Signed-off-by: Roded Zats +Link: https://lore.kernel.org/r/20240522073044.33519-1-rzats@paloaltonetworks.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/cisco/enic/enic_main.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c +index 37bd38d772e80..cccf0db2fb4e5 100644 +--- a/drivers/net/ethernet/cisco/enic/enic_main.c ++++ b/drivers/net/ethernet/cisco/enic/enic_main.c +@@ -1117,18 +1117,30 @@ static int enic_set_vf_port(struct net_device *netdev, int vf, + pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]); + + if (port[IFLA_PORT_PROFILE]) { ++ if (nla_len(port[IFLA_PORT_PROFILE]) != PORT_PROFILE_MAX) { ++ memcpy(pp, &prev_pp, sizeof(*pp)); ++ return -EINVAL; ++ } + pp->set |= ENIC_SET_NAME; + memcpy(pp->name, nla_data(port[IFLA_PORT_PROFILE]), + PORT_PROFILE_MAX); + } + + if (port[IFLA_PORT_INSTANCE_UUID]) { ++ if (nla_len(port[IFLA_PORT_INSTANCE_UUID]) != PORT_UUID_MAX) { ++ memcpy(pp, &prev_pp, sizeof(*pp)); ++ return -EINVAL; ++ } + pp->set |= ENIC_SET_INSTANCE; + memcpy(pp->instance_uuid, + nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX); + } + + if (port[IFLA_PORT_HOST_UUID]) { ++ if (nla_len(port[IFLA_PORT_HOST_UUID]) != PORT_UUID_MAX) { ++ memcpy(pp,
&prev_pp, sizeof(*pp)); ++ return -EINVAL; ++ } + pp->set |= ENIC_SET_HOST; + memcpy(pp->host_uuid, + nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX); +-- +2.43.0 + diff --git a/queue-6.8/hwmon-intel-m10-bmc-hwmon-fix-multiplier-for-n6000-b.patch b/queue-6.8/hwmon-intel-m10-bmc-hwmon-fix-multiplier-for-n6000-b.patch new file mode 100644 index 00000000000..c9525a11e31 --- /dev/null +++ b/queue-6.8/hwmon-intel-m10-bmc-hwmon-fix-multiplier-for-n6000-b.patch @@ -0,0 +1,39 @@ +From a80a1bd8f5d0be32b830902f159caf45f057ed91 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 May 2024 14:12:46 -0400 +Subject: hwmon: (intel-m10-bmc-hwmon) Fix multiplier for N6000 board power + sensor + +From: Peter Colberg + +[ Upstream commit 027a44fedd55fbdf1d45603894634acd960ad04b ] + +The Intel N6000 BMC outputs the board power value in milliwatt, whereas +the hwmon sysfs interface must provide power values in microwatt. + +Fixes: e1983220ae14 ("hwmon: intel-m10-bmc-hwmon: Add N6000 sensors") +Signed-off-by: Peter Colberg +Reviewed-by: Matthew Gerlach +Link: https://lore.kernel.org/r/20240521181246.683833-1-peter.colberg@intel.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/intel-m10-bmc-hwmon.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/hwmon/intel-m10-bmc-hwmon.c b/drivers/hwmon/intel-m10-bmc-hwmon.c +index 6500ca548f9c7..ca2dff1589251 100644 +--- a/drivers/hwmon/intel-m10-bmc-hwmon.c ++++ b/drivers/hwmon/intel-m10-bmc-hwmon.c +@@ -429,7 +429,7 @@ static const struct m10bmc_sdata n6000bmc_curr_tbl[] = { + }; + + static const struct m10bmc_sdata n6000bmc_power_tbl[] = { +- { 0x724, 0x0, 0x0, 0x0, 0x0, 1, "Board Power" }, ++ { 0x724, 0x0, 0x0, 0x0, 0x0, 1000, "Board Power" }, + }; + + static const struct hwmon_channel_info * const n6000bmc_hinfo[] = { +-- +2.43.0 + diff --git a/queue-6.8/hwmon-shtc1-fix-property-misspelling.patch b/queue-6.8/hwmon-shtc1-fix-property-misspelling.patch new file mode 100644 
index 00000000000..0715911542f --- /dev/null +++ b/queue-6.8/hwmon-shtc1-fix-property-misspelling.patch @@ -0,0 +1,36 @@ +From 59641bdcbc62dc0916f431477e10a0ae38289199 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 May 2024 08:20:14 -0700 +Subject: hwmon: (shtc1) Fix property misspelling + +From: Guenter Roeck + +[ Upstream commit 52a2c70c3ec555e670a34dd1ab958986451d2dd2 ] + +The property name is "sensirion,low-precision", not +"sensicon,low-precision". + +Cc: Chris Ruehl +Fixes: be7373b60df5 ("hwmon: shtc1: add support for device tree bindings") +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/shtc1.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c +index 1f96e94967ee8..439dd3dba5fc8 100644 +--- a/drivers/hwmon/shtc1.c ++++ b/drivers/hwmon/shtc1.c +@@ -238,7 +238,7 @@ static int shtc1_probe(struct i2c_client *client) + + if (np) { + data->setup.blocking_io = of_property_read_bool(np, "sensirion,blocking-io"); +- data->setup.high_precision = !of_property_read_bool(np, "sensicon,low-precision"); ++ data->setup.high_precision = !of_property_read_bool(np, "sensirion,low-precision"); + } else { + if (client->dev.platform_data) + data->setup = *(struct shtc1_platform_data *)dev->platform_data; +-- +2.43.0 + diff --git a/queue-6.8/ice-fix-200g-phy-types-to-link-speed-mapping.patch b/queue-6.8/ice-fix-200g-phy-types-to-link-speed-mapping.patch new file mode 100644 index 00000000000..bdbcfb46d87 --- /dev/null +++ b/queue-6.8/ice-fix-200g-phy-types-to-link-speed-mapping.patch @@ -0,0 +1,53 @@ +From 5f9f3dac875f2cae8ad5a08d5f5ac1dbdf9b5f29 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 May 2024 15:06:08 -0700 +Subject: ice: fix 200G PHY types to link speed mapping + +From: Paul Greenwalt + +[ Upstream commit 2a6d8f2de2224ac46df94dc40f43f8b9701f6703 ] + +Commit 24407a01e57c ("ice: Add 200G speed/phy type use") added support +for 200G PHY speeds, but did 
not include the mapping of 200G PHY types +to link speed. As a result the driver is returning UNKNOWN link speed +when setting 200G ethtool advertised link modes. + +To fix this add 200G PHY types to link speed mapping to +ice_get_link_speed_based_on_phy_type(). + +Fixes: 24407a01e57c ("ice: Add 200G speed/phy type use") +Reviewed-by: Michal Swiatkowski +Signed-off-by: Paul Greenwalt +Tested-by: Pucha Himasekhar Reddy +Signed-off-by: Jacob Keller +Link: https://lore.kernel.org/r/20240528-net-2024-05-28-intel-net-fixes-v1-5-dc8593d2bbc6@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_common.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c +index ce50a322daa91..3017e7c4423b9 100644 +--- a/drivers/net/ethernet/intel/ice/ice_common.c ++++ b/drivers/net/ethernet/intel/ice/ice_common.c +@@ -3103,6 +3103,16 @@ ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high) + case ICE_PHY_TYPE_HIGH_100G_AUI2: + speed_phy_type_high = ICE_AQ_LINK_SPEED_100GB; + break; ++ case ICE_PHY_TYPE_HIGH_200G_CR4_PAM4: ++ case ICE_PHY_TYPE_HIGH_200G_SR4: ++ case ICE_PHY_TYPE_HIGH_200G_FR4: ++ case ICE_PHY_TYPE_HIGH_200G_LR4: ++ case ICE_PHY_TYPE_HIGH_200G_DR4: ++ case ICE_PHY_TYPE_HIGH_200G_KR4_PAM4: ++ case ICE_PHY_TYPE_HIGH_200G_AUI4_AOC_ACC: ++ case ICE_PHY_TYPE_HIGH_200G_AUI4: ++ speed_phy_type_high = ICE_AQ_LINK_SPEED_200GB; ++ break; + default: + speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN; + break; +-- +2.43.0 + diff --git a/queue-6.8/ice-fix-accounting-if-a-vlan-already-exists.patch b/queue-6.8/ice-fix-accounting-if-a-vlan-already-exists.patch new file mode 100644 index 00000000000..4f816487e6c --- /dev/null +++ b/queue-6.8/ice-fix-accounting-if-a-vlan-already-exists.patch @@ -0,0 +1,74 @@ +From 623a231f07ce8203ca96aff87b0ba5c0436f2fe3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 
May 2024 10:45:30 -0700 +Subject: ice: fix accounting if a VLAN already exists + +From: Jacob Keller + +[ Upstream commit 82617b9a04649e83ee8731918aeadbb6e6d7cbc7 ] + +The ice_vsi_add_vlan() function is used to add a VLAN filter for the target +VSI. This function prepares a filter in the switch table for the given VSI. +If it succeeds, the vsi->num_vlan counter is incremented. + +It is not considered an error to add a VLAN which already exists in the +switch table, so the function explicitly checks and ignores -EEXIST. The +vsi->num_vlan counter is still incremented. + +This seems incorrect, as it means we can double-count in the case where the +same VLAN is added twice by the caller. The actual table will have one less +filter than the count. + +The ice_vsi_del_vlan() function similarly checks and handles the -ENOENT +condition for when deleting a filter that doesn't exist. This flow only +decrements the vsi->num_vlan if it actually deleted a filter. + +The vsi->num_vlan counter is used only in a few places, primarily related +to tracking the number of non-zero VLANs. If the vsi->num_vlan gets out of +sync, then ice_vsi_num_non_zero_vlans() will incorrectly report more VLANs +than are present, and ice_vsi_has_non_zero_vlans() could return true +potentially in cases where there are only VLAN 0 filters left. + +Fix this by only incrementing the vsi->num_vlan in the case where we +actually added an entry, and not in the case where the entry already +existed. 
+ +Fixes: a1ffafb0b4a4 ("ice: Support configuring the device to Double VLAN Mode") +Signed-off-by: Jacob Keller +Tested-by: Pucha Himasekhar Reddy +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20240523-net-2024-05-23-intel-net-fixes-v1-2-17a923e0bb5f@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c +index 2e9ad27cb9d13..6e8f2aab60801 100644 +--- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c +@@ -45,14 +45,15 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan) + return -EINVAL; + + err = ice_fltr_add_vlan(vsi, vlan); +- if (err && err != -EEXIST) { ++ if (!err) ++ vsi->num_vlan++; ++ else if (err == -EEXIST) ++ err = 0; ++ else + dev_err(ice_pf_to_dev(vsi->back), "Failure Adding VLAN %d on VSI %i, status %d\n", + vlan->vid, vsi->vsi_num, err); +- return err; +- } + +- vsi->num_vlan++; +- return 0; ++ return err; + } + + /** +-- +2.43.0 + diff --git a/queue-6.8/idpf-don-t-enable-napi-and-interrupts-prior-to-alloc.patch b/queue-6.8/idpf-don-t-enable-napi-and-interrupts-prior-to-alloc.patch new file mode 100644 index 00000000000..d783be32664 --- /dev/null +++ b/queue-6.8/idpf-don-t-enable-napi-and-interrupts-prior-to-alloc.patch @@ -0,0 +1,113 @@ +From 55daf032467f31eddec14aab29fa133851438be7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 May 2024 10:45:29 -0700 +Subject: idpf: don't enable NAPI and interrupts prior to allocating Rx buffers + +From: Alexander Lobakin + +[ Upstream commit d514c8b54209de7a95ab37259fe32c7406976bd9 ] + +Currently, idpf enables NAPI and interrupts prior to allocating Rx +buffers. 
+This may lead to frame loss (there are no buffers to place incoming +frames) and even crashes on quick ifup-ifdown. Interrupts must be +enabled only after all the resources are here and available. +Split interrupt init into two phases: initialization and enabling, +and perform the second only after the queues are fully initialized. +Note that we can't just move interrupt initialization down the init +process, as the queues must have a correct ::q_vector pointer set +and NAPI already added in order to allocate buffers correctly. +Also, during the deinit process, disable HW interrupts first and +only then disable NAPI. Otherwise, there can be a HW event leading +to napi_schedule(), but the NAPI will already be unavailable. + +Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") +Reported-by: Michal Kubiak +Reviewed-by: Wojciech Drewek +Signed-off-by: Alexander Lobakin +Reviewed-by: Simon Horman +Tested-by: Krishneil Singh +Signed-off-by: Jacob Keller +Link: https://lore.kernel.org/r/20240523-net-2024-05-23-intel-net-fixes-v1-1-17a923e0bb5f@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/idpf/idpf_lib.c | 1 + + drivers/net/ethernet/intel/idpf/idpf_txrx.c | 12 +++++++----- + drivers/net/ethernet/intel/idpf/idpf_txrx.h | 1 + + 3 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c +index 58179bd733ff0..f769a9fc1084c 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c +@@ -1419,6 +1419,7 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) + } + + idpf_rx_init_buf_tail(vport); ++ idpf_vport_intr_ena(vport); + + err = idpf_send_config_queues_msg(vport); + if (err) { +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +index 017a081d85802..b16fa9d8a8c58 100644 +--- 
a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@ -3746,9 +3746,9 @@ static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport) + */ + void idpf_vport_intr_deinit(struct idpf_vport *vport) + { ++ idpf_vport_intr_dis_irq_all(vport); + idpf_vport_intr_napi_dis_all(vport); + idpf_vport_intr_napi_del_all(vport); +- idpf_vport_intr_dis_irq_all(vport); + idpf_vport_intr_rel_irq(vport); + } + +@@ -4179,7 +4179,6 @@ int idpf_vport_intr_init(struct idpf_vport *vport) + + idpf_vport_intr_map_vector_to_qs(vport); + idpf_vport_intr_napi_add_all(vport); +- idpf_vport_intr_napi_ena_all(vport); + + err = vport->adapter->dev_ops.reg_ops.intr_reg_init(vport); + if (err) +@@ -4193,17 +4192,20 @@ int idpf_vport_intr_init(struct idpf_vport *vport) + if (err) + goto unroll_vectors_alloc; + +- idpf_vport_intr_ena_irq_all(vport); +- + return 0; + + unroll_vectors_alloc: +- idpf_vport_intr_napi_dis_all(vport); + idpf_vport_intr_napi_del_all(vport); + + return err; + } + ++void idpf_vport_intr_ena(struct idpf_vport *vport) ++{ ++ idpf_vport_intr_napi_ena_all(vport); ++ idpf_vport_intr_ena_irq_all(vport); ++} ++ + /** + * idpf_config_rss - Send virtchnl messages to configure RSS + * @vport: virtual port +diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +index df76493faa756..85a1466890d43 100644 +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h +@@ -988,6 +988,7 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport); + void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector); + void idpf_vport_intr_deinit(struct idpf_vport *vport); + int idpf_vport_intr_init(struct idpf_vport *vport); ++void idpf_vport_intr_ena(struct idpf_vport *vport); + enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded); + int idpf_config_rss(struct idpf_vport *vport); + int idpf_init_rss(struct idpf_vport 
*vport); +-- +2.43.0 + diff --git a/queue-6.8/inet-introduce-dst_rtable-helper.patch b/queue-6.8/inet-introduce-dst_rtable-helper.patch new file mode 100644 index 00000000000..d175f8ccb59 --- /dev/null +++ b/queue-6.8/inet-introduce-dst_rtable-helper.patch @@ -0,0 +1,570 @@ +From 600692a5e88746cb0478d7f65514cb799941a1d2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 29 Apr 2024 13:30:09 +0000 +Subject: inet: introduce dst_rtable() helper + +From: Eric Dumazet + +[ Upstream commit 05d6d492097c55f2d153fc3fd33cbe78e1e28e0a ] + +I added dst_rt6_info() in commit +e8dfd42c17fa ("ipv6: introduce dst_rt6_info() helper") + +This patch does a similar change for IPv4. + +Instead of (struct rtable *)dst casts, we can use : + + #define dst_rtable(_ptr) \ + container_of_const(_ptr, struct rtable, dst) + +Patch is smaller than IPv6 one, because IPv4 has skb_rtable() helper. + +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Sabrina Dubroca +Link: https://lore.kernel.org/r/20240429133009.1227754-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 92f1655aa2b2 ("net: fix __dst_negative_advice() race") +Signed-off-by: Sasha Levin +--- + drivers/infiniband/core/addr.c | 12 +++--------- + drivers/net/vrf.c | 2 +- + drivers/s390/net/qeth_core.h | 5 ++--- + include/linux/skbuff.h | 9 --------- + include/net/ip.h | 4 ++-- + include/net/route.h | 11 +++++++++++ + net/atm/clip.c | 2 +- + net/core/dst_cache.c | 2 +- + net/core/filter.c | 3 +-- + net/ipv4/af_inet.c | 2 +- + net/ipv4/icmp.c | 26 ++++++++++++++------------ + net/ipv4/ip_input.c | 2 +- + net/ipv4/ip_output.c | 8 ++++---- + net/ipv4/route.c | 24 +++++++++++------------- + net/ipv4/udp.c | 2 +- + net/ipv4/xfrm4_policy.c | 2 +- + net/l2tp/l2tp_ip.c | 2 +- + net/mpls/mpls_iptunnel.c | 2 +- + net/netfilter/ipvs/ip_vs_xmit.c | 2 +- + net/netfilter/nf_flow_table_ip.c | 4 ++-- + net/netfilter/nft_rt.c | 2 +- + net/sctp/protocol.c | 4 ++-- + net/tipc/udp_media.c | 2 +- + 23 files changed, 64 
insertions(+), 70 deletions(-) + +diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c +index f20dfe70fa0e4..be0743dac3fff 100644 +--- a/drivers/infiniband/core/addr.c ++++ b/drivers/infiniband/core/addr.c +@@ -348,16 +348,10 @@ static int dst_fetch_ha(const struct dst_entry *dst, + + static bool has_gateway(const struct dst_entry *dst, sa_family_t family) + { +- const struct rtable *rt; +- const struct rt6_info *rt6; ++ if (family == AF_INET) ++ return dst_rtable(dst)->rt_uses_gateway; + +- if (family == AF_INET) { +- rt = container_of(dst, struct rtable, dst); +- return rt->rt_uses_gateway; +- } +- +- rt6 = dst_rt6_info(dst); +- return rt6->rt6i_flags & RTF_GATEWAY; ++ return dst_rt6_info(dst)->rt6i_flags & RTF_GATEWAY; + } + + static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr, +diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c +index 71cfa03a77449..c3af9ad5e1547 100644 +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -860,7 +860,7 @@ static int vrf_rt6_create(struct net_device *dev) + static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) + { + struct dst_entry *dst = skb_dst(skb); +- struct rtable *rt = (struct rtable *)dst; ++ struct rtable *rt = dst_rtable(dst); + struct net_device *dev = dst->dev; + unsigned int hh_len = LL_RESERVED_SPACE(dev); + struct neighbour *neigh; +diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h +index 5f17a2a5d0e33..41fe8a043d61f 100644 +--- a/drivers/s390/net/qeth_core.h ++++ b/drivers/s390/net/qeth_core.h +@@ -970,9 +970,8 @@ static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb, + static inline __be32 qeth_next_hop_v4_rcu(struct sk_buff *skb, + struct dst_entry *dst) + { +- struct rtable *rt = (struct rtable *) dst; +- +- return (rt) ? rt_nexthop(rt, ip_hdr(skb)->daddr) : ip_hdr(skb)->daddr; ++ return (dst) ? 
rt_nexthop(dst_rtable(dst), ip_hdr(skb)->daddr) : ++ ip_hdr(skb)->daddr; + } + + static inline struct in6_addr *qeth_next_hop_v6_rcu(struct sk_buff *skb, +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index f86f9396f727b..d4e53a5a262c7 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -1169,15 +1169,6 @@ static inline bool skb_dst_is_noref(const struct sk_buff *skb) + return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); + } + +-/** +- * skb_rtable - Returns the skb &rtable +- * @skb: buffer +- */ +-static inline struct rtable *skb_rtable(const struct sk_buff *skb) +-{ +- return (struct rtable *)skb_dst(skb); +-} +- + /* For mangling skb->pkt_type from user space side from applications + * such as nft, tc, etc, we only allow a conservative subset of + * possible pkt_types to be set. +diff --git a/include/net/ip.h b/include/net/ip.h +index 25cb688bdc623..6d735e00d3f3e 100644 +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -423,7 +423,7 @@ int ip_decrease_ttl(struct iphdr *iph) + + static inline int ip_mtu_locked(const struct dst_entry *dst) + { +- const struct rtable *rt = (const struct rtable *)dst; ++ const struct rtable *rt = dst_rtable(dst); + + return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU); + } +@@ -461,7 +461,7 @@ static inline bool ip_sk_ignore_df(const struct sock *sk) + static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, + bool forwarding) + { +- const struct rtable *rt = container_of(dst, struct rtable, dst); ++ const struct rtable *rt = dst_rtable(dst); + struct net *net = dev_net(dst->dev); + unsigned int mtu; + +diff --git a/include/net/route.h b/include/net/route.h +index 980ab474eabdc..9cc68c2d23271 100644 +--- a/include/net/route.h ++++ b/include/net/route.h +@@ -80,6 +80,17 @@ struct rtable { + rt_pmtu:31; + }; + ++#define dst_rtable(_ptr) container_of_const(_ptr, struct rtable, dst) ++ ++/** ++ * skb_rtable - Returns the skb &rtable ++ * @skb: buffer ++ */ 
++static inline struct rtable *skb_rtable(const struct sk_buff *skb) ++{ ++ return dst_rtable(skb_dst(skb)); ++} ++ + static inline bool rt_is_input_route(const struct rtable *rt) + { + return rt->rt_is_input != 0; +diff --git a/net/atm/clip.c b/net/atm/clip.c +index 294cb9efe3d38..015fb679be425 100644 +--- a/net/atm/clip.c ++++ b/net/atm/clip.c +@@ -345,7 +345,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } +- rt = (struct rtable *) dst; ++ rt = dst_rtable(dst); + if (rt->rt_gw_family == AF_INET) + daddr = &rt->rt_gw4; + else +diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c +index b17171345d649..0c0bdb058c5b1 100644 +--- a/net/core/dst_cache.c ++++ b/net/core/dst_cache.c +@@ -83,7 +83,7 @@ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr) + return NULL; + + *saddr = idst->in_saddr.s_addr; +- return container_of(dst, struct rtable, dst); ++ return dst_rtable(dst); + } + EXPORT_SYMBOL_GPL(dst_cache_get_ip4); + +diff --git a/net/core/filter.c b/net/core/filter.c +index 9c9300664320b..4aa556ecefe2e 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -2314,8 +2314,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, + + rcu_read_lock(); + if (!nh) { +- struct dst_entry *dst = skb_dst(skb); +- struct rtable *rt = container_of(dst, struct rtable, dst); ++ struct rtable *rt = skb_rtable(skb); + + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); + } else if (nh->nh_family == AF_INET6) { +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 1e373e7477efa..88966ae7f9c4c 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -1307,8 +1307,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) + + int inet_sk_rebuild_header(struct sock *sk) + { ++ struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0)); + struct inet_sock *inet = inet_sk(sk); +- struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); + __be32 daddr; + struct 
ip_options_rcu *inet_opt; + struct flowi4 *fl4; +diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c +index 437e782b9663b..207482d30dc7e 100644 +--- a/net/ipv4/icmp.c ++++ b/net/ipv4/icmp.c +@@ -483,6 +483,7 @@ static struct rtable *icmp_route_lookup(struct net *net, + struct icmp_bxm *param) + { + struct net_device *route_lookup_dev; ++ struct dst_entry *dst, *dst2; + struct rtable *rt, *rt2; + struct flowi4 fl4_dec; + int err; +@@ -508,16 +509,17 @@ static struct rtable *icmp_route_lookup(struct net *net, + /* No need to clone since we're just using its address. */ + rt2 = rt; + +- rt = (struct rtable *) xfrm_lookup(net, &rt->dst, +- flowi4_to_flowi(fl4), NULL, 0); +- if (!IS_ERR(rt)) { ++ dst = xfrm_lookup(net, &rt->dst, ++ flowi4_to_flowi(fl4), NULL, 0); ++ rt = dst_rtable(dst); ++ if (!IS_ERR(dst)) { + if (rt != rt2) + return rt; +- } else if (PTR_ERR(rt) == -EPERM) { ++ } else if (PTR_ERR(dst) == -EPERM) { + rt = NULL; +- } else ++ } else { + return rt; +- ++ } + err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET); + if (err) + goto relookup_failed; +@@ -551,19 +553,19 @@ static struct rtable *icmp_route_lookup(struct net *net, + if (err) + goto relookup_failed; + +- rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, +- flowi4_to_flowi(&fl4_dec), NULL, +- XFRM_LOOKUP_ICMP); +- if (!IS_ERR(rt2)) { ++ dst2 = xfrm_lookup(net, &rt2->dst, flowi4_to_flowi(&fl4_dec), NULL, ++ XFRM_LOOKUP_ICMP); ++ rt2 = dst_rtable(dst2); ++ if (!IS_ERR(dst2)) { + dst_release(&rt->dst); + memcpy(fl4, &fl4_dec, sizeof(*fl4)); + rt = rt2; +- } else if (PTR_ERR(rt2) == -EPERM) { ++ } else if (PTR_ERR(dst2) == -EPERM) { + if (rt) + dst_release(&rt->dst); + return rt2; + } else { +- err = PTR_ERR(rt2); ++ err = PTR_ERR(dst2); + goto relookup_failed; + } + return rt; +diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c +index 5e9c8156656a7..d6fbcbd2358a5 100644 +--- a/net/ipv4/ip_input.c ++++ b/net/ipv4/ip_input.c +@@ -616,7 +616,7 @@ static void 
ip_list_rcv_finish(struct net *net, struct sock *sk, + dst = skb_dst(skb); + if (curr_dst != dst) { + hint = ip_extract_route_hint(net, skb, +- ((struct rtable *)dst)->rt_type); ++ dst_rtable(dst)->rt_type); + + /* dispatch old sublist */ + if (!list_empty(&sublist)) +diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c +index a38e63669c54a..3e6ea4318d1e3 100644 +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -198,7 +198,7 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); + static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) + { + struct dst_entry *dst = skb_dst(skb); +- struct rtable *rt = (struct rtable *)dst; ++ struct rtable *rt = dst_rtable(dst); + struct net_device *dev = dst->dev; + unsigned int hh_len = LL_RESERVED_SPACE(dev); + struct neighbour *neigh; +@@ -475,7 +475,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, + goto packet_routed; + + /* Make sure we can route this packet. */ +- rt = (struct rtable *)__sk_dst_check(sk, 0); ++ rt = dst_rtable(__sk_dst_check(sk, 0)); + if (!rt) { + __be32 daddr; + +@@ -971,7 +971,7 @@ static int __ip_append_data(struct sock *sk, + bool zc = false; + unsigned int maxfraglen, fragheaderlen, maxnonfragsize; + int csummode = CHECKSUM_NONE; +- struct rtable *rt = (struct rtable *)cork->dst; ++ struct rtable *rt = dst_rtable(cork->dst); + bool paged, hold_tskey, extra_uref = false; + unsigned int wmem_alloc_delta = 0; + u32 tskey = 0; +@@ -1390,7 +1390,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, + struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); + struct ip_options *opt = NULL; +- struct rtable *rt = (struct rtable *)cork->dst; ++ struct rtable *rt = dst_rtable(cork->dst); + struct iphdr *iph; + u8 pmtudisc, ttl; + __be16 df = 0; +diff --git a/net/ipv4/route.c b/net/ipv4/route.c +index f67d3d6fe9345..6d6e1c3e67d8f 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -831,7 +831,7 @@ static void 
ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf + u32 mark = skb->mark; + __u8 tos = iph->tos; + +- rt = (struct rtable *) dst; ++ rt = dst_rtable(dst); + + __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); + __ip_do_redirect(rt, skb, &fl4, true); +@@ -839,7 +839,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf + + static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) + { +- struct rtable *rt = (struct rtable *)dst; ++ struct rtable *rt = dst_rtable(dst); + struct dst_entry *ret = dst; + + if (rt) { +@@ -1056,7 +1056,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) + { +- struct rtable *rt = (struct rtable *) dst; ++ struct rtable *rt = dst_rtable(dst); + struct flowi4 fl4; + + ip_rt_build_flow_key(&fl4, sk, skb); +@@ -1127,7 +1127,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) + + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); + +- rt = (struct rtable *)odst; ++ rt = dst_rtable(odst); + if (odst->obsolete && !odst->ops->check(odst, 0)) { + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + if (IS_ERR(rt)) +@@ -1136,7 +1136,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) + new = true; + } + +- __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); ++ __ip_rt_update_pmtu(dst_rtable(xfrm_dst_path(&rt->dst)), &fl4, mtu); + + if (!dst_check(&rt->dst, 0)) { + if (new) +@@ -1193,7 +1193,7 @@ EXPORT_SYMBOL_GPL(ipv4_sk_redirect); + INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, + u32 cookie) + { +- struct rtable *rt = (struct rtable *) dst; ++ struct rtable *rt = dst_rtable(dst); + + /* All IPV4 dsts are created with ->obsolete set to the value + * DST_OBSOLETE_FORCE_CHK which forces validation calls down +@@ -1528,10 +1528,8 @@ void rt_del_uncached_list(struct rtable *rt) + + static void 
ipv4_dst_destroy(struct dst_entry *dst) + { +- struct rtable *rt = (struct rtable *)dst; +- + ip_dst_metrics_put(dst); +- rt_del_uncached_list(rt); ++ rt_del_uncached_list(dst_rtable(dst)); + } + + void rt_flush_dev(struct net_device *dev) +@@ -2832,7 +2830,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { + + struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) + { +- struct rtable *ort = (struct rtable *) dst_orig; ++ struct rtable *ort = dst_rtable(dst_orig); + struct rtable *rt; + + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0); +@@ -2877,9 +2875,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, + + if (flp4->flowi4_proto) { + flp4->flowi4_oif = rt->dst.dev->ifindex; +- rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst, +- flowi4_to_flowi(flp4), +- sk, 0); ++ rt = dst_rtable(xfrm_lookup_route(net, &rt->dst, ++ flowi4_to_flowi(flp4), ++ sk, 0)); + } + + return rt; +diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c +index e980869f18176..b2e1bfa0f625a 100644 +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1220,7 +1220,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) + } + + if (connected) +- rt = (struct rtable *)sk_dst_check(sk, 0); ++ rt = dst_rtable(sk_dst_check(sk, 0)); + + if (!rt) { + struct net *net = sock_net(sk); +diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c +index c33bca2c38415..1853a8415d9f1 100644 +--- a/net/ipv4/xfrm4_policy.c ++++ b/net/ipv4/xfrm4_policy.c +@@ -69,7 +69,7 @@ static int xfrm4_get_saddr(struct net *net, int oif, + static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + const struct flowi *fl) + { +- struct rtable *rt = (struct rtable *)xdst->route; ++ struct rtable *rt = dst_rtable(xdst->route); + const struct flowi4 *fl4 = &fl->u.ip4; + + xdst->u.rt.rt_iif = fl4->flowi4_iif; +diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c +index 9a2a9ed3ba478..4b4bd83353d5d 100644 +--- 
a/net/l2tp/l2tp_ip.c ++++ b/net/l2tp/l2tp_ip.c +@@ -459,7 +459,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) + + fl4 = &inet->cork.fl.u.ip4; + if (connected) +- rt = (struct rtable *)__sk_dst_check(sk, 0); ++ rt = dst_rtable(__sk_dst_check(sk, 0)); + + rcu_read_lock(); + if (!rt) { +diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c +index 8985abcb7a058..89d0fa560d8fc 100644 +--- a/net/mpls/mpls_iptunnel.c ++++ b/net/mpls/mpls_iptunnel.c +@@ -83,7 +83,7 @@ static int mpls_xmit(struct sk_buff *skb) + ttl = net->mpls.default_ttl; + else + ttl = ip_hdr(skb)->ttl; +- rt = (struct rtable *)dst; ++ rt = dst_rtable(dst); + } else if (dst->ops->family == AF_INET6) { + if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) + ttl = tun_encap_info->default_ttl; +diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c +index 5cd511162bc03..e1f17392f58c1 100644 +--- a/net/netfilter/ipvs/ip_vs_xmit.c ++++ b/net/netfilter/ipvs/ip_vs_xmit.c +@@ -318,7 +318,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, + if (dest) { + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) +- rt = (struct rtable *) dest_dst->dst_cache; ++ rt = dst_rtable(dest_dst->dst_cache); + else { + dest_dst = ip_vs_dest_dst_alloc(); + spin_lock_bh(&dest->dst_lock); +diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c +index 100887beed314..c2c005234dcd3 100644 +--- a/net/netfilter/nf_flow_table_ip.c ++++ b/net/netfilter/nf_flow_table_ip.c +@@ -434,7 +434,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, + return NF_ACCEPT; + + if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { +- rt = (struct rtable *)tuplehash->tuple.dst_cache; ++ rt = dst_rtable(tuplehash->tuple.dst_cache); + memset(skb->cb, 0, sizeof(struct inet_skb_parm)); + IPCB(skb)->iif = skb->dev->ifindex; + IPCB(skb)->flags = IPSKB_FORWARDED; +@@ -446,7 +446,7 @@ 
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, + + switch (tuplehash->tuple.xmit_type) { + case FLOW_OFFLOAD_XMIT_NEIGH: +- rt = (struct rtable *)tuplehash->tuple.dst_cache; ++ rt = dst_rtable(tuplehash->tuple.dst_cache); + outdev = rt->dst.dev; + skb->dev = outdev; + nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); +diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c +index 2434c624aafde..14d88394bcb7f 100644 +--- a/net/netfilter/nft_rt.c ++++ b/net/netfilter/nft_rt.c +@@ -73,7 +73,7 @@ void nft_rt_get_eval(const struct nft_expr *expr, + if (nft_pf(pkt) != NFPROTO_IPV4) + goto err; + +- *dest = (__force u32)rt_nexthop((const struct rtable *)dst, ++ *dest = (__force u32)rt_nexthop(dst_rtable(dst), + ip_hdr(skb)->daddr); + break; + case NFT_RT_NEXTHOP6: +diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c +index 94c6dd53cd62d..ae68b48cebbe5 100644 +--- a/net/sctp/protocol.c ++++ b/net/sctp/protocol.c +@@ -552,7 +552,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk, + struct flowi *fl) + { + union sctp_addr *saddr = &t->saddr; +- struct rtable *rt = (struct rtable *)t->dst; ++ struct rtable *rt = dst_rtable(t->dst); + + if (rt) { + saddr->v4.sin_family = AF_INET; +@@ -1085,7 +1085,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t) + skb_reset_inner_mac_header(skb); + skb_reset_inner_transport_header(skb); + skb_set_inner_ipproto(skb, IPPROTO_SCTP); +- udp_tunnel_xmit_skb((struct rtable *)dst, sk, skb, fl4->saddr, ++ udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr, + fl4->daddr, dscp, ip4_dst_hoplimit(dst), df, + sctp_sk(sk)->udp_port, t->encap_port, false, false); + return 0; +diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c +index f892b0903dbaf..b849a3d133a01 100644 +--- a/net/tipc/udp_media.c ++++ b/net/tipc/udp_media.c +@@ -174,7 +174,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, + local_bh_disable(); + ndst = dst_cache_get(cache); + if 
(dst->proto == htons(ETH_P_IP)) { +- struct rtable *rt = (struct rtable *)ndst; ++ struct rtable *rt = dst_rtable(ndst); + + if (!rt) { + struct flowi4 fl = { +-- +2.43.0 + diff --git a/queue-6.8/ipv6-introduce-dst_rt6_info-helper.patch b/queue-6.8/ipv6-introduce-dst_rt6_info-helper.patch new file mode 100644 index 00000000000..ebbacba274e --- /dev/null +++ b/queue-6.8/ipv6-introduce-dst_rt6_info-helper.patch @@ -0,0 +1,841 @@ +From 1b5451a77ccb865ecdde404c01bb23dae86edd48 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 Apr 2024 15:19:52 +0000 +Subject: ipv6: introduce dst_rt6_info() helper + +From: Eric Dumazet + +[ Upstream commit e8dfd42c17faf183415323db1ef0c977be0d6489 ] + +Instead of (struct rt6_info *)dst casts, we can use : + + #define dst_rt6_info(_ptr) \ + container_of_const(_ptr, struct rt6_info, dst) + +Some places needed missing const qualifiers : + +ip6_confirm_neigh(), ipv6_anycast_destination(), +ipv6_unicast_destination(), has_gateway() + +v2: added missing parts (David Ahern) + +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Stable-dep-of: 92f1655aa2b2 ("net: fix __dst_negative_advice() race") +Signed-off-by: Sasha Levin +--- + drivers/infiniband/core/addr.c | 6 ++-- + .../ethernet/mellanox/mlxsw/spectrum_span.c | 2 +- + drivers/net/vrf.c | 2 +- + drivers/net/vxlan/vxlan_core.c | 2 +- + drivers/s390/net/qeth_core.h | 4 +-- + include/net/ip6_fib.h | 6 ++-- + include/net/ip6_route.h | 11 ++++---- + net/bluetooth/6lowpan.c | 2 +- + net/core/dst_cache.c | 2 +- + net/core/filter.c | 2 +- + net/ipv4/ip_tunnel.c | 2 +- + net/ipv6/icmp.c | 8 +++--- + net/ipv6/ila/ila_lwt.c | 4 +-- + net/ipv6/ip6_output.c | 18 ++++++------ + net/ipv6/ip6mr.c | 2 +- + net/ipv6/ndisc.c | 2 +- + net/ipv6/ping.c | 2 +- + net/ipv6/raw.c | 4 +-- + net/ipv6/route.c | 28 +++++++++---------- + net/ipv6/tcp_ipv6.c | 4 +-- + net/ipv6/udp.c | 11 +++----- + net/ipv6/xfrm6_policy.c | 2 +- + net/l2tp/l2tp_ip6.c | 2 +- + net/mpls/mpls_iptunnel.c | 2 +- + net/netfilter/ipvs/ip_vs_xmit.c | 14 +++++----- + net/netfilter/nf_flow_table_core.c | 8 ++---- + net/netfilter/nf_flow_table_ip.c | 4 +-- + net/netfilter/nft_rt.c | 2 +- + net/sctp/ipv6.c | 2 +- + net/xfrm/xfrm_policy.c | 3 +- + 30 files changed, 77 insertions(+), 86 deletions(-) + +diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c +index f253295795f0a..f20dfe70fa0e4 100644 +--- a/drivers/infiniband/core/addr.c ++++ b/drivers/infiniband/core/addr.c +@@ -348,15 +348,15 @@ static int dst_fetch_ha(const struct dst_entry *dst, + + static bool has_gateway(const struct dst_entry *dst, sa_family_t family) + { +- struct rtable *rt; +- struct rt6_info *rt6; ++ const struct rtable *rt; ++ const struct rt6_info *rt6; + + if (family == AF_INET) { + rt = container_of(dst, struct rtable, dst); + return rt->rt_uses_gateway; + } + +- rt6 = container_of(dst, struct rt6_info, dst); ++ rt6 = dst_rt6_info(dst); + return rt6->rt6i_flags & RTF_GATEWAY; + } + +diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +index af50ff9e5f267..ce49c9514f911 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +@@ -539,7 +539,7 @@ mlxsw_sp_span_gretap6_route(const struct net_device *to_dev, + if (!dst || dst->error) + goto out; + +- rt6 = container_of(dst, struct rt6_info, dst); ++ rt6 = dst_rt6_info(dst); + + dev = dst->dev; + *saddrp = fl6.saddr; +diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c +index bb95ce43cd97d..71cfa03a77449 100644 +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -653,7 +653,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, + skb->dev = dev; + + rcu_read_lock(); +- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); ++ nexthop = rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr); + neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); +diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c +index 0a0b4a9717cec..9ae844d3b969c 100644 +--- a/drivers/net/vxlan/vxlan_core.c ++++ b/drivers/net/vxlan/vxlan_core.c +@@ -2528,7 +2528,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, + } + + if (!info) { +- u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags; ++ u32 rt6i_flags = dst_rt6_info(ndst)->rt6i_flags; + + err = encap_bypass_if_local(skb, dev, vxlan, AF_INET6, + dst_port, ifindex, vni, +diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h +index 613eab7297046..5f17a2a5d0e33 100644 +--- a/drivers/s390/net/qeth_core.h ++++ b/drivers/s390/net/qeth_core.h +@@ -956,7 +956,7 @@ static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb, + struct dst_entry *dst = skb_dst(skb); + struct rt6_info *rt; + +- rt = (struct rt6_info *) dst; ++ rt = dst_rt6_info(dst); + if (dst) { + if (proto == htons(ETH_P_IPV6)) + dst = dst_check(dst, 
rt6_get_cookie(rt)); +@@ -978,7 +978,7 @@ static inline __be32 qeth_next_hop_v4_rcu(struct sk_buff *skb, + static inline struct in6_addr *qeth_next_hop_v6_rcu(struct sk_buff *skb, + struct dst_entry *dst) + { +- struct rt6_info *rt = (struct rt6_info *) dst; ++ struct rt6_info *rt = dst_rt6_info(dst); + + if (rt && !ipv6_addr_any(&rt->rt6i_gateway)) + return &rt->rt6i_gateway; +diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h +index 9ba6413fd2e3e..1121d614942c8 100644 +--- a/include/net/ip6_fib.h ++++ b/include/net/ip6_fib.h +@@ -237,9 +237,11 @@ struct fib6_result { + for (rt = (w)->leaf; rt; \ + rt = rcu_dereference_protected(rt->fib6_next, 1)) + +-static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) ++#define dst_rt6_info(_ptr) container_of_const(_ptr, struct rt6_info, dst) ++ ++static inline struct inet6_dev *ip6_dst_idev(const struct dst_entry *dst) + { +- return ((struct rt6_info *)dst)->rt6i_idev; ++ return dst_rt6_info(dst)->rt6i_idev; + } + + static inline bool fib6_requires_src(const struct fib6_info *rt) +diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h +index 28b0657902615..e94d2867770c5 100644 +--- a/include/net/ip6_route.h ++++ b/include/net/ip6_route.h +@@ -209,12 +209,11 @@ void rt6_uncached_list_del(struct rt6_info *rt); + static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) + { + const struct dst_entry *dst = skb_dst(skb); +- const struct rt6_info *rt6 = NULL; + + if (dst) +- rt6 = container_of(dst, struct rt6_info, dst); ++ return dst_rt6_info(dst); + +- return rt6; ++ return NULL; + } + + /* +@@ -226,7 +225,7 @@ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, + { + struct ipv6_pinfo *np = inet6_sk(sk); + +- np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); ++ np->dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); + sk_setup_caps(sk, dst); + np->daddr_cache = daddr; + #ifdef CONFIG_IPV6_SUBTREES +@@ -239,7 +238,7 @@ void ip6_sk_dst_store_flow(struct sock 
*sk, struct dst_entry *dst, + + static inline bool ipv6_unicast_destination(const struct sk_buff *skb) + { +- struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); ++ const struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); + + return rt->rt6i_flags & RTF_LOCAL; + } +@@ -247,7 +246,7 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) + static inline bool ipv6_anycast_destination(const struct dst_entry *dst, + const struct in6_addr *daddr) + { +- struct rt6_info *rt = (struct rt6_info *)dst; ++ const struct rt6_info *rt = dst_rt6_info(dst); + + return rt->rt6i_flags & RTF_ANYCAST || + (rt->rt6i_dst.plen < 127 && +diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c +index 715cbafbf6631..0fcaa4d832c98 100644 +--- a/net/bluetooth/6lowpan.c ++++ b/net/bluetooth/6lowpan.c +@@ -133,7 +133,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev, + struct in6_addr *daddr, + struct sk_buff *skb) + { +- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); ++ struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); + int count = atomic_read(&dev->peer_count); + const struct in6_addr *nexthop; + struct lowpan_peer *peer; +diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c +index 0ccfd5fa5cb9b..b17171345d649 100644 +--- a/net/core/dst_cache.c ++++ b/net/core/dst_cache.c +@@ -112,7 +112,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, + + idst = this_cpu_ptr(dst_cache->cache); + dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst, +- rt6_get_cookie((struct rt6_info *)dst)); ++ rt6_get_cookie(dst_rt6_info(dst))); + idst->in6_saddr = *saddr; + } + EXPORT_SYMBOL_GPL(dst_cache_set_ip6); +diff --git a/net/core/filter.c b/net/core/filter.c +index 75cdaa16046bb..9c9300664320b 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -2215,7 +2215,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, + rcu_read_lock(); + if (!nh) { + dst = skb_dst(skb); +- nexthop = 
rt6_nexthop(container_of(dst, struct rt6_info, dst), ++ nexthop = rt6_nexthop(dst_rt6_info(dst), + &ipv6_hdr(skb)->daddr); + } else { + nexthop = &nh->ipv6_nh; +diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c +index 55039191b8d2b..236448e57e550 100644 +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -544,7 +544,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, + struct rt6_info *rt6; + __be32 daddr; + +- rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) : ++ rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) : + NULL; + daddr = md ? dst : tunnel->parms.iph.daddr; + +diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c +index 1635da07285f2..d285c1f6f1a61 100644 +--- a/net/ipv6/icmp.c ++++ b/net/ipv6/icmp.c +@@ -212,7 +212,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, + } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { + res = true; + } else { +- struct rt6_info *rt = (struct rt6_info *)dst; ++ struct rt6_info *rt = dst_rt6_info(dst); + int tmo = net->ipv6.sysctl.icmpv6_time; + struct inet_peer *peer; + +@@ -241,7 +241,7 @@ static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type, + + dst = ip6_route_output(net, sk, fl6); + if (!dst->error) { +- struct rt6_info *rt = (struct rt6_info *)dst; ++ struct rt6_info *rt = dst_rt6_info(dst); + struct in6_addr prefsrc; + + rt6_get_prefsrc(rt, &prefsrc); +@@ -616,7 +616,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, + if (ip6_append_data(sk, icmpv6_getfrag, &msg, + len + sizeof(struct icmp6hdr), + sizeof(struct icmp6hdr), +- &ipc6, &fl6, (struct rt6_info *)dst, ++ &ipc6, &fl6, dst_rt6_info(dst), + MSG_DONTWAIT)) { + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); +@@ -803,7 +803,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) + if (ip6_append_data(sk, icmpv6_getfrag, &msg, + skb->len + sizeof(struct icmp6hdr), + sizeof(struct icmp6hdr), &ipc6, &fl6, +- (struct rt6_info 
*)dst, MSG_DONTWAIT)) { ++ dst_rt6_info(dst), MSG_DONTWAIT)) { + __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); + } else { +diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c +index 8c1ce78956bae..0601bad798221 100644 +--- a/net/ipv6/ila/ila_lwt.c ++++ b/net/ipv6/ila/ila_lwt.c +@@ -38,7 +38,7 @@ static inline struct ila_params *ila_params_lwtunnel( + static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) + { + struct dst_entry *orig_dst = skb_dst(skb); +- struct rt6_info *rt = (struct rt6_info *)orig_dst; ++ struct rt6_info *rt = dst_rt6_info(orig_dst); + struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate); + struct dst_entry *dst; + int err = -EINVAL; +@@ -70,7 +70,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = orig_dst->dev->ifindex; + fl6.flowi6_iif = LOOPBACK_IFINDEX; +- fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst, ++ fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst), + &ip6h->daddr); + + dst = ip6_route_output(net, NULL, &fl6); +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index 51b358f6c3918..995e560098d1c 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -120,7 +120,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + + rcu_read_lock(); +- nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); ++ nexthop = rt6_nexthop(dst_rt6_info(dst), daddr); + neigh = __ipv6_neigh_lookup_noref(dev, nexthop); + + if (unlikely(IS_ERR_OR_NULL(neigh))) { +@@ -599,7 +599,7 @@ int ip6_forward(struct sk_buff *skb) + * send a redirect. 
+ */ + +- rt = (struct rt6_info *) dst; ++ rt = dst_rt6_info(dst); + if (rt->rt6i_flags & RTF_GATEWAY) + target = &rt->rt6i_gateway; + else +@@ -856,7 +856,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)) + { + struct sk_buff *frag; +- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); ++ struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); + struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + inet6_sk(skb->sk) : NULL; + bool mono_delivery_time = skb->mono_delivery_time; +@@ -1063,7 +1063,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, + return NULL; + } + +- rt = (struct rt6_info *)dst; ++ rt = dst_rt6_info(dst); + /* Yes, checking route validity in not connected + * case is not very simple. Take into account, + * that we do not support routing by source, TOS, +@@ -1118,7 +1118,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, + struct rt6_info *rt; + + *dst = ip6_route_output(net, sk, fl6); +- rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; ++ rt = (*dst)->error ? NULL : dst_rt6_info(*dst); + + rcu_read_lock(); + from = rt ? 
rcu_dereference(rt->from) : NULL; +@@ -1159,7 +1159,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, + * dst entry and replace it instead with the + * dst entry of the nexthop router + */ +- rt = (struct rt6_info *) *dst; ++ rt = dst_rt6_info(*dst); + rcu_read_lock(); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, + rt6_nexthop(rt, &fl6->daddr)); +@@ -1423,7 +1423,7 @@ static int __ip6_append_data(struct sock *sk, + int offset = 0; + bool zc = false; + u32 tskey = 0; +- struct rt6_info *rt = (struct rt6_info *)cork->dst; ++ struct rt6_info *rt = dst_rt6_info(cork->dst); + bool paged, hold_tskey, extra_uref = false; + struct ipv6_txoptions *opt = v6_cork->opt; + int csummode = CHECKSUM_NONE; +@@ -1877,7 +1877,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, + struct net *net = sock_net(sk); + struct ipv6hdr *hdr; + struct ipv6_txoptions *opt = v6_cork->opt; +- struct rt6_info *rt = (struct rt6_info *)cork->base.dst; ++ struct rt6_info *rt = dst_rt6_info(cork->base.dst); + struct flowi6 *fl6 = &cork->fl.u.ip6; + unsigned char proto = fl6->flowi6_proto; + +@@ -1949,7 +1949,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, + int ip6_send_skb(struct sk_buff *skb) + { + struct net *net = sock_net(skb->sk); +- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); ++ struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); + int err; + + err = ip6_local_out(net, skb->sk, skb); +diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c +index 9782c180fee64..5f841b5c1c59e 100644 +--- a/net/ipv6/ip6mr.c ++++ b/net/ipv6/ip6mr.c +@@ -2276,7 +2276,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, + int err; + struct mr_table *mrt; + struct mfc6_cache *cache; +- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); ++ struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); + + mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); + if (!mrt) +diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c +index a19999b30bc07..acf33fb83942a 100644 +--- 
a/net/ipv6/ndisc.c ++++ b/net/ipv6/ndisc.c +@@ -1708,7 +1708,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) + if (IS_ERR(dst)) + return; + +- rt = (struct rt6_info *) dst; ++ rt = dst_rt6_info(dst); + + if (rt->rt6i_flags & RTF_GATEWAY) { + ND_PRINTK(2, warn, +diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c +index ef2059c889554..88b3fcacd4f94 100644 +--- a/net/ipv6/ping.c ++++ b/net/ipv6/ping.c +@@ -154,7 +154,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false); + if (IS_ERR(dst)) + return PTR_ERR(dst); +- rt = (struct rt6_info *) dst; ++ rt = dst_rt6_info(dst); + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); +diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c +index 03dbb874c363b..b97a5eb0b0bc8 100644 +--- a/net/ipv6/raw.c ++++ b/net/ipv6/raw.c +@@ -592,7 +592,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, + struct ipv6hdr *iph; + struct sk_buff *skb; + int err; +- struct rt6_info *rt = (struct rt6_info *)*dstp; ++ struct rt6_info *rt = dst_rt6_info(*dstp); + int hlen = LL_RESERVED_SPACE(rt->dst.dev); + int tlen = rt->dst.dev->needed_tailroom; + +@@ -911,7 +911,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) + ipc6.opt = opt; + lock_sock(sk); + err = ip6_append_data(sk, raw6_getfrag, &rfv, +- len, 0, &ipc6, &fl6, (struct rt6_info *)dst, ++ len, 0, &ipc6, &fl6, dst_rt6_info(dst), + msg->msg_flags); + + if (err) +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index ef815ba583a8f..0d6dd1f0c51eb 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -226,7 +226,7 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) + { +- const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); ++ const struct rt6_info *rt = dst_rt6_info(dst); + + return 
ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any), + dst->dev, skb, daddr); +@@ -234,8 +234,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, + + static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) + { ++ const struct rt6_info *rt = dst_rt6_info(dst); + struct net_device *dev = dst->dev; +- struct rt6_info *rt = (struct rt6_info *)dst; + + daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr); + if (!daddr) +@@ -354,7 +354,7 @@ EXPORT_SYMBOL(ip6_dst_alloc); + + static void ip6_dst_destroy(struct dst_entry *dst) + { +- struct rt6_info *rt = (struct rt6_info *)dst; ++ struct rt6_info *rt = dst_rt6_info(dst); + struct fib6_info *from; + struct inet6_dev *idev; + +@@ -373,7 +373,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) + + static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev) + { +- struct rt6_info *rt = (struct rt6_info *)dst; ++ struct rt6_info *rt = dst_rt6_info(dst); + struct inet6_dev *idev = rt->rt6i_idev; + + if (idev && idev->dev != blackhole_netdev) { +@@ -1278,7 +1278,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, + + dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup); + if (dst->error == 0) +- return (struct rt6_info *) dst; ++ return dst_rt6_info(dst); + + dst_release(dst); + +@@ -2637,7 +2637,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, + + rcu_read_lock(); + dst = ip6_route_output_flags_noref(net, sk, fl6, flags); +- rt6 = (struct rt6_info *)dst; ++ rt6 = dst_rt6_info(dst); + /* For dst cached in uncached_list, refcnt is already taken. 
*/ + if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) { + dst = &net->ipv6.ip6_null_entry->dst; +@@ -2651,7 +2651,7 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags); + + struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) + { +- struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; ++ struct rt6_info *rt, *ort = dst_rt6_info(dst_orig); + struct net_device *loopback_dev = net->loopback_dev; + struct dst_entry *new = NULL; + +@@ -2734,7 +2734,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, + struct fib6_info *from; + struct rt6_info *rt; + +- rt = container_of(dst, struct rt6_info, dst); ++ rt = dst_rt6_info(dst); + + if (rt->sernum) + return rt6_is_valid(rt) ? dst : NULL; +@@ -2762,7 +2762,7 @@ EXPORT_INDIRECT_CALLABLE(ip6_dst_check); + + static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) + { +- struct rt6_info *rt = (struct rt6_info *) dst; ++ struct rt6_info *rt = dst_rt6_info(dst); + + if (rt) { + if (rt->rt6i_flags & RTF_CACHE) { +@@ -2786,7 +2786,7 @@ static void ip6_link_failure(struct sk_buff *skb) + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + +- rt = (struct rt6_info *) skb_dst(skb); ++ rt = dst_rt6_info(skb_dst(skb)); + if (rt) { + rcu_read_lock(); + if (rt->rt6i_flags & RTF_CACHE) { +@@ -2842,7 +2842,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, + bool confirm_neigh) + { + const struct in6_addr *daddr, *saddr; +- struct rt6_info *rt6 = (struct rt6_info *)dst; ++ struct rt6_info *rt6 = dst_rt6_info(dst); + + /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU) + * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it. 
+@@ -4165,7 +4165,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu + } + } + +- rt = (struct rt6_info *) dst; ++ rt = dst_rt6_info(dst); + if (rt->rt6i_flags & RTF_REJECT) { + net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); + return; +@@ -5597,7 +5597,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, + int iif, int type, u32 portid, u32 seq, + unsigned int flags) + { +- struct rt6_info *rt6 = (struct rt6_info *)dst; ++ struct rt6_info *rt6 = dst_rt6_info(dst); + struct rt6key *rt6_dst, *rt6_src; + u32 *pmetrics, table, rt6_flags; + unsigned char nh_flags = 0; +@@ -6100,7 +6100,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, + } + + +- rt = container_of(dst, struct rt6_info, dst); ++ rt = dst_rt6_info(dst); + if (rt->dst.error) { + err = rt->dst.error; + ip6_rt_put(rt); +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 012adcfcabeb5..b23e4b2873138 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -94,11 +94,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) + struct dst_entry *dst = skb_dst(skb); + + if (dst && dst_hold_safe(dst)) { +- const struct rt6_info *rt = (const struct rt6_info *)dst; +- + rcu_assign_pointer(sk->sk_rx_dst, dst); + sk->sk_rx_dst_ifindex = skb->skb_iif; +- sk->sk_rx_dst_cookie = rt6_get_cookie(rt); ++ sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); + } + } + +diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c +index 936b51f358a9a..9a9434c4cc893 100644 +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -913,11 +913,8 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, + + static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) + { +- if (udp_sk_rx_dst_set(sk, dst)) { +- const struct rt6_info *rt = (const struct rt6_info *)dst; +- +- sk->sk_rx_dst_cookie = rt6_get_cookie(rt); +- } ++ if (udp_sk_rx_dst_set(sk, dst)) ++ 
sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); + } + + /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and +@@ -1587,7 +1584,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) + + skb = ip6_make_skb(sk, getfrag, msg, ulen, + sizeof(struct udphdr), &ipc6, +- (struct rt6_info *)dst, ++ dst_rt6_info(dst), + msg->msg_flags, &cork); + err = PTR_ERR(skb); + if (!IS_ERR_OR_NULL(skb)) +@@ -1614,7 +1611,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); + up->len += ulen; + err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), +- &ipc6, fl6, (struct rt6_info *)dst, ++ &ipc6, fl6, dst_rt6_info(dst), + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + if (err) + udp_v6_flush_pending_frames(sk); +diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c +index 42fb6996b0777..ce48173c60e56 100644 +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -70,7 +70,7 @@ static int xfrm6_get_saddr(struct net *net, int oif, + static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + const struct flowi *fl) + { +- struct rt6_info *rt = (struct rt6_info *)xdst->route; ++ struct rt6_info *rt = dst_rt6_info(xdst->route); + + xdst->u.dst.dev = dev; + netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); +diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c +index 7bf14cf9ffaa9..8780ec64f3769 100644 +--- a/net/l2tp/l2tp_ip6.c ++++ b/net/l2tp/l2tp_ip6.c +@@ -630,7 +630,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) + ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? 
transhdrlen : 0); + err = ip6_append_data(sk, ip_generic_getfrag, msg, + ulen, transhdrlen, &ipc6, +- &fl6, (struct rt6_info *)dst, ++ &fl6, dst_rt6_info(dst), + msg->msg_flags); + if (err) + ip6_flush_pending_frames(sk); +diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c +index ef59e25dc4827..8985abcb7a058 100644 +--- a/net/mpls/mpls_iptunnel.c ++++ b/net/mpls/mpls_iptunnel.c +@@ -92,7 +92,7 @@ static int mpls_xmit(struct sk_buff *skb) + ttl = net->mpls.default_ttl; + else + ttl = ipv6_hdr(skb)->hop_limit; +- rt6 = (struct rt6_info *)dst; ++ rt6 = dst_rt6_info(dst); + } else { + goto drop; + } +diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c +index 65e0259178da4..5cd511162bc03 100644 +--- a/net/netfilter/ipvs/ip_vs_xmit.c ++++ b/net/netfilter/ipvs/ip_vs_xmit.c +@@ -180,7 +180,7 @@ static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb, + (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && + (addr_type & IPV6_ADDR_LOOPBACK); + old_rt_is_local = __ip_vs_is_local_route6( +- (struct rt6_info *)skb_dst(skb)); ++ dst_rt6_info(skb_dst(skb))); + } else + #endif + { +@@ -481,7 +481,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, + if (dest) { + dest_dst = __ip_vs_dst_check(dest); + if (likely(dest_dst)) +- rt = (struct rt6_info *) dest_dst->dst_cache; ++ rt = dst_rt6_info(dest_dst->dst_cache); + else { + u32 cookie; + +@@ -501,7 +501,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, + ip_vs_dest_dst_free(dest_dst); + goto err_unreach; + } +- rt = (struct rt6_info *) dst; ++ rt = dst_rt6_info(dst); + cookie = rt6_get_cookie(rt); + __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); + spin_unlock_bh(&dest->dst_lock); +@@ -517,7 +517,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, + rt_mode); + if (!dst) + goto err_unreach; +- rt = (struct rt6_info *) dst; ++ rt = dst_rt6_info(dst); + } + + local = 
__ip_vs_is_local_route6(rt); +@@ -862,7 +862,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + IP_VS_RT_MODE_RDR); + if (local < 0) + goto tx_error; +- rt = (struct rt6_info *) skb_dst(skb); ++ rt = dst_rt6_info(skb_dst(skb)); + /* + * Avoid duplicate tuple in reply direction for NAT traffic + * to local address when connection is sync-ed +@@ -1288,7 +1288,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + if (local) + return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); + +- rt = (struct rt6_info *) skb_dst(skb); ++ rt = dst_rt6_info(skb_dst(skb)); + tdev = rt->dst.dev; + + /* +@@ -1590,7 +1590,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); + if (local < 0) + goto tx_error; +- rt = (struct rt6_info *) skb_dst(skb); ++ rt = dst_rt6_info(skb_dst(skb)); + /* + * Avoid duplicate tuple in reply direction for NAT traffic + * to local address when connection is sync-ed +diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c +index a0571339239c4..5c1ff07eaee0b 100644 +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -77,12 +77,8 @@ EXPORT_SYMBOL_GPL(flow_offload_alloc); + + static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) + { +- const struct rt6_info *rt; +- +- if (flow_tuple->l3proto == NFPROTO_IPV6) { +- rt = (const struct rt6_info *)flow_tuple->dst_cache; +- return rt6_get_cookie(rt); +- } ++ if (flow_tuple->l3proto == NFPROTO_IPV6) ++ return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache)); + + return 0; + } +diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c +index 5383bed3d3e00..100887beed314 100644 +--- a/net/netfilter/nf_flow_table_ip.c ++++ b/net/netfilter/nf_flow_table_ip.c +@@ -729,7 +729,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + return NF_ACCEPT; + + if (unlikely(tuplehash->tuple.xmit_type == 
FLOW_OFFLOAD_XMIT_XFRM)) { +- rt = (struct rt6_info *)tuplehash->tuple.dst_cache; ++ rt = dst_rt6_info(tuplehash->tuple.dst_cache); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + IP6CB(skb)->iif = skb->dev->ifindex; + IP6CB(skb)->flags = IP6SKB_FORWARDED; +@@ -741,7 +741,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + + switch (tuplehash->tuple.xmit_type) { + case FLOW_OFFLOAD_XMIT_NEIGH: +- rt = (struct rt6_info *)tuplehash->tuple.dst_cache; ++ rt = dst_rt6_info(tuplehash->tuple.dst_cache); + outdev = rt->dst.dev; + skb->dev = outdev; + nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); +diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c +index 24d9771385729..2434c624aafde 100644 +--- a/net/netfilter/nft_rt.c ++++ b/net/netfilter/nft_rt.c +@@ -80,7 +80,7 @@ void nft_rt_get_eval(const struct nft_expr *expr, + if (nft_pf(pkt) != NFPROTO_IPV6) + goto err; + +- memcpy(dest, rt6_nexthop((struct rt6_info *)dst, ++ memcpy(dest, rt6_nexthop(dst_rt6_info(dst), + &ipv6_hdr(skb)->daddr), + sizeof(struct in6_addr)); + break; +diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c +index 24368f755ab19..f7b809c0d142c 100644 +--- a/net/sctp/ipv6.c ++++ b/net/sctp/ipv6.c +@@ -415,7 +415,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, + if (!IS_ERR_OR_NULL(dst)) { + struct rt6_info *rt; + +- rt = (struct rt6_info *)dst; ++ rt = dst_rt6_info(dst); + t->dst_cookie = rt6_get_cookie(rt); + pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n", + &rt->rt6i_dst.addr, rt->rt6i_dst.plen, +diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c +index da6ecc6b3e153..55db22473e3eb 100644 +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -2597,8 +2597,7 @@ static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, + int nfheader_len) + { + if (dst->ops->family == AF_INET6) { +- struct rt6_info *rt = (struct rt6_info *)dst; +- path->path_cookie = rt6_get_cookie(rt); ++ path->path_cookie = 
rt6_get_cookie(dst_rt6_info(dst)); + path->u.rt6.rt6i_nfheader_len = nfheader_len; + } + } +-- +2.43.0 + diff --git a/queue-6.8/ipvlan-dont-use-skb-sk-in-ipvlan_process_v-4-6-_outb.patch b/queue-6.8/ipvlan-dont-use-skb-sk-in-ipvlan_process_v-4-6-_outb.patch new file mode 100644 index 00000000000..5cf799de740 --- /dev/null +++ b/queue-6.8/ipvlan-dont-use-skb-sk-in-ipvlan_process_v-4-6-_outb.patch @@ -0,0 +1,107 @@ +From 016063ac631d44411fe694e3d912cd005ca48dab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 May 2024 17:56:33 +0800 +Subject: ipvlan: Dont Use skb->sk in ipvlan_process_v{4,6}_outbound + +From: Yue Haibing + +[ Upstream commit b3dc6e8003b500861fa307e9a3400c52e78e4d3a ] + +Raw packet from PF_PACKET socket ontop of an IPv6-backed ipvlan device will +hit WARN_ON_ONCE() in sk_mc_loop() through sch_direct_xmit() path. + +WARNING: CPU: 2 PID: 0 at net/core/sock.c:775 sk_mc_loop+0x2d/0x70 +Modules linked in: sch_netem ipvlan rfkill cirrus drm_shmem_helper sg drm_kms_helper +CPU: 2 PID: 0 Comm: swapper/2 Kdump: loaded Not tainted 6.9.0+ #279 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 +RIP: 0010:sk_mc_loop+0x2d/0x70 +Code: fa 0f 1f 44 00 00 65 0f b7 15 f7 96 a3 4f 31 c0 66 85 d2 75 26 48 85 ff 74 1c +RSP: 0018:ffffa9584015cd78 EFLAGS: 00010212 +RAX: 0000000000000011 RBX: ffff91e585793e00 RCX: 0000000002c6a001 +RDX: 0000000000000000 RSI: 0000000000000040 RDI: ffff91e589c0f000 +RBP: ffff91e5855bd100 R08: 0000000000000000 R09: 3d00545216f43d00 +R10: ffff91e584fdcc50 R11: 00000060dd8616f4 R12: ffff91e58132d000 +R13: ffff91e584fdcc68 R14: ffff91e5869ce800 R15: ffff91e589c0f000 +FS: 0000000000000000(0000) GS:ffff91e898100000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f788f7c44c0 CR3: 0000000008e1a000 CR4: 00000000000006f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + ? 
__warn (kernel/panic.c:693) + ? sk_mc_loop (net/core/sock.c:760) + ? report_bug (lib/bug.c:201 lib/bug.c:219) + ? handle_bug (arch/x86/kernel/traps.c:239) + ? exc_invalid_op (arch/x86/kernel/traps.c:260 (discriminator 1)) + ? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621) + ? sk_mc_loop (net/core/sock.c:760) + ip6_finish_output2 (net/ipv6/ip6_output.c:83 (discriminator 1)) + ? nf_hook_slow (net/netfilter/core.c:626) + ip6_finish_output (net/ipv6/ip6_output.c:222) + ? __pfx_ip6_finish_output (net/ipv6/ip6_output.c:215) + ipvlan_xmit_mode_l3 (drivers/net/ipvlan/ipvlan_core.c:602) ipvlan + ipvlan_start_xmit (drivers/net/ipvlan/ipvlan_main.c:226) ipvlan + dev_hard_start_xmit (net/core/dev.c:3594) + sch_direct_xmit (net/sched/sch_generic.c:343) + __qdisc_run (net/sched/sch_generic.c:416) + net_tx_action (net/core/dev.c:5286) + handle_softirqs (kernel/softirq.c:555) + __irq_exit_rcu (kernel/softirq.c:589) + sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1043) + +The warning triggers as this: +packet_sendmsg + packet_snd //skb->sk is packet sk + __dev_queue_xmit + __dev_xmit_skb //q->enqueue is not NULL + __qdisc_run + sch_direct_xmit + dev_hard_start_xmit + ipvlan_start_xmit + ipvlan_xmit_mode_l3 //l3 mode + ipvlan_process_outbound //vepa flag + ipvlan_process_v6_outbound + ip6_local_out + __ip6_finish_output + ip6_finish_output2 //multicast packet + sk_mc_loop //sk->sk_family is AF_PACKET + +Call ip{6}_local_out() with NULL sk in ipvlan as other tunnels to fix this. 
+ +Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") +Suggested-by: Eric Dumazet +Signed-off-by: Yue Haibing +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20240529095633.613103-1-yuehaibing@huawei.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ipvlan/ipvlan_core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c +index 2d5b021b4ea60..fef4eff7753a7 100644 +--- a/drivers/net/ipvlan/ipvlan_core.c ++++ b/drivers/net/ipvlan/ipvlan_core.c +@@ -439,7 +439,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) + + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + +- err = ip_local_out(net, skb->sk, skb); ++ err = ip_local_out(net, NULL, skb); + if (unlikely(net_xmit_eval(err))) + DEV_STATS_INC(dev, tx_errors); + else +@@ -494,7 +494,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) + + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + +- err = ip6_local_out(dev_net(dev), skb->sk, skb); ++ err = ip6_local_out(dev_net(dev), NULL, skb); + if (unlikely(net_xmit_eval(err))) + DEV_STATS_INC(dev, tx_errors); + else +-- +2.43.0 + diff --git a/queue-6.8/kconfig-fix-comparison-to-constant-symbols-m-n.patch b/queue-6.8/kconfig-fix-comparison-to-constant-symbols-m-n.patch new file mode 100644 index 00000000000..6d5f3874e30 --- /dev/null +++ b/queue-6.8/kconfig-fix-comparison-to-constant-symbols-m-n.patch @@ -0,0 +1,128 @@ +From a117acade7508d3a8a832d2bc98926d6886aa590 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 19 May 2024 18:22:27 +0900 +Subject: kconfig: fix comparison to constant symbols, 'm', 'n' + +From: Masahiro Yamada + +[ Upstream commit aabdc960a283ba78086b0bf66ee74326f49e218e ] + +Currently, comparisons to 'm' or 'n' result in incorrect output. 
+ +[Test Code] + + config MODULES + def_bool y + modules + + config A + def_tristate m + + config B + def_bool A > n + +CONFIG_B is unset, while CONFIG_B=y is expected. + +The reason for the issue is because Kconfig compares the tristate values +as strings. + +Currently, the .type fields in the constant symbol definitions, +symbol_{yes,mod,no} are unspecified, i.e., S_UNKNOWN. + +When expr_calc_value() evaluates 'A > n', it checks the types of 'A' and +'n' to determine how to compare them. + +The left-hand side, 'A', is a tristate symbol with a value of 'm', which +corresponds to a numeric value of 1. (Internally, 'y', 'm', and 'n' are +represented as 2, 1, and 0, respectively.) + +The right-hand side, 'n', has an unknown type, so it is treated as the +string "n" during the comparison. + +expr_calc_value() compares two values numerically only when both can +have numeric values. Otherwise, they are compared as strings. + + symbol numeric value ASCII code + ------------------------------------- + y 2 0x79 + m 1 0x6d + n 0 0x6e + +'m' is greater than 'n' if compared numerically (since 1 is greater +than 0), but smaller than 'n' if compared as strings (since the ASCII +code 0x6d is smaller than 0x6e). + +Specifying .type=S_TRISTATE for symbol_{yes,mod,no} fixes the above +test code. + +Doing so, however, would cause a regression to the following test code. + +[Test Code 2] + + config MODULES + def_bool n + modules + + config A + def_tristate n + + config B + def_bool A = m + +You would get CONFIG_B=y, while CONFIG_B should not be set. + +The reason is because sym_get_string_value() turns 'm' into 'n' when the +module feature is disabled. Consequently, expr_calc_value() evaluates +'A = n' instead of 'A = m'. This oddity has been hidden because the type +of 'm' was previously S_UNKNOWN instead of S_TRISTATE. + +sym_get_string_value() should not tweak the string because the tristate +value has already been correctly calculated. 
There is no reason to +return the string "n" where its tristate value is mod. + +Fixes: 31847b67bec0 ("kconfig: allow use of relations other than (in)equality") +Signed-off-by: Masahiro Yamada +Signed-off-by: Sasha Levin +--- + scripts/kconfig/symbol.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c +index e9e9fb8d86746..0d419b2c4ac58 100644 +--- a/scripts/kconfig/symbol.c ++++ b/scripts/kconfig/symbol.c +@@ -13,18 +13,21 @@ + + struct symbol symbol_yes = { + .name = "y", ++ .type = S_TRISTATE, + .curr = { "y", yes }, + .flags = SYMBOL_CONST|SYMBOL_VALID, + }; + + struct symbol symbol_mod = { + .name = "m", ++ .type = S_TRISTATE, + .curr = { "m", mod }, + .flags = SYMBOL_CONST|SYMBOL_VALID, + }; + + struct symbol symbol_no = { + .name = "n", ++ .type = S_TRISTATE, + .curr = { "n", no }, + .flags = SYMBOL_CONST|SYMBOL_VALID, + }; +@@ -786,8 +789,7 @@ const char *sym_get_string_value(struct symbol *sym) + case no: + return "n"; + case mod: +- sym_calc_value(modules_sym); +- return (modules_sym->curr.tri == no) ? "n" : "m"; ++ return "m"; + case yes: + return "y"; + } +-- +2.43.0 + diff --git a/queue-6.8/kheaders-use-command-v-to-test-for-existence-of-cpio.patch b/queue-6.8/kheaders-use-command-v-to-test-for-existence-of-cpio.patch new file mode 100644 index 00000000000..80947004d52 --- /dev/null +++ b/queue-6.8/kheaders-use-command-v-to-test-for-existence-of-cpio.patch @@ -0,0 +1,57 @@ +From 6b722a9cb743458ab1ad73116b6520becad775a8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 May 2024 18:31:50 +0200 +Subject: kheaders: use `command -v` to test for existence of `cpio` + +From: Miguel Ojeda + +[ Upstream commit 6e58e0173507e506a5627741358bc770f220e356 ] + +Commit 13e1df09284d ("kheaders: explicitly validate existence of cpio +command") added an explicit check for `cpio` using `type`. 
+ +However, `type` in `dash` (which is used in some popular distributions +and base images as the shell script runner) prints the missing message +to standard output, and thus no error is printed: + + $ bash -c 'type missing >/dev/null' + bash: line 1: type: missing: not found + $ dash -c 'type missing >/dev/null' + $ + +For instance, this issue may be seen by loongarch builders, given its +defconfig enables CONFIG_IKHEADERS since commit 9cc1df421f00 ("LoongArch: +Update Loongson-3 default config file"). + +Therefore, use `command -v` instead to have consistent behavior, and +take the chance to provide a more explicit error. + +Fixes: 13e1df09284d ("kheaders: explicitly validate existence of cpio command") +Signed-off-by: Miguel Ojeda +Signed-off-by: Masahiro Yamada +Signed-off-by: Sasha Levin +--- + kernel/gen_kheaders.sh | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh +index 6d443ea22bb73..4ba5fd3d73ae2 100755 +--- a/kernel/gen_kheaders.sh ++++ b/kernel/gen_kheaders.sh +@@ -14,7 +14,12 @@ include/ + arch/$SRCARCH/include/ + " + +-type cpio > /dev/null ++if ! command -v cpio >/dev/null; then ++ echo >&2 "***" ++ echo >&2 "*** 'cpio' could not be found." ++ echo >&2 "***" ++ exit 1 ++fi + + # Support incremental builds by skipping archive generation + # if timestamps of files being archived are not changed. 
+-- +2.43.0 + diff --git a/queue-6.8/net-dsa-microchip-fix-rgmii-error-in-ksz-dsa-driver.patch b/queue-6.8/net-dsa-microchip-fix-rgmii-error-in-ksz-dsa-driver.patch new file mode 100644 index 00000000000..1dd9d8b4c41 --- /dev/null +++ b/queue-6.8/net-dsa-microchip-fix-rgmii-error-in-ksz-dsa-driver.patch @@ -0,0 +1,39 @@ +From 0ed4d34e95c00f6b8ca21aacedd720b2b4c93875 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 May 2024 14:34:26 -0700 +Subject: net: dsa: microchip: fix RGMII error in KSZ DSA driver + +From: Tristram Ha + +[ Upstream commit 278d65ccdadb5f0fa0ceaf7b9cc97b305cd72822 ] + +The driver should return RMII interface when XMII is running in RMII mode. + +Fixes: 0ab7f6bf1675 ("net: dsa: microchip: ksz9477: use common xmii function") +Signed-off-by: Tristram Ha +Acked-by: Arun Ramadoss +Acked-by: Jerry Ray +Reviewed-by: Andrew Lunn +Link: https://lore.kernel.org/r/1716932066-3342-1-git-send-email-Tristram.Ha@microchip.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/microchip/ksz_common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c +index 25a49708f4842..47b4e137c1a4f 100644 +--- a/drivers/net/dsa/microchip/ksz_common.c ++++ b/drivers/net/dsa/microchip/ksz_common.c +@@ -2968,7 +2968,7 @@ phy_interface_t ksz_get_xmii(struct ksz_device *dev, int port, bool gbit) + else + interface = PHY_INTERFACE_MODE_MII; + } else if (val == bitval[P_RMII_SEL]) { +- interface = PHY_INTERFACE_MODE_RGMII; ++ interface = PHY_INTERFACE_MODE_RMII; + } else { + interface = PHY_INTERFACE_MODE_RGMII; + if (data8 & P_RGMII_ID_EG_ENABLE) +-- +2.43.0 + diff --git a/queue-6.8/net-ena-fix-redundant-device-numa-node-override.patch b/queue-6.8/net-ena-fix-redundant-device-numa-node-override.patch new file mode 100644 index 00000000000..d4891dbde54 --- /dev/null +++ b/queue-6.8/net-ena-fix-redundant-device-numa-node-override.patch @@ -0,0 
+1,85 @@ +From 256dbb5a2dc8640a4318107ced73f46a72677fb5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 May 2024 20:09:12 +0300 +Subject: net: ena: Fix redundant device NUMA node override + +From: Shay Agroskin + +[ Upstream commit 2dc8b1e7177d4f49f492ce648440caf2de0c3616 ] + +The driver overrides the NUMA node id of the device regardless of +whether it knows its correct value (often setting it to -1 even though +the node id is advertised in 'struct device'). This can lead to +suboptimal configurations. + +This patch fixes this behavior and makes the shared memory allocation +functions use the NUMA node id advertised by the underlying device. + +Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") +Signed-off-by: Shay Agroskin +Link: https://lore.kernel.org/r/20240528170912.1204417-1-shayagr@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 11 ----------- + 1 file changed, 11 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c +index e733419dd3f49..276f6a8631fb1 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_com.c ++++ b/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -312,7 +312,6 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq) + { + size_t size; +- int dev_node = 0; + + memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr)); + +@@ -325,12 +324,9 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, + size = io_sq->desc_entry_size * io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { +- dev_node = dev_to_node(ena_dev->dmadev); +- set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_sq->desc_addr.virt_addr = + dma_alloc_coherent(ena_dev->dmadev, size, &io_sq->desc_addr.phys_addr, + GFP_KERNEL); +- set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->desc_addr.virt_addr) { + io_sq->desc_addr.virt_addr 
= + dma_alloc_coherent(ena_dev->dmadev, size, +@@ -354,10 +350,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, + size = (size_t)io_sq->bounce_buf_ctrl.buffer_size * + io_sq->bounce_buf_ctrl.buffers_num; + +- dev_node = dev_to_node(ena_dev->dmadev); +- set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_sq->bounce_buf_ctrl.base_buffer = devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); +- set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->bounce_buf_ctrl.base_buffer) + io_sq->bounce_buf_ctrl.base_buffer = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); +@@ -397,7 +390,6 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq) + { + size_t size; +- int prev_node = 0; + + memset(&io_cq->cdesc_addr, 0x0, sizeof(io_cq->cdesc_addr)); + +@@ -409,11 +401,8 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, + + size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; + +- prev_node = dev_to_node(ena_dev->dmadev); +- set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_cq->cdesc_addr.virt_addr = + dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr, GFP_KERNEL); +- set_dev_node(ena_dev->dmadev, prev_node); + if (!io_cq->cdesc_addr.virt_addr) { + io_cq->cdesc_addr.virt_addr = + dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr, +-- +2.43.0 + diff --git a/queue-6.8/net-ena-reduce-lines-with-longer-column-width-bounda.patch b/queue-6.8/net-ena-reduce-lines-with-longer-column-width-bounda.patch new file mode 100644 index 00000000000..2b305050001 --- /dev/null +++ b/queue-6.8/net-ena-reduce-lines-with-longer-column-width-bounda.patch @@ -0,0 +1,1265 @@ +From e20280464cebe38538cde1f77020811fde1d0a61 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 Jan 2024 09:53:53 +0000 +Subject: net: ena: Reduce lines with longer column width boundary + +From: David Arinzon + +[ Upstream commit 50613650c3d6255cef13a129ccaa919ca73a6743 ] + +This patch reduces some of the lines by 
removing newlines +where more variables or print strings can be pushed back +to the previous line while still adhering to the styling +guidelines. + +Signed-off-by: David Arinzon +Signed-off-by: Paolo Abeni +Stable-dep-of: 2dc8b1e7177d ("net: ena: Fix redundant device NUMA node override") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amazon/ena/ena_com.c | 315 +++++++----------- + drivers/net/ethernet/amazon/ena/ena_eth_com.c | 49 ++- + drivers/net/ethernet/amazon/ena/ena_eth_com.h | 15 +- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 32 +- + 4 files changed, 151 insertions(+), 260 deletions(-) + +diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c +index 4db689372980e..e733419dd3f49 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_com.c ++++ b/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -90,8 +90,7 @@ static int ena_com_admin_init_sq(struct ena_com_admin_queue *admin_queue) + struct ena_com_admin_sq *sq = &admin_queue->sq; + u16 size = ADMIN_SQ_SIZE(admin_queue->q_depth); + +- sq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, +- &sq->dma_addr, GFP_KERNEL); ++ sq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, &sq->dma_addr, GFP_KERNEL); + + if (!sq->entries) { + netdev_err(ena_dev->net_device, "Memory allocation failed\n"); +@@ -113,8 +112,7 @@ static int ena_com_admin_init_cq(struct ena_com_admin_queue *admin_queue) + struct ena_com_admin_cq *cq = &admin_queue->cq; + u16 size = ADMIN_CQ_SIZE(admin_queue->q_depth); + +- cq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, +- &cq->dma_addr, GFP_KERNEL); ++ cq->entries = dma_alloc_coherent(admin_queue->q_dmadev, size, &cq->dma_addr, GFP_KERNEL); + + if (!cq->entries) { + netdev_err(ena_dev->net_device, "Memory allocation failed\n"); +@@ -136,8 +134,7 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *ena_dev, + + ena_dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH; + size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH); +- 
aenq->entries = dma_alloc_coherent(ena_dev->dmadev, size, +- &aenq->dma_addr, GFP_KERNEL); ++ aenq->entries = dma_alloc_coherent(ena_dev->dmadev, size, &aenq->dma_addr, GFP_KERNEL); + + if (!aenq->entries) { + netdev_err(ena_dev->net_device, "Memory allocation failed\n"); +@@ -155,14 +152,13 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *ena_dev, + + aenq_caps = 0; + aenq_caps |= ena_dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; +- aenq_caps |= (sizeof(struct ena_admin_aenq_entry) +- << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & +- ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; ++ aenq_caps |= ++ (sizeof(struct ena_admin_aenq_entry) << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & ++ ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; + writel(aenq_caps, ena_dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF); + + if (unlikely(!aenq_handlers)) { +- netdev_err(ena_dev->net_device, +- "AENQ handlers pointer is NULL\n"); ++ netdev_err(ena_dev->net_device, "AENQ handlers pointer is NULL\n"); + return -EINVAL; + } + +@@ -189,14 +185,12 @@ static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *admin_queu + } + + if (unlikely(!admin_queue->comp_ctx)) { +- netdev_err(admin_queue->ena_dev->net_device, +- "Completion context is NULL\n"); ++ netdev_err(admin_queue->ena_dev->net_device, "Completion context is NULL\n"); + return NULL; + } + + if (unlikely(admin_queue->comp_ctx[command_id].occupied && capture)) { +- netdev_err(admin_queue->ena_dev->net_device, +- "Completion context is occupied\n"); ++ netdev_err(admin_queue->ena_dev->net_device, "Completion context is occupied\n"); + return NULL; + } + +@@ -226,8 +220,7 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu + /* In case of queue FULL */ + cnt = (u16)atomic_read(&admin_queue->outstanding_cmds); + if (cnt >= admin_queue->q_depth) { +- netdev_dbg(admin_queue->ena_dev->net_device, +- "Admin queue is full.\n"); ++ netdev_dbg(admin_queue->ena_dev->net_device, "Admin queue is 
full.\n"); + admin_queue->stats.out_of_space++; + return ERR_PTR(-ENOSPC); + } +@@ -274,8 +267,7 @@ static int ena_com_init_comp_ctxt(struct ena_com_admin_queue *admin_queue) + struct ena_comp_ctx *comp_ctx; + u16 i; + +- admin_queue->comp_ctx = +- devm_kzalloc(admin_queue->q_dmadev, size, GFP_KERNEL); ++ admin_queue->comp_ctx = devm_kzalloc(admin_queue->q_dmadev, size, GFP_KERNEL); + if (unlikely(!admin_queue->comp_ctx)) { + netdev_err(ena_dev->net_device, "Memory allocation failed\n"); + return -ENOMEM; +@@ -336,20 +328,17 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, + dev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_sq->desc_addr.virt_addr = +- dma_alloc_coherent(ena_dev->dmadev, size, +- &io_sq->desc_addr.phys_addr, ++ dma_alloc_coherent(ena_dev->dmadev, size, &io_sq->desc_addr.phys_addr, + GFP_KERNEL); + set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->desc_addr.virt_addr) { + io_sq->desc_addr.virt_addr = + dma_alloc_coherent(ena_dev->dmadev, size, +- &io_sq->desc_addr.phys_addr, +- GFP_KERNEL); ++ &io_sq->desc_addr.phys_addr, GFP_KERNEL); + } + + if (!io_sq->desc_addr.virt_addr) { +- netdev_err(ena_dev->net_device, +- "Memory allocation failed\n"); ++ netdev_err(ena_dev->net_device, "Memory allocation failed\n"); + return -ENOMEM; + } + } +@@ -367,16 +356,14 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, + + dev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); +- io_sq->bounce_buf_ctrl.base_buffer = +- devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); ++ io_sq->bounce_buf_ctrl.base_buffer = devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->bounce_buf_ctrl.base_buffer) + io_sq->bounce_buf_ctrl.base_buffer = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + + if (!io_sq->bounce_buf_ctrl.base_buffer) { +- netdev_err(ena_dev->net_device, +- "Bounce buffer memory allocation failed\n"); ++ 
netdev_err(ena_dev->net_device, "Bounce buffer memory allocation failed\n"); + return -ENOMEM; + } + +@@ -425,13 +412,11 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, + prev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_cq->cdesc_addr.virt_addr = +- dma_alloc_coherent(ena_dev->dmadev, size, +- &io_cq->cdesc_addr.phys_addr, GFP_KERNEL); ++ dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr, GFP_KERNEL); + set_dev_node(ena_dev->dmadev, prev_node); + if (!io_cq->cdesc_addr.virt_addr) { + io_cq->cdesc_addr.virt_addr = +- dma_alloc_coherent(ena_dev->dmadev, size, +- &io_cq->cdesc_addr.phys_addr, ++ dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr, + GFP_KERNEL); + } + +@@ -514,8 +499,8 @@ static int ena_com_comp_status_to_errno(struct ena_com_admin_queue *admin_queue, + u8 comp_status) + { + if (unlikely(comp_status != 0)) +- netdev_err(admin_queue->ena_dev->net_device, +- "Admin command failed[%u]\n", comp_status); ++ netdev_err(admin_queue->ena_dev->net_device, "Admin command failed[%u]\n", ++ comp_status); + + switch (comp_status) { + case ENA_ADMIN_SUCCESS: +@@ -580,8 +565,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c + } + + if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) { +- netdev_err(admin_queue->ena_dev->net_device, +- "Command was aborted\n"); ++ netdev_err(admin_queue->ena_dev->net_device, "Command was aborted\n"); + spin_lock_irqsave(&admin_queue->q_lock, flags); + admin_queue->stats.aborted_cmd++; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); +@@ -589,8 +573,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c + goto err; + } + +- WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n", +- comp_ctx->status); ++ WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n", comp_ctx->status); + + ret = ena_com_comp_status_to_errno(admin_queue, 
comp_ctx->comp_status); + err: +@@ -634,8 +617,7 @@ static int ena_com_set_llq(struct ena_com_dev *ena_dev) + sizeof(resp)); + + if (unlikely(ret)) +- netdev_err(ena_dev->net_device, +- "Failed to set LLQ configurations: %d\n", ret); ++ netdev_err(ena_dev->net_device, "Failed to set LLQ configurations: %d\n", ret); + + return ret; + } +@@ -658,8 +640,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, + llq_default_cfg->llq_header_location; + } else { + netdev_err(ena_dev->net_device, +- "Invalid header location control, supported: 0x%x\n", +- supported_feat); ++ "Invalid header location control, supported: 0x%x\n", supported_feat); + return -EINVAL; + } + +@@ -681,8 +662,8 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, + + netdev_err(ena_dev->net_device, + "Default llq stride ctrl is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n", +- llq_default_cfg->llq_stride_ctrl, +- supported_feat, llq_info->desc_stride_ctrl); ++ llq_default_cfg->llq_stride_ctrl, supported_feat, ++ llq_info->desc_stride_ctrl); + } + } else { + llq_info->desc_stride_ctrl = 0; +@@ -704,8 +685,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, + llq_info->desc_list_entry_size = 256; + } else { + netdev_err(ena_dev->net_device, +- "Invalid entry_size_ctrl, supported: 0x%x\n", +- supported_feat); ++ "Invalid entry_size_ctrl, supported: 0x%x\n", supported_feat); + return -EINVAL; + } + +@@ -750,8 +730,8 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, + + netdev_err(ena_dev->net_device, + "Default llq num descs before header is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n", +- llq_default_cfg->llq_num_decs_before_header, +- supported_feat, llq_info->descs_num_before_header); ++ llq_default_cfg->llq_num_decs_before_header, supported_feat, ++ llq_info->descs_num_before_header); + } + /* Check for accelerated queue supported */ + llq_accel_mode_get = 
llq_features->accel_mode.u.get; +@@ -767,8 +747,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, + + rc = ena_com_set_llq(ena_dev); + if (rc) +- netdev_err(ena_dev->net_device, +- "Cannot set LLQ configuration: %d\n", rc); ++ netdev_err(ena_dev->net_device, "Cannot set LLQ configuration: %d\n", rc); + + return rc; + } +@@ -780,8 +759,7 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com + int ret; + + wait_for_completion_timeout(&comp_ctx->wait_event, +- usecs_to_jiffies( +- admin_queue->completion_timeout)); ++ usecs_to_jiffies(admin_queue->completion_timeout)); + + /* In case the command wasn't completed find out the root cause. + * There might be 2 kinds of errors +@@ -797,8 +775,7 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com + if (comp_ctx->status == ENA_CMD_COMPLETED) { + netdev_err(admin_queue->ena_dev->net_device, + "The ena device sent a completion but the driver didn't receive a MSI-X interrupt (cmd %d), autopolling mode is %s\n", +- comp_ctx->cmd_opcode, +- admin_queue->auto_polling ? "ON" : "OFF"); ++ comp_ctx->cmd_opcode, admin_queue->auto_polling ? "ON" : "OFF"); + /* Check if fallback to polling is enabled */ + if (admin_queue->auto_polling) + admin_queue->polling = true; +@@ -867,15 +844,13 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) + if (unlikely(i == timeout)) { + netdev_err(ena_dev->net_device, + "Reading reg failed for timeout. 
expected: req id[%u] offset[%u] actual: req id[%u] offset[%u]\n", +- mmio_read->seq_num, offset, read_resp->req_id, +- read_resp->reg_off); ++ mmio_read->seq_num, offset, read_resp->req_id, read_resp->reg_off); + ret = ENA_MMIO_READ_TIMEOUT; + goto err; + } + + if (read_resp->reg_off != offset) { +- netdev_err(ena_dev->net_device, +- "Read failure: wrong offset provided\n"); ++ netdev_err(ena_dev->net_device, "Read failure: wrong offset provided\n"); + ret = ENA_MMIO_READ_TIMEOUT; + } else { + ret = read_resp->reg_val; +@@ -934,8 +909,7 @@ static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev, + sizeof(destroy_resp)); + + if (unlikely(ret && (ret != -ENODEV))) +- netdev_err(ena_dev->net_device, +- "Failed to destroy io sq error: %d\n", ret); ++ netdev_err(ena_dev->net_device, "Failed to destroy io sq error: %d\n", ret); + + return ret; + } +@@ -949,8 +923,7 @@ static void ena_com_io_queue_free(struct ena_com_dev *ena_dev, + if (io_cq->cdesc_addr.virt_addr) { + size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; + +- dma_free_coherent(ena_dev->dmadev, size, +- io_cq->cdesc_addr.virt_addr, ++ dma_free_coherent(ena_dev->dmadev, size, io_cq->cdesc_addr.virt_addr, + io_cq->cdesc_addr.phys_addr); + + io_cq->cdesc_addr.virt_addr = NULL; +@@ -959,8 +932,7 @@ static void ena_com_io_queue_free(struct ena_com_dev *ena_dev, + if (io_sq->desc_addr.virt_addr) { + size = io_sq->desc_entry_size * io_sq->q_depth; + +- dma_free_coherent(ena_dev->dmadev, size, +- io_sq->desc_addr.virt_addr, ++ dma_free_coherent(ena_dev->dmadev, size, io_sq->desc_addr.virt_addr, + io_sq->desc_addr.phys_addr); + + io_sq->desc_addr.virt_addr = NULL; +@@ -985,8 +957,7 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, + val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + + if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) { +- netdev_err(ena_dev->net_device, +- "Reg read timeout occurred\n"); ++ netdev_err(ena_dev->net_device, "Reg read timeout occurred\n"); + 
return -ETIME; + } + +@@ -1026,8 +997,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) { +- netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", +- feature_id); ++ netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", feature_id); + return -EOPNOTSUPP; + } + +@@ -1064,8 +1034,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, + + if (unlikely(ret)) + netdev_err(ena_dev->net_device, +- "Failed to submit get_feature command %d error: %d\n", +- feature_id, ret); ++ "Failed to submit get_feature command %d error: %d\n", feature_id, ret); + + return ret; + } +@@ -1104,13 +1073,11 @@ static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) + { + struct ena_rss *rss = &ena_dev->rss; + +- if (!ena_com_check_supported_feature_id(ena_dev, +- ENA_ADMIN_RSS_HASH_FUNCTION)) ++ if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_HASH_FUNCTION)) + return -EOPNOTSUPP; + +- rss->hash_key = +- dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), +- &rss->hash_key_dma_addr, GFP_KERNEL); ++ rss->hash_key = dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), ++ &rss->hash_key_dma_addr, GFP_KERNEL); + + if (unlikely(!rss->hash_key)) + return -ENOMEM; +@@ -1123,8 +1090,8 @@ static void ena_com_hash_key_destroy(struct ena_com_dev *ena_dev) + struct ena_rss *rss = &ena_dev->rss; + + if (rss->hash_key) +- dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), +- rss->hash_key, rss->hash_key_dma_addr); ++ dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), rss->hash_key, ++ rss->hash_key_dma_addr); + rss->hash_key = NULL; + } + +@@ -1132,9 +1099,8 @@ static int ena_com_hash_ctrl_init(struct ena_com_dev *ena_dev) + { + struct ena_rss *rss = &ena_dev->rss; + +- rss->hash_ctrl = +- dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), +- &rss->hash_ctrl_dma_addr, GFP_KERNEL); ++ rss->hash_ctrl = 
dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), ++ &rss->hash_ctrl_dma_addr, GFP_KERNEL); + + if (unlikely(!rss->hash_ctrl)) + return -ENOMEM; +@@ -1147,8 +1113,8 @@ static void ena_com_hash_ctrl_destroy(struct ena_com_dev *ena_dev) + struct ena_rss *rss = &ena_dev->rss; + + if (rss->hash_ctrl) +- dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), +- rss->hash_ctrl, rss->hash_ctrl_dma_addr); ++ dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), rss->hash_ctrl, ++ rss->hash_ctrl_dma_addr); + rss->hash_ctrl = NULL; + } + +@@ -1177,15 +1143,13 @@ static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, + tbl_size = (1ULL << log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + +- rss->rss_ind_tbl = +- dma_alloc_coherent(ena_dev->dmadev, tbl_size, +- &rss->rss_ind_tbl_dma_addr, GFP_KERNEL); ++ rss->rss_ind_tbl = dma_alloc_coherent(ena_dev->dmadev, tbl_size, &rss->rss_ind_tbl_dma_addr, ++ GFP_KERNEL); + if (unlikely(!rss->rss_ind_tbl)) + goto mem_err1; + + tbl_size = (1ULL << log_size) * sizeof(u16); +- rss->host_rss_ind_tbl = +- devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL); ++ rss->host_rss_ind_tbl = devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL); + if (unlikely(!rss->host_rss_ind_tbl)) + goto mem_err2; + +@@ -1197,8 +1161,7 @@ static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, + tbl_size = (1ULL << log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + +- dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, +- rss->rss_ind_tbl_dma_addr); ++ dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, rss->rss_ind_tbl_dma_addr); + rss->rss_ind_tbl = NULL; + mem_err1: + rss->tbl_log_size = 0; +@@ -1261,8 +1224,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, + &create_cmd.sq_ba, + io_sq->desc_addr.phys_addr); + if (unlikely(ret)) { +- netdev_err(ena_dev->net_device, +- "Memory address set failed\n"); ++ netdev_err(ena_dev->net_device, "Memory 
address set failed\n"); + return ret; + } + } +@@ -1273,8 +1235,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, + (struct ena_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (unlikely(ret)) { +- netdev_err(ena_dev->net_device, +- "Failed to create IO SQ. error: %d\n", ret); ++ netdev_err(ena_dev->net_device, "Failed to create IO SQ. error: %d\n", ret); + return ret; + } + +@@ -1292,8 +1253,7 @@ static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, + cmd_completion.llq_descriptors_offset); + } + +- netdev_dbg(ena_dev->net_device, "Created sq[%u], depth[%u]\n", +- io_sq->idx, io_sq->q_depth); ++ netdev_dbg(ena_dev->net_device, "Created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth); + + return ret; + } +@@ -1420,8 +1380,7 @@ int ena_com_create_io_cq(struct ena_com_dev *ena_dev, + (struct ena_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (unlikely(ret)) { +- netdev_err(ena_dev->net_device, +- "Failed to create IO CQ. error: %d\n", ret); ++ netdev_err(ena_dev->net_device, "Failed to create IO CQ. 
error: %d\n", ret); + return ret; + } + +@@ -1440,8 +1399,7 @@ int ena_com_create_io_cq(struct ena_com_dev *ena_dev, + (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.numa_node_register_offset); + +- netdev_dbg(ena_dev->net_device, "Created cq[%u], depth[%u]\n", +- io_cq->idx, io_cq->q_depth); ++ netdev_dbg(ena_dev->net_device, "Created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth); + + return ret; + } +@@ -1451,8 +1409,7 @@ int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid, + struct ena_com_io_cq **io_cq) + { + if (qid >= ENA_TOTAL_NUM_QUEUES) { +- netdev_err(ena_dev->net_device, +- "Invalid queue number %d but the max is %d\n", qid, ++ netdev_err(ena_dev->net_device, "Invalid queue number %d but the max is %d\n", qid, + ENA_TOTAL_NUM_QUEUES); + return -EINVAL; + } +@@ -1492,8 +1449,7 @@ void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev) + spin_lock_irqsave(&admin_queue->q_lock, flags); + while (atomic_read(&admin_queue->outstanding_cmds) != 0) { + spin_unlock_irqrestore(&admin_queue->q_lock, flags); +- ena_delay_exponential_backoff_us(exp++, +- ena_dev->ena_min_poll_delay_us); ++ ena_delay_exponential_backoff_us(exp++, ena_dev->ena_min_poll_delay_us); + spin_lock_irqsave(&admin_queue->q_lock, flags); + } + spin_unlock_irqrestore(&admin_queue->q_lock, flags); +@@ -1519,8 +1475,7 @@ int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, + sizeof(destroy_resp)); + + if (unlikely(ret && (ret != -ENODEV))) +- netdev_err(ena_dev->net_device, +- "Failed to destroy IO CQ. error: %d\n", ret); ++ netdev_err(ena_dev->net_device, "Failed to destroy IO CQ. 
error: %d\n", ret); + + return ret; + } +@@ -1588,8 +1543,7 @@ int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag) + sizeof(resp)); + + if (unlikely(ret)) +- netdev_err(ena_dev->net_device, +- "Failed to config AENQ ret: %d\n", ret); ++ netdev_err(ena_dev->net_device, "Failed to config AENQ ret: %d\n", ret); + + return ret; + } +@@ -1610,8 +1564,7 @@ int ena_com_get_dma_width(struct ena_com_dev *ena_dev) + netdev_dbg(ena_dev->net_device, "ENA dma width: %d\n", width); + + if ((width < 32) || width > ENA_MAX_PHYS_ADDR_SIZE_BITS) { +- netdev_err(ena_dev->net_device, "DMA width illegal value: %d\n", +- width); ++ netdev_err(ena_dev->net_device, "DMA width illegal value: %d\n", width); + return -EINVAL; + } + +@@ -1633,19 +1586,16 @@ int ena_com_validate_version(struct ena_com_dev *ena_dev) + ctrl_ver = ena_com_reg_bar_read32(ena_dev, + ENA_REGS_CONTROLLER_VERSION_OFF); + +- if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) || +- (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) { ++ if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) || (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) { + netdev_err(ena_dev->net_device, "Reg read timeout occurred\n"); + return -ETIME; + } + + dev_info(ena_dev->dmadev, "ENA device version: %d.%d\n", +- (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >> +- ENA_REGS_VERSION_MAJOR_VERSION_SHIFT, ++ (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >> ENA_REGS_VERSION_MAJOR_VERSION_SHIFT, + ver & ENA_REGS_VERSION_MINOR_VERSION_MASK); + +- dev_info(ena_dev->dmadev, +- "ENA controller version: %d.%d.%d implementation version %d\n", ++ dev_info(ena_dev->dmadev, "ENA controller version: %d.%d.%d implementation version %d\n", + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> + ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> +@@ -1694,20 +1644,17 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev) + + size = ADMIN_SQ_SIZE(admin_queue->q_depth); + if (sq->entries) +- 
dma_free_coherent(ena_dev->dmadev, size, sq->entries, +- sq->dma_addr); ++ dma_free_coherent(ena_dev->dmadev, size, sq->entries, sq->dma_addr); + sq->entries = NULL; + + size = ADMIN_CQ_SIZE(admin_queue->q_depth); + if (cq->entries) +- dma_free_coherent(ena_dev->dmadev, size, cq->entries, +- cq->dma_addr); ++ dma_free_coherent(ena_dev->dmadev, size, cq->entries, cq->dma_addr); + cq->entries = NULL; + + size = ADMIN_AENQ_SIZE(aenq->q_depth); + if (ena_dev->aenq.entries) +- dma_free_coherent(ena_dev->dmadev, size, aenq->entries, +- aenq->dma_addr); ++ dma_free_coherent(ena_dev->dmadev, size, aenq->entries, aenq->dma_addr); + aenq->entries = NULL; + } + +@@ -1733,10 +1680,8 @@ int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev) + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + spin_lock_init(&mmio_read->lock); +- mmio_read->read_resp = +- dma_alloc_coherent(ena_dev->dmadev, +- sizeof(*mmio_read->read_resp), +- &mmio_read->read_resp_dma_addr, GFP_KERNEL); ++ mmio_read->read_resp = dma_alloc_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp), ++ &mmio_read->read_resp_dma_addr, GFP_KERNEL); + if (unlikely(!mmio_read->read_resp)) + goto err; + +@@ -1767,8 +1712,8 @@ void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev) + writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF); + writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF); + +- dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp), +- mmio_read->read_resp, mmio_read->read_resp_dma_addr); ++ dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp), mmio_read->read_resp, ++ mmio_read->read_resp_dma_addr); + + mmio_read->read_resp = NULL; + } +@@ -1800,8 +1745,7 @@ int ena_com_admin_init(struct ena_com_dev *ena_dev, + } + + if (!(dev_sts & ENA_REGS_DEV_STS_READY_MASK)) { +- netdev_err(ena_dev->net_device, +- "Device isn't ready, abort com init\n"); ++ netdev_err(ena_dev->net_device, "Device isn't ready, abort com init\n"); + return 
-ENODEV; + } + +@@ -1878,8 +1822,7 @@ int ena_com_create_io_queue(struct ena_com_dev *ena_dev, + int ret; + + if (ctx->qid >= ENA_TOTAL_NUM_QUEUES) { +- netdev_err(ena_dev->net_device, +- "Qid (%d) is bigger than max num of queues (%d)\n", ++ netdev_err(ena_dev->net_device, "Qid (%d) is bigger than max num of queues (%d)\n", + ctx->qid, ENA_TOTAL_NUM_QUEUES); + return -EINVAL; + } +@@ -1905,8 +1848,7 @@ int ena_com_create_io_queue(struct ena_com_dev *ena_dev, + + if (ctx->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + /* header length is limited to 8 bits */ +- io_sq->tx_max_header_size = +- min_t(u32, ena_dev->tx_max_header_size, SZ_256); ++ io_sq->tx_max_header_size = min_t(u32, ena_dev->tx_max_header_size, SZ_256); + + ret = ena_com_init_io_sq(ena_dev, ctx, io_sq); + if (ret) +@@ -1938,8 +1880,7 @@ void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid) + struct ena_com_io_cq *io_cq; + + if (qid >= ENA_TOTAL_NUM_QUEUES) { +- netdev_err(ena_dev->net_device, +- "Qid (%d) is bigger than max num of queues (%d)\n", ++ netdev_err(ena_dev->net_device, "Qid (%d) is bigger than max num of queues (%d)\n", + qid, ENA_TOTAL_NUM_QUEUES); + return; + } +@@ -1983,8 +1924,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + if (rc) + return rc; + +- if (get_resp.u.max_queue_ext.version != +- ENA_FEATURE_MAX_QUEUE_EXT_VER) ++ if (get_resp.u.max_queue_ext.version != ENA_FEATURE_MAX_QUEUE_EXT_VER) + return -EINVAL; + + memcpy(&get_feat_ctx->max_queue_ext, &get_resp.u.max_queue_ext, +@@ -2025,18 +1965,15 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS, 0); + + if (!rc) +- memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints, +- sizeof(get_resp.u.hw_hints)); ++ memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints, sizeof(get_resp.u.hw_hints)); + else if (rc == -EOPNOTSUPP) +- memset(&get_feat_ctx->hw_hints, 0x0, +- sizeof(get_feat_ctx->hw_hints)); ++ 
memset(&get_feat_ctx->hw_hints, 0x0, sizeof(get_feat_ctx->hw_hints)); + else + return rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ, 0); + if (!rc) +- memcpy(&get_feat_ctx->llq, &get_resp.u.llq, +- sizeof(get_resp.u.llq)); ++ memcpy(&get_feat_ctx->llq, &get_resp.u.llq, sizeof(get_resp.u.llq)); + else if (rc == -EOPNOTSUPP) + memset(&get_feat_ctx->llq, 0x0, sizeof(get_feat_ctx->llq)); + else +@@ -2084,8 +2021,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data) + aenq_common = &aenq_e->aenq_common_desc; + + /* Go over all the events */ +- while ((READ_ONCE(aenq_common->flags) & +- ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { ++ while ((READ_ONCE(aenq_common->flags) & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { + /* Make sure the phase bit (ownership) is as expected before + * reading the rest of the descriptor. + */ +@@ -2094,8 +2030,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data) + timestamp = (u64)aenq_common->timestamp_low | + ((u64)aenq_common->timestamp_high << 32); + +- netdev_dbg(ena_dev->net_device, +- "AENQ! Group[%x] Syndrome[%x] timestamp: [%llus]\n", ++ netdev_dbg(ena_dev->net_device, "AENQ! 
Group[%x] Syndrome[%x] timestamp: [%llus]\n", + aenq_common->group, aenq_common->syndrome, timestamp); + + /* Handle specific event*/ +@@ -2124,8 +2059,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *ena_dev, void *data) + + /* write the aenq doorbell after all AENQ descriptors were read */ + mb(); +- writel_relaxed((u32)aenq->head, +- ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); ++ writel_relaxed((u32)aenq->head, ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); + } + + int ena_com_dev_reset(struct ena_com_dev *ena_dev, +@@ -2137,15 +2071,13 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev, + stat = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + cap = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF); + +- if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) || +- (cap == ENA_MMIO_READ_TIMEOUT))) { ++ if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) || (cap == ENA_MMIO_READ_TIMEOUT))) { + netdev_err(ena_dev->net_device, "Reg read32 timeout occurred\n"); + return -ETIME; + } + + if ((stat & ENA_REGS_DEV_STS_READY_MASK) == 0) { +- netdev_err(ena_dev->net_device, +- "Device isn't ready, can't reset device\n"); ++ netdev_err(ena_dev->net_device, "Device isn't ready, can't reset device\n"); + return -EINVAL; + } + +@@ -2168,8 +2100,7 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev, + rc = wait_for_reset_state(ena_dev, timeout, + ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK); + if (rc != 0) { +- netdev_err(ena_dev->net_device, +- "Reset indication didn't turn on\n"); ++ netdev_err(ena_dev->net_device, "Reset indication didn't turn on\n"); + return rc; + } + +@@ -2177,8 +2108,7 @@ int ena_com_dev_reset(struct ena_com_dev *ena_dev, + writel(0, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF); + rc = wait_for_reset_state(ena_dev, timeout, 0); + if (rc != 0) { +- netdev_err(ena_dev->net_device, +- "Reset indication didn't turn off\n"); ++ netdev_err(ena_dev->net_device, "Reset indication didn't turn off\n"); + return rc; + } + +@@ -2215,8 +2145,7 @@ static int 
ena_get_dev_stats(struct ena_com_dev *ena_dev, + sizeof(*get_resp)); + + if (unlikely(ret)) +- netdev_err(ena_dev->net_device, +- "Failed to get stats. error: %d\n", ret); ++ netdev_err(ena_dev->net_device, "Failed to get stats. error: %d\n", ret); + + return ret; + } +@@ -2228,8 +2157,7 @@ int ena_com_get_eni_stats(struct ena_com_dev *ena_dev, + int ret; + + if (!ena_com_get_cap(ena_dev, ENA_ADMIN_ENI_STATS)) { +- netdev_err(ena_dev->net_device, +- "Capability %d isn't supported\n", ++ netdev_err(ena_dev->net_device, "Capability %d isn't supported\n", + ENA_ADMIN_ENI_STATS); + return -EOPNOTSUPP; + } +@@ -2266,8 +2194,7 @@ int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, u32 mtu) + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) { +- netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", +- ENA_ADMIN_MTU); ++ netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", ENA_ADMIN_MTU); + return -EOPNOTSUPP; + } + +@@ -2286,8 +2213,7 @@ int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, u32 mtu) + sizeof(resp)); + + if (unlikely(ret)) +- netdev_err(ena_dev->net_device, +- "Failed to set mtu %d. error: %d\n", mtu, ret); ++ netdev_err(ena_dev->net_device, "Failed to set mtu %d. 
error: %d\n", mtu, ret); + + return ret; + } +@@ -2301,8 +2227,7 @@ int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, + ret = ena_com_get_feature(ena_dev, &resp, + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0); + if (unlikely(ret)) { +- netdev_err(ena_dev->net_device, +- "Failed to get offload capabilities %d\n", ret); ++ netdev_err(ena_dev->net_device, "Failed to get offload capabilities %d\n", ret); + return ret; + } + +@@ -2320,8 +2245,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) + struct ena_admin_get_feat_resp get_resp; + int ret; + +- if (!ena_com_check_supported_feature_id(ena_dev, +- ENA_ADMIN_RSS_HASH_FUNCTION)) { ++ if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_HASH_FUNCTION)) { + netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", + ENA_ADMIN_RSS_HASH_FUNCTION); + return -EOPNOTSUPP; +@@ -2334,8 +2258,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) + return ret; + + if (!(get_resp.u.flow_hash_func.supported_func & BIT(rss->hash_func))) { +- netdev_err(ena_dev->net_device, +- "Func hash %d isn't supported by device, abort\n", ++ netdev_err(ena_dev->net_device, "Func hash %d isn't supported by device, abort\n", + rss->hash_func); + return -EOPNOTSUPP; + } +@@ -2365,8 +2288,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + if (unlikely(ret)) { +- netdev_err(ena_dev->net_device, +- "Failed to set hash function %d. error: %d\n", ++ netdev_err(ena_dev->net_device, "Failed to set hash function %d. 
error: %d\n", + rss->hash_func, ret); + return -EINVAL; + } +@@ -2398,16 +2320,15 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + return rc; + + if (!(BIT(func) & get_resp.u.flow_hash_func.supported_func)) { +- netdev_err(ena_dev->net_device, +- "Flow hash function %d isn't supported\n", func); ++ netdev_err(ena_dev->net_device, "Flow hash function %d isn't supported\n", func); + return -EOPNOTSUPP; + } + + if ((func == ENA_ADMIN_TOEPLITZ) && key) { + if (key_len != sizeof(hash_key->key)) { + netdev_err(ena_dev->net_device, +- "key len (%u) doesn't equal the supported size (%zu)\n", +- key_len, sizeof(hash_key->key)); ++ "key len (%u) doesn't equal the supported size (%zu)\n", key_len, ++ sizeof(hash_key->key)); + return -EINVAL; + } + memcpy(hash_key->key, key, key_len); +@@ -2495,8 +2416,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev) + struct ena_admin_set_feat_resp resp; + int ret; + +- if (!ena_com_check_supported_feature_id(ena_dev, +- ENA_ADMIN_RSS_HASH_INPUT)) { ++ if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_HASH_INPUT)) { + netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", + ENA_ADMIN_RSS_HASH_INPUT); + return -EOPNOTSUPP; +@@ -2527,8 +2447,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev) + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + if (unlikely(ret)) +- netdev_err(ena_dev->net_device, +- "Failed to set hash input. error: %d\n", ret); ++ netdev_err(ena_dev->net_device, "Failed to set hash input. 
error: %d\n", ret); + + return ret; + } +@@ -2605,8 +2524,7 @@ int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev, + int rc; + + if (proto >= ENA_ADMIN_RSS_PROTO_NUM) { +- netdev_err(ena_dev->net_device, "Invalid proto num (%u)\n", +- proto); ++ netdev_err(ena_dev->net_device, "Invalid proto num (%u)\n", proto); + return -EINVAL; + } + +@@ -2658,8 +2576,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) + struct ena_admin_set_feat_resp resp; + int ret; + +- if (!ena_com_check_supported_feature_id( +- ena_dev, ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG)) { ++ if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG)) { + netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", + ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG); + return -EOPNOTSUPP; +@@ -2699,8 +2616,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) + sizeof(resp)); + + if (unlikely(ret)) +- netdev_err(ena_dev->net_device, +- "Failed to set indirect table. error: %d\n", ret); ++ netdev_err(ena_dev->net_device, "Failed to set indirect table. 
error: %d\n", ret); + + return ret; + } +@@ -2779,9 +2695,8 @@ int ena_com_allocate_host_info(struct ena_com_dev *ena_dev) + { + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + +- host_attr->host_info = +- dma_alloc_coherent(ena_dev->dmadev, SZ_4K, +- &host_attr->host_info_dma_addr, GFP_KERNEL); ++ host_attr->host_info = dma_alloc_coherent(ena_dev->dmadev, SZ_4K, ++ &host_attr->host_info_dma_addr, GFP_KERNEL); + if (unlikely(!host_attr->host_info)) + return -ENOMEM; + +@@ -2827,8 +2742,7 @@ void ena_com_delete_debug_area(struct ena_com_dev *ena_dev) + + if (host_attr->debug_area_virt_addr) { + dma_free_coherent(ena_dev->dmadev, host_attr->debug_area_size, +- host_attr->debug_area_virt_addr, +- host_attr->debug_area_dma_addr); ++ host_attr->debug_area_virt_addr, host_attr->debug_area_dma_addr); + host_attr->debug_area_virt_addr = NULL; + } + } +@@ -2877,8 +2791,7 @@ int ena_com_set_host_attributes(struct ena_com_dev *ena_dev) + sizeof(resp)); + + if (unlikely(ret)) +- netdev_err(ena_dev->net_device, +- "Failed to set host attributes: %d\n", ret); ++ netdev_err(ena_dev->net_device, "Failed to set host attributes: %d\n", ret); + + return ret; + } +@@ -2896,8 +2809,7 @@ static int ena_com_update_nonadaptive_moderation_interval(struct ena_com_dev *en + u32 *intr_moder_interval) + { + if (!intr_delay_resolution) { +- netdev_err(ena_dev->net_device, +- "Illegal interrupt delay granularity value\n"); ++ netdev_err(ena_dev->net_device, "Illegal interrupt delay granularity value\n"); + return -EFAULT; + } + +@@ -2935,14 +2847,12 @@ int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev) + + if (rc) { + if (rc == -EOPNOTSUPP) { +- netdev_dbg(ena_dev->net_device, +- "Feature %d isn't supported\n", ++ netdev_dbg(ena_dev->net_device, "Feature %d isn't supported\n", + ENA_ADMIN_INTERRUPT_MODERATION); + rc = 0; + } else { + netdev_err(ena_dev->net_device, +- "Failed to get interrupt moderation admin cmd. 
rc: %d\n", +- rc); ++ "Failed to get interrupt moderation admin cmd. rc: %d\n", rc); + } + + /* no moderation supported, disable adaptive support */ +@@ -2990,8 +2900,7 @@ int ena_com_config_dev_mode(struct ena_com_dev *ena_dev, + (llq_info->descs_num_before_header * sizeof(struct ena_eth_io_tx_desc)); + + if (unlikely(ena_dev->tx_max_header_size == 0)) { +- netdev_err(ena_dev->net_device, +- "The size of the LLQ entry is smaller than needed\n"); ++ netdev_err(ena_dev->net_device, "The size of the LLQ entry is smaller than needed\n"); + return -EINVAL; + } + +diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c +index f9f886289b970..933e619b3a313 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c ++++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c +@@ -18,8 +18,7 @@ static struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( + cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr + + (head_masked * io_cq->cdesc_entry_size_in_bytes)); + +- desc_phase = (READ_ONCE(cdesc->status) & +- ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >> ++ desc_phase = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT; + + if (desc_phase != expected_phase) +@@ -65,8 +64,8 @@ static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, + + io_sq->entries_in_tx_burst_left--; + netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device, +- "Decreasing entries_in_tx_burst_left of queue %d to %d\n", +- io_sq->qid, io_sq->entries_in_tx_burst_left); ++ "Decreasing entries_in_tx_burst_left of queue %d to %d\n", io_sq->qid, ++ io_sq->entries_in_tx_burst_left); + } + + /* Make sure everything was written into the bounce buffer before +@@ -75,8 +74,8 @@ static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, + wmb(); + + /* The line is completed. 
Copy it to dev */ +- __iowrite64_copy(io_sq->desc_addr.pbuf_dev_addr + dst_offset, +- bounce_buffer, (llq_info->desc_list_entry_size) / 8); ++ __iowrite64_copy(io_sq->desc_addr.pbuf_dev_addr + dst_offset, bounce_buffer, ++ (llq_info->desc_list_entry_size) / 8); + + io_sq->tail++; + +@@ -102,16 +101,14 @@ static int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, + header_offset = + llq_info->descs_num_before_header * io_sq->desc_entry_size; + +- if (unlikely((header_offset + header_len) > +- llq_info->desc_list_entry_size)) { ++ if (unlikely((header_offset + header_len) > llq_info->desc_list_entry_size)) { + netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, + "Trying to write header larger than llq entry can accommodate\n"); + return -EFAULT; + } + + if (unlikely(!bounce_buffer)) { +- netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, +- "Bounce buffer is NULL\n"); ++ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, "Bounce buffer is NULL\n"); + return -EFAULT; + } + +@@ -129,8 +126,7 @@ static void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) + bounce_buffer = pkt_ctrl->curr_bounce_buf; + + if (unlikely(!bounce_buffer)) { +- netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, +- "Bounce buffer is NULL\n"); ++ netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, "Bounce buffer is NULL\n"); + return NULL; + } + +@@ -247,8 +243,7 @@ static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, + + ena_com_cq_inc_head(io_cq); + count++; +- last = (READ_ONCE(cdesc->status) & +- ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> ++ last = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT; + } while (!last); + +@@ -369,9 +364,8 @@ static void ena_com_rx_set_flags(struct ena_com_io_cq *io_cq, + + netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device, + "l3_proto %d l4_proto %d l3_csum_err %d l4_csum_err %d hash %d frag %d cdesc_status %x\n", +- ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto, 
+- ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err, +- ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status); ++ ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto, ena_rx_ctx->l3_csum_err, ++ ena_rx_ctx->l4_csum_err, ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status); + } + + /*****************************************************************************/ +@@ -403,13 +397,12 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, + + if (unlikely(header_len > io_sq->tx_max_header_size)) { + netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, +- "Header size is too large %d max header: %d\n", +- header_len, io_sq->tx_max_header_size); ++ "Header size is too large %d max header: %d\n", header_len, ++ io_sq->tx_max_header_size); + return -EINVAL; + } + +- if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && +- !buffer_to_push)) { ++ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && !buffer_to_push)) { + netdev_err(ena_com_io_sq_to_ena_dev(io_sq)->net_device, + "Push header wasn't provided in LLQ mode\n"); + return -EINVAL; +@@ -556,13 +549,11 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, + } + + netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device, +- "Fetch rx packet: queue %d completed desc: %d\n", io_cq->qid, +- nb_hw_desc); ++ "Fetch rx packet: queue %d completed desc: %d\n", io_cq->qid, nb_hw_desc); + + if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) { + netdev_err(ena_com_io_cq_to_ena_dev(io_cq)->net_device, +- "Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc, +- ena_rx_ctx->max_bufs); ++ "Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc, ena_rx_ctx->max_bufs); + return -ENOSPC; + } + +@@ -586,8 +577,8 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, + io_sq->next_to_comp += nb_hw_desc; + + netdev_dbg(ena_com_io_cq_to_ena_dev(io_cq)->net_device, +- "[%s][QID#%d] Updating SQ head to: %d\n", __func__, +- io_sq->qid, io_sq->next_to_comp); ++ "[%s][QID#%d] Updating SQ head to: %d\n", __func__, io_sq->qid, ++ 
io_sq->next_to_comp); + + /* Get rx flags from the last pkt */ + ena_com_rx_set_flags(io_cq, ena_rx_ctx, cdesc); +@@ -624,8 +615,8 @@ int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, + desc->req_id = req_id; + + netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device, +- "[%s] Adding single RX desc, Queue: %u, req_id: %u\n", +- __func__, io_sq->qid, req_id); ++ "[%s] Adding single RX desc, Queue: %u, req_id: %u\n", __func__, io_sq->qid, ++ req_id); + + desc->buff_addr_lo = (u32)ena_buf->paddr; + desc->buff_addr_hi = +diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h +index 372b259279eca..6eba034646525 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h ++++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h +@@ -145,8 +145,8 @@ static inline bool ena_com_is_doorbell_needed(struct ena_com_io_sq *io_sq, + } + + netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device, +- "Queue: %d num_descs: %d num_entries_needed: %d\n", +- io_sq->qid, num_descs, num_entries_needed); ++ "Queue: %d num_descs: %d num_entries_needed: %d\n", io_sq->qid, num_descs, ++ num_entries_needed); + + return num_entries_needed > io_sq->entries_in_tx_burst_left; + } +@@ -157,15 +157,14 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) + u16 tail = io_sq->tail; + + netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device, +- "Write submission queue doorbell for queue: %d tail: %d\n", +- io_sq->qid, tail); ++ "Write submission queue doorbell for queue: %d tail: %d\n", io_sq->qid, tail); + + writel(tail, io_sq->db_addr); + + if (is_llq_max_tx_burst_exists(io_sq)) { + netdev_dbg(ena_com_io_sq_to_ena_dev(io_sq)->net_device, +- "Reset available entries in tx burst for queue %d to %d\n", +- io_sq->qid, max_entries_in_tx_burst); ++ "Reset available entries in tx burst for queue %d to %d\n", io_sq->qid, ++ max_entries_in_tx_burst); + io_sq->entries_in_tx_burst_left = max_entries_in_tx_burst; + } + +@@ -248,8 
+247,8 @@ static inline int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, + + *req_id = READ_ONCE(cdesc->req_id); + if (unlikely(*req_id >= io_cq->q_depth)) { +- netdev_err(ena_com_io_cq_to_ena_dev(io_cq)->net_device, +- "Invalid req id %d\n", cdesc->req_id); ++ netdev_err(ena_com_io_cq_to_ena_dev(io_cq)->net_device, "Invalid req id %d\n", ++ cdesc->req_id); + return -EINVAL; + } + +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index 95ed32542edfe..e343e0ae8ffda 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -116,11 +116,9 @@ int ena_xmit_common(struct ena_adapter *adapter, + if (unlikely(rc)) { + netif_err(adapter, tx_queued, adapter->netdev, + "Failed to prepare tx bufs\n"); +- ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1, +- &ring->syncp); ++ ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1, &ring->syncp); + if (rc != -ENOMEM) +- ena_reset_device(adapter, +- ENA_REGS_RESET_DRIVER_INVALID_STATE); ++ ena_reset_device(adapter, ENA_REGS_RESET_DRIVER_INVALID_STATE); + return rc; + } + +@@ -485,8 +483,7 @@ static struct page *ena_alloc_map_page(struct ena_ring *rx_ring, + */ + page = dev_alloc_page(); + if (!page) { +- ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, +- &rx_ring->syncp); ++ ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, &rx_ring->syncp); + return ERR_PTR(-ENOSPC); + } + +@@ -545,8 +542,8 @@ static void ena_unmap_rx_buff_attrs(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info, + unsigned long attrs) + { +- dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE, +- DMA_BIDIRECTIONAL, attrs); ++ dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE, DMA_BIDIRECTIONAL, ++ attrs); + } + + static void ena_free_rx_page(struct ena_ring *rx_ring, +@@ -827,8 +824,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) + &req_id); + if (rc) { + if 
(unlikely(rc == -EINVAL)) +- handle_invalid_req_id(tx_ring, req_id, NULL, +- false); ++ handle_invalid_req_id(tx_ring, req_id, NULL, false); + break; + } + +@@ -1054,8 +1050,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, + DMA_FROM_DEVICE); + + if (!reuse_rx_buf_page) +- ena_unmap_rx_buff_attrs(rx_ring, rx_info, +- DMA_ATTR_SKIP_CPU_SYNC); ++ ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC); + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, + page_offset + buf_offset, len, buf_len); +@@ -1328,8 +1323,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + adapter = netdev_priv(rx_ring->netdev); + + if (rc == -ENOSPC) { +- ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1, +- &rx_ring->syncp); ++ ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1, &rx_ring->syncp); + ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS); + } else { + ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, +@@ -1819,8 +1813,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) + if (!ena_dev->rss.tbl_log_size) { + rc = ena_rss_init_default(adapter); + if (rc && (rc != -EOPNOTSUPP)) { +- netif_err(adapter, ifup, adapter->netdev, +- "Failed to init RSS rc: %d\n", rc); ++ netif_err(adapter, ifup, adapter->netdev, "Failed to init RSS rc: %d\n", rc); + return rc; + } + } +@@ -2756,8 +2749,7 @@ static void ena_config_debug_area(struct ena_adapter *adapter) + rc = ena_com_set_host_attributes(adapter->ena_dev); + if (rc) { + if (rc == -EOPNOTSUPP) +- netif_warn(adapter, drv, adapter->netdev, +- "Cannot set host attributes\n"); ++ netif_warn(adapter, drv, adapter->netdev, "Cannot set host attributes\n"); + else + netif_err(adapter, drv, adapter->netdev, + "Cannot set host attributes\n"); +@@ -3756,8 +3748,8 @@ static int ena_rss_init_default(struct ena_adapter *adapter) + } + } + +- rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL, +- ENA_HASH_KEY_SIZE, 0xFFFFFFFF); ++ rc = 
ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL, ENA_HASH_KEY_SIZE, ++ 0xFFFFFFFF); + if (unlikely(rc && (rc != -EOPNOTSUPP))) { + dev_err(dev, "Cannot fill hash function\n"); + goto err_fill_indir; +-- +2.43.0 + diff --git a/queue-6.8/net-fec-add-fec_enet_deinit.patch b/queue-6.8/net-fec-add-fec_enet_deinit.patch new file mode 100644 index 00000000000..982b314ea78 --- /dev/null +++ b/queue-6.8/net-fec-add-fec_enet_deinit.patch @@ -0,0 +1,63 @@ +From bab53ba1c6481c1a020f17cb482aef8a884f83a4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 May 2024 13:05:28 +0800 +Subject: net:fec: Add fec_enet_deinit() + +From: Xiaolei Wang + +[ Upstream commit bf0497f53c8535f99b72041529d3f7708a6e2c0d ] + +When fec_probe() fails or fec_drv_remove() needs to release the +fec queue and remove a NAPI context, therefore add a function +corresponding to fec_enet_init() and call fec_enet_deinit() which +does the opposite to release memory and remove a NAPI context. + +Fixes: 59d0f7465644 ("net: fec: init multi queue date structure") +Signed-off-by: Xiaolei Wang +Reviewed-by: Wei Fang +Reviewed-by: Andrew Lunn +Link: https://lore.kernel.org/r/20240524050528.4115581-1-xiaolei.wang@windriver.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/fec_main.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c +index e92a830330590..eec38f1e17360 100644 +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -4134,6 +4134,14 @@ static int fec_enet_init(struct net_device *ndev) + return ret; + } + ++static void fec_enet_deinit(struct net_device *ndev) ++{ ++ struct fec_enet_private *fep = netdev_priv(ndev); ++ ++ netif_napi_del(&fep->napi); ++ fec_enet_free_queue(ndev); ++} ++ + #ifdef CONFIG_OF + static int fec_reset_phy(struct platform_device *pdev) + { +@@ -4528,6 +4536,7 @@ 
fec_probe(struct platform_device *pdev) + fec_enet_mii_remove(fep); + failed_mii_init: + failed_irq: ++ fec_enet_deinit(ndev); + failed_init: + fec_ptp_stop(pdev); + failed_reset: +@@ -4591,6 +4600,7 @@ fec_drv_remove(struct platform_device *pdev) + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + ++ fec_enet_deinit(ndev); + free_netdev(ndev); + } + +-- +2.43.0 + diff --git a/queue-6.8/net-fix-__dst_negative_advice-race.patch b/queue-6.8/net-fix-__dst_negative_advice-race.patch new file mode 100644 index 00000000000..f4752e9d801 --- /dev/null +++ b/queue-6.8/net-fix-__dst_negative_advice-race.patch @@ -0,0 +1,206 @@ +From 3404ff41b808130d00a6b5ff72d5fc1b9cc6f29b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 May 2024 11:43:53 +0000 +Subject: net: fix __dst_negative_advice() race + +From: Eric Dumazet + +[ Upstream commit 92f1655aa2b2294d0b49925f3b875a634bd3b59e ] + +__dst_negative_advice() does not enforce proper RCU rules when +sk->dst_cache must be cleared, leading to possible UAF. + +RCU rules are that we must first clear sk->sk_dst_cache, +then call dst_release(old_dst). + +Note that sk_dst_reset(sk) is implementing this protocol correctly, +while __dst_negative_advice() uses the wrong order. + +Given that ip6_negative_advice() has special logic +against RTF_CACHE, this means each of the three ->negative_advice() +existing methods must perform the sk_dst_reset() themselves. + +Note the check against NULL dst is centralized in +__dst_negative_advice(), there is no need to duplicate +it in various callbacks. + +Many thanks to Clement Lecigne for tracking this issue. + +This old bug became visible after the blamed commit, using UDP sockets. 
+ +Fixes: a87cb3e48ee8 ("net: Facility to report route quality of connected sockets") +Reported-by: Clement Lecigne +Diagnosed-by: Clement Lecigne +Signed-off-by: Eric Dumazet +Cc: Tom Herbert +Reviewed-by: David Ahern +Link: https://lore.kernel.org/r/20240528114353.1794151-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/dst_ops.h | 2 +- + include/net/sock.h | 13 +++---------- + net/ipv4/route.c | 22 ++++++++-------------- + net/ipv6/route.c | 29 +++++++++++++++-------------- + net/xfrm/xfrm_policy.c | 11 +++-------- + 5 files changed, 30 insertions(+), 47 deletions(-) + +diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h +index 6d1c8541183db..3a9001a042a5c 100644 +--- a/include/net/dst_ops.h ++++ b/include/net/dst_ops.h +@@ -24,7 +24,7 @@ struct dst_ops { + void (*destroy)(struct dst_entry *); + void (*ifdown)(struct dst_entry *, + struct net_device *dev); +- struct dst_entry * (*negative_advice)(struct dst_entry *); ++ void (*negative_advice)(struct sock *sk, struct dst_entry *); + void (*link_failure)(struct sk_buff *); + void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu, +diff --git a/include/net/sock.h b/include/net/sock.h +index 54a7967613348..afb5bcf217ab8 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -2155,17 +2155,10 @@ sk_dst_get(const struct sock *sk) + + static inline void __dst_negative_advice(struct sock *sk) + { +- struct dst_entry *ndst, *dst = __sk_dst_get(sk); ++ struct dst_entry *dst = __sk_dst_get(sk); + +- if (dst && dst->ops->negative_advice) { +- ndst = dst->ops->negative_advice(dst); +- +- if (ndst != dst) { +- rcu_assign_pointer(sk->sk_dst_cache, ndst); +- sk_tx_queue_clear(sk); +- WRITE_ONCE(sk->sk_dst_pending_confirm, 0); +- } +- } ++ if (dst && dst->ops->negative_advice) ++ dst->ops->negative_advice(sk, dst); + } + + static inline void dst_negative_advice(struct sock *sk) +diff --git a/net/ipv4/route.c b/net/ipv4/route.c 
+index 6d6e1c3e67d8f..cab30af5be348 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -132,7 +132,8 @@ struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); + static unsigned int ipv4_default_advmss(const struct dst_entry *dst); + INDIRECT_CALLABLE_SCOPE + unsigned int ipv4_mtu(const struct dst_entry *dst); +-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); ++static void ipv4_negative_advice(struct sock *sk, ++ struct dst_entry *dst); + static void ipv4_link_failure(struct sk_buff *skb); + static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu, +@@ -837,22 +838,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf + __ip_do_redirect(rt, skb, &fl4, true); + } + +-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ++static void ipv4_negative_advice(struct sock *sk, ++ struct dst_entry *dst) + { + struct rtable *rt = dst_rtable(dst); +- struct dst_entry *ret = dst; + +- if (rt) { +- if (dst->obsolete > 0) { +- ip_rt_put(rt); +- ret = NULL; +- } else if ((rt->rt_flags & RTCF_REDIRECTED) || +- rt->dst.expires) { +- ip_rt_put(rt); +- ret = NULL; +- } +- } +- return ret; ++ if ((dst->obsolete > 0) || ++ (rt->rt_flags & RTCF_REDIRECTED) || ++ rt->dst.expires) ++ sk_dst_reset(sk); + } + + /* +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 0d6dd1f0c51eb..a28246192aeea 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -87,7 +87,8 @@ struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); + static unsigned int ip6_default_advmss(const struct dst_entry *dst); + INDIRECT_CALLABLE_SCOPE + unsigned int ip6_mtu(const struct dst_entry *dst); +-static struct dst_entry *ip6_negative_advice(struct dst_entry *); ++static void ip6_negative_advice(struct sock *sk, ++ struct dst_entry *dst); + static void ip6_dst_destroy(struct dst_entry *); + static void ip6_dst_ifdown(struct dst_entry *, + struct net_device *dev); +@@ 
-2760,24 +2761,24 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, + } + EXPORT_INDIRECT_CALLABLE(ip6_dst_check); + +-static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) ++static void ip6_negative_advice(struct sock *sk, ++ struct dst_entry *dst) + { + struct rt6_info *rt = dst_rt6_info(dst); + +- if (rt) { +- if (rt->rt6i_flags & RTF_CACHE) { +- rcu_read_lock(); +- if (rt6_check_expired(rt)) { +- rt6_remove_exception_rt(rt); +- dst = NULL; +- } +- rcu_read_unlock(); +- } else { +- dst_release(dst); +- dst = NULL; ++ if (rt->rt6i_flags & RTF_CACHE) { ++ rcu_read_lock(); ++ if (rt6_check_expired(rt)) { ++ /* counteract the dst_release() in sk_dst_reset() */ ++ dst_hold(dst); ++ sk_dst_reset(sk); ++ ++ rt6_remove_exception_rt(rt); + } ++ rcu_read_unlock(); ++ return; + } +- return dst; ++ sk_dst_reset(sk); + } + + static void ip6_link_failure(struct sk_buff *skb) +diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c +index 55db22473e3eb..947a6e695b296 100644 +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -3764,15 +3764,10 @@ static void xfrm_link_failure(struct sk_buff *skb) + /* Impossible. Such dst must be popped before reaches point of failure. 
*/ + } + +-static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) ++static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst) + { +- if (dst) { +- if (dst->obsolete) { +- dst_release(dst); +- dst = NULL; +- } +- } +- return dst; ++ if (dst->obsolete) ++ sk_dst_reset(sk); + } + + static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr) +-- +2.43.0 + diff --git a/queue-6.8/net-micrel-fix-lan8841_config_intr-after-getting-out.patch b/queue-6.8/net-micrel-fix-lan8841_config_intr-after-getting-out.patch new file mode 100644 index 00000000000..9ecaa203957 --- /dev/null +++ b/queue-6.8/net-micrel-fix-lan8841_config_intr-after-getting-out.patch @@ -0,0 +1,62 @@ +From 4c543ad8fd3c0857794afe5289566186802710a5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 May 2024 10:53:50 +0200 +Subject: net: micrel: Fix lan8841_config_intr after getting out of sleep mode + +From: Horatiu Vultur + +[ Upstream commit 4fb679040d9f758eeb3b4d01bbde6405bf20e64e ] + +When the interrupt is enabled, the function lan8841_config_intr tries to +clear any pending interrupts by reading the interrupt status, then +checks the return value for errors and then continue to enable the +interrupt. It has been seen that once the system gets out of sleep mode, +the interrupt status has the value 0x400 meaning that the PHY detected +that the link was in low power. That is correct value but the problem is +that the check is wrong. We try to check for errors but we return an +error also in this case which is not an error. Therefore fix this by +returning only when there is an error. 
+ +Fixes: a8f1a19d27ef ("net: micrel: Add support for lan8841 PHY") +Signed-off-by: Horatiu Vultur +Reviewed-by: Suman Ghosh +Reviewed-by: Andrew Lunn +Reviewed-by: Russell King (Oracle) +Link: https://lore.kernel.org/r/20240524085350.359812-1-horatiu.vultur@microchip.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/phy/micrel.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c +index 25041b3465734..37bf61eeafffe 100644 +--- a/drivers/net/phy/micrel.c ++++ b/drivers/net/phy/micrel.c +@@ -3475,7 +3475,7 @@ static int lan8841_config_intr(struct phy_device *phydev) + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = phy_read(phydev, LAN8814_INTS); +- if (err) ++ if (err < 0) + return err; + + /* Enable / disable interrupts. It is OK to enable PTP interrupt +@@ -3491,6 +3491,14 @@ static int lan8841_config_intr(struct phy_device *phydev) + return err; + + err = phy_read(phydev, LAN8814_INTS); ++ if (err < 0) ++ return err; ++ ++ /* Getting a positive value doesn't mean that is an error, it ++ * just indicates what was the status. Therefore make sure to ++ * clear the value and say that there is no error. ++ */ ++ err = 0; + } + + return err; +-- +2.43.0 + diff --git a/queue-6.8/net-mlx5-fix-mtmp-register-capability-offset-in-mcam.patch b/queue-6.8/net-mlx5-fix-mtmp-register-capability-offset-in-mcam.patch new file mode 100644 index 00000000000..4415908f106 --- /dev/null +++ b/queue-6.8/net-mlx5-fix-mtmp-register-capability-offset-in-mcam.patch @@ -0,0 +1,42 @@ +From 4a90585d81729743dd56e2ff1b9931e07b5ec415 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 22:26:54 +0300 +Subject: net/mlx5: Fix MTMP register capability offset in MCAM register + +From: Gal Pressman + +[ Upstream commit 1b9f86c6d53245dab087f1b2c05727b5982142ff ] + +The MTMP register (0x900a) capability offset is off-by-one, move it to +the right place. 
+ +Fixes: 1f507e80c700 ("net/mlx5: Expose NIC temperature via hardware monitoring kernel API") +Signed-off-by: Gal Pressman +Reviewed-by: Cosmin Ratiu +Signed-off-by: Tariq Toukan +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + include/linux/mlx5/mlx5_ifc.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h +index 486b7492050c3..d2c27a7227bb4 100644 +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -10265,9 +10265,9 @@ struct mlx5_ifc_mcam_access_reg_bits { + u8 mfrl[0x1]; + u8 regs_39_to_32[0x8]; + +- u8 regs_31_to_10[0x16]; ++ u8 regs_31_to_11[0x15]; + u8 mtmp[0x1]; +- u8 regs_8_to_0[0x9]; ++ u8 regs_9_to_0[0xa]; + }; + + struct mlx5_ifc_mcam_access_reg_bits1 { +-- +2.43.0 + diff --git a/queue-6.8/net-mlx5-lag-do-bond-only-if-slaves-agree-on-roce-st.patch b/queue-6.8/net-mlx5-lag-do-bond-only-if-slaves-agree-on-roce-st.patch new file mode 100644 index 00000000000..69625892724 --- /dev/null +++ b/queue-6.8/net-mlx5-lag-do-bond-only-if-slaves-agree-on-roce-st.patch @@ -0,0 +1,74 @@ +From bcf2fcaf60a5ed88fab38bf81c3e414c40a35c97 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 22:26:52 +0300 +Subject: net/mlx5: Lag, do bond only if slaves agree on roce state +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Maher Sanalla + +[ Upstream commit 51ef9305b8f40946d65c40368ffb4c14636d369a ] + +Currently, the driver does not enforce that lag bond slaves must have +matching roce capabilities. 
Yet, in mlx5_do_bond(), the driver attempts +to enable roce on all vports of the bond slaves, causing the following +syndrome when one slave has no roce fw support: + +mlx5_cmd_out_err:809:(pid 25427): MODIFY_NIC_VPORT_CONTEXT(0×755) op_mod(0×0) +failed, status bad parameter(0×3), syndrome (0xc1f678), err(-22) + +Thus, create HW lag only if bond's slaves agree on roce state, +either all slaves have roce support resulting in a roce lag bond, +or none do, resulting in a raw eth bond. + +Fixes: 7907f23adc18 ("net/mlx5: Implement RoCE LAG feature") +Signed-off-by: Maher Sanalla +Signed-off-by: Tariq Toukan +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +index 37598d116f3b8..58a452d20daf7 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +@@ -720,6 +720,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) + struct mlx5_core_dev *dev; + u8 mode; + #endif ++ bool roce_support; + int i; + + for (i = 0; i < ldev->ports; i++) +@@ -746,6 +747,11 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) + if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) + return false; + #endif ++ roce_support = mlx5_get_roce_state(ldev->pf[MLX5_LAG_P1].dev); ++ for (i = 1; i < ldev->ports; i++) ++ if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support) ++ return false; ++ + return true; + } + +@@ -913,8 +919,10 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) + } else if (roce_lag) { + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); +- for (i = 1; i < ldev->ports; i++) +- mlx5_nic_vport_enable_roce(ldev->pf[i].dev); ++ for (i = 1; i < ldev->ports; i++) { ++ if (mlx5_get_roce_state(ldev->pf[i].dev)) ++ 
mlx5_nic_vport_enable_roce(ldev->pf[i].dev); ++ } + } else if (shared_fdb) { + int i; + +-- +2.43.0 + diff --git a/queue-6.8/net-mlx5-use-mlx5_ipsec_rx_status_destroy-to-correct.patch b/queue-6.8/net-mlx5-use-mlx5_ipsec_rx_status_destroy-to-correct.patch new file mode 100644 index 00000000000..f15ba849953 --- /dev/null +++ b/queue-6.8/net-mlx5-use-mlx5_ipsec_rx_status_destroy-to-correct.patch @@ -0,0 +1,70 @@ +From 9f48e8ff1b14b307bb6f3c38c402d81e0cebd4fe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 22:26:55 +0300 +Subject: net/mlx5: Use mlx5_ipsec_rx_status_destroy to correctly delete status + rules + +From: Rahul Rameshbabu + +[ Upstream commit 16d66a4fa81da07bc4ed19f4e53b87263c2f8d38 ] + +rx_create no longer allocates a modify_hdr instance that needs to be +cleaned up. The mlx5_modify_header_dealloc call will lead to a NULL pointer +dereference. A leak in the rules also previously occurred since there are +now two rules populated related to status. + + BUG: kernel NULL pointer dereference, address: 0000000000000000 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 109907067 P4D 109907067 PUD 116890067 PMD 0 + Oops: 0000 [#1] SMP + CPU: 1 PID: 484 Comm: ip Not tainted 6.9.0-rc2-rrameshbabu+ #254 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Arch Linux 1.16.3-1-1 04/01/2014 + RIP: 0010:mlx5_modify_header_dealloc+0xd/0x70 + + Call Trace: + + ? show_regs+0x60/0x70 + ? __die+0x24/0x70 + ? page_fault_oops+0x15f/0x430 + ? free_to_partial_list.constprop.0+0x79/0x150 + ? do_user_addr_fault+0x2c9/0x5c0 + ? exc_page_fault+0x63/0x110 + ? asm_exc_page_fault+0x27/0x30 + ? mlx5_modify_header_dealloc+0xd/0x70 + rx_create+0x374/0x590 + rx_add_rule+0x3ad/0x500 + ? rx_add_rule+0x3ad/0x500 + ? mlx5_cmd_exec+0x2c/0x40 + ? 
mlx5_create_ipsec_obj+0xd6/0x200 + mlx5e_accel_ipsec_fs_add_rule+0x31/0xf0 + mlx5e_xfrm_add_state+0x426/0xc00 + + +Fixes: 94af50c0a9bb ("net/mlx5e: Unify esw and normal IPsec status table creation/destruction") +Signed-off-by: Rahul Rameshbabu +Signed-off-by: Tariq Toukan +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +index 41a2543a52cda..e51b03d4c717f 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +@@ -750,8 +750,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + err_fs_ft: + if (rx->allow_tunnel_mode) + mlx5_eswitch_unblock_encap(mdev); +- mlx5_del_flow_rules(rx->status.rule); +- mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); ++ mlx5_ipsec_rx_status_destroy(ipsec, rx); + err_add: + mlx5_destroy_flow_table(rx->ft.status); + err_fs_ft_status: +-- +2.43.0 + diff --git a/queue-6.8/net-mlx5e-fix-ipsec-tunnel-mode-offload-feature-chec.patch b/queue-6.8/net-mlx5e-fix-ipsec-tunnel-mode-offload-feature-chec.patch new file mode 100644 index 00000000000..8ed2ffb20b8 --- /dev/null +++ b/queue-6.8/net-mlx5e-fix-ipsec-tunnel-mode-offload-feature-chec.patch @@ -0,0 +1,55 @@ +From d9ced53464fd9e533587039a43fe4f6243948c4e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 22:26:56 +0300 +Subject: net/mlx5e: Fix IPsec tunnel mode offload feature check + +From: Rahul Rameshbabu + +[ Upstream commit 9a52f6d44f4521773b4699b4ed34b8e21d5a175c ] + +Remove faulty check disabling checksum offload and GSO for offload of +simple IPsec tunnel L4 traffic. 
Comment previously describing the deleted +code incorrectly claimed the check prevented double tunnel (or three layers +of ip headers). + +Fixes: f1267798c980 ("net/mlx5: Fix checksum issue of VXLAN and IPsec crypto offload") +Signed-off-by: Rahul Rameshbabu +Signed-off-by: Tariq Toukan +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 17 +++++------------ + 1 file changed, 5 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +index 2ed99772f168a..e1a241d3b418c 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +@@ -98,18 +98,11 @@ mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features) + if (!x || !x->xso.offload_handle) + goto out_disable; + +- if (xo->inner_ipproto) { +- /* Cannot support tunnel packet over IPsec tunnel mode +- * because we cannot offload three IP header csum +- */ +- if (x->props.mode == XFRM_MODE_TUNNEL) +- goto out_disable; +- +- /* Only support UDP or TCP L4 checksum */ +- if (xo->inner_ipproto != IPPROTO_UDP && +- xo->inner_ipproto != IPPROTO_TCP) +- goto out_disable; +- } ++ /* Only support UDP or TCP L4 checksum */ ++ if (xo->inner_ipproto && ++ xo->inner_ipproto != IPPROTO_UDP && ++ xo->inner_ipproto != IPPROTO_TCP) ++ goto out_disable; + + return features; + +-- +2.43.0 + diff --git a/queue-6.8/net-mlx5e-fix-udp-gso-for-encapsulated-packets.patch b/queue-6.8/net-mlx5e-fix-udp-gso-for-encapsulated-packets.patch new file mode 100644 index 00000000000..c2693f3e7cc --- /dev/null +++ b/queue-6.8/net-mlx5e-fix-udp-gso-for-encapsulated-packets.patch @@ -0,0 +1,68 @@ +From e0763d08d62c49e9df8ba7db0e7c0976f9026e53 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 22:26:59 +0300 +Subject: net/mlx5e: Fix UDP 
GSO for encapsulated packets + +From: Gal Pressman + +[ Upstream commit 83fea49f2711fc90c0d115b0ed04046b45155b65 ] + +When the skb is encapsulated, adjust the inner UDP header instead of the +outer one, and account for UDP header (instead of TCP) in the inline +header size calculation. + +Fixes: 689adf0d4892 ("net/mlx5e: Add UDP GSO support") +Reported-by: Jason Baron +Closes: https://lore.kernel.org/netdev/c42961cb-50b9-4a9a-bd43-87fe48d88d29@akamai.com/ +Signed-off-by: Gal Pressman +Reviewed-by: Dragos Tatulea +Reviewed-by: Boris Pismenny +Signed-off-by: Tariq Toukan +Reviewed-by: Simon Horman +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 8 +++++++- + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 6 +++++- + 2 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +index caa34b9c161e5..33e32584b07f5 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +@@ -102,8 +102,14 @@ static inline void + mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb) + { + int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr); ++ struct udphdr *udphdr; + +- udp_hdr(skb)->len = htons(payload_len); ++ if (skb->encapsulation) ++ udphdr = (struct udphdr *)skb_inner_transport_header(skb); ++ else ++ udphdr = udp_hdr(skb); ++ ++ udphdr->len = htons(payload_len); + } + + struct mlx5e_accel_tx_state { +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +index e21a3b4128ce8..0964b16ca5619 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +@@ -153,7 +153,11 @@ mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop) + + *hopbyhop = 0; + if 
(skb->encapsulation) { +- ihs = skb_inner_tcp_all_headers(skb); ++ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ++ ihs = skb_inner_transport_offset(skb) + ++ sizeof(struct udphdr); ++ else ++ ihs = skb_inner_tcp_all_headers(skb); + stats->tso_inner_packets++; + stats->tso_inner_bytes += skb->len - ihs; + } else { +-- +2.43.0 + diff --git a/queue-6.8/net-mlx5e-use-rx_missed_errors-instead-of-rx_dropped.patch b/queue-6.8/net-mlx5e-use-rx_missed_errors-instead-of-rx_dropped.patch new file mode 100644 index 00000000000..ed177a73a89 --- /dev/null +++ b/queue-6.8/net-mlx5e-use-rx_missed_errors-instead-of-rx_dropped.patch @@ -0,0 +1,46 @@ +From baeb28aea7e825b3eaf7cfba0fdc19e5010016d3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 22:26:58 +0300 +Subject: net/mlx5e: Use rx_missed_errors instead of rx_dropped for reporting + buffer exhaustion + +From: Carolina Jubran + +[ Upstream commit 5c74195d5dd977e97556e6fa76909b831c241230 ] + +Previously, the driver incorrectly used rx_dropped to report device +buffer exhaustion. + +According to the documentation, rx_dropped should not be used to count +packets dropped due to buffer exhaustion, which is the purpose of +rx_missed_errors. + +Use rx_missed_errors as intended for counting packets dropped due to +buffer exhaustion. + +Fixes: 269e6b3af3bf ("net/mlx5e: Report additional error statistics in get stats ndo") +Signed-off-by: Carolina Jubran +Signed-off-by: Tariq Toukan +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 952f1f98138cc..d410625461550 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -3769,7 +3769,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) + mlx5e_fold_sw_stats64(priv, stats); + } + +- stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; ++ stats->rx_missed_errors = priv->stats.qcnt.rx_out_of_buffer; + + stats->rx_length_errors = + PPORT_802_3_GET(pstats, a_in_range_length_errors) + +-- +2.43.0 + diff --git a/queue-6.8/net-phy-micrel-set-soft_reset-callback-to-genphy_sof.patch b/queue-6.8/net-phy-micrel-set-soft_reset-callback-to-genphy_sof.patch new file mode 100644 index 00000000000..ab718ddc7e4 --- /dev/null +++ b/queue-6.8/net-phy-micrel-set-soft_reset-callback-to-genphy_sof.patch @@ -0,0 +1,46 @@ +From 666d573baa8b4ead9813561cbda9e4249da54daf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 May 2024 08:54:06 +0200 +Subject: net: phy: micrel: set soft_reset callback to genphy_soft_reset for + KSZ8061 + +From: Mathieu Othacehe + +[ Upstream commit 128d54fbcb14b8717ecf596d3dbded327b9980b3 ] + +Following a similar reinstate for the KSZ8081 and KSZ9031. + +Older kernels would use the genphy_soft_reset if the PHY did not implement +a .soft_reset. + +The KSZ8061 errata described here: +https://ww1.microchip.com/downloads/en/DeviceDoc/KSZ8061-Errata-DS80000688B.pdf +and worked around with 232ba3a51c ("net: phy: Micrel KSZ8061: link failure after cable connect") +is back again without this soft reset. 
+ +Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset") +Tested-by: Karim Ben Houcine +Signed-off-by: Mathieu Othacehe +Reviewed-by: Andrew Lunn +Reviewed-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/phy/micrel.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c +index 827db6a6ff397..25041b3465734 100644 +--- a/drivers/net/phy/micrel.c ++++ b/drivers/net/phy/micrel.c +@@ -4773,6 +4773,7 @@ static struct phy_driver ksphy_driver[] = { + /* PHY_BASIC_FEATURES */ + .probe = kszphy_probe, + .config_init = ksz8061_config_init, ++ .soft_reset = genphy_soft_reset, + .config_intr = kszphy_config_intr, + .handle_interrupt = kszphy_handle_interrupt, + .suspend = kszphy_suspend, +-- +2.43.0 + diff --git a/queue-6.8/net-sched-taprio-extend-minimum-interval-restriction.patch b/queue-6.8/net-sched-taprio-extend-minimum-interval-restriction.patch new file mode 100644 index 00000000000..6bbd49c6cbd --- /dev/null +++ b/queue-6.8/net-sched-taprio-extend-minimum-interval-restriction.patch @@ -0,0 +1,100 @@ +From 15dd8079f29f8966fdb63127f802873fedef4b00 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 May 2024 18:39:55 +0300 +Subject: net/sched: taprio: extend minimum interval restriction to entire + cycle too + +From: Vladimir Oltean + +[ Upstream commit fb66df20a7201e60f2b13d7f95d031b31a8831d3 ] + +It is possible for syzbot to side-step the restriction imposed by the +blamed commit in the Fixes: tag, because the taprio UAPI permits a +cycle-time different from (and potentially shorter than) the sum of +entry intervals. + +We need one more restriction, which is that the cycle time itself must +be larger than N * ETH_ZLEN bit times, where N is the number of schedule +entries. 
This restriction needs to apply regardless of whether the cycle +time came from the user or was the implicit, auto-calculated value, so +we move the existing "cycle == 0" check outside the "if "(!new->cycle_time)" +branch. This way covers both conditions and scenarios. + +Add a selftest which illustrates the issue triggered by syzbot. + +Fixes: b5b73b26b3ca ("taprio: Fix allowing too small intervals") +Reported-by: syzbot+a7d2b1d5d1af83035567@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/0000000000007d66bc06196e7c66@google.com/ +Signed-off-by: Vladimir Oltean +Link: https://lore.kernel.org/r/20240527153955.553333-2-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_taprio.c | 10 ++++----- + .../tc-testing/tc-tests/qdiscs/taprio.json | 22 +++++++++++++++++++ + 2 files changed, 27 insertions(+), 5 deletions(-) + +diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c +index 501ce71500658..2db5db5afaa72 100644 +--- a/net/sched/sch_taprio.c ++++ b/net/sched/sch_taprio.c +@@ -1161,11 +1161,6 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, + list_for_each_entry(entry, &new->entries, list) + cycle = ktime_add_ns(cycle, entry->interval); + +- if (!cycle) { +- NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0"); +- return -EINVAL; +- } +- + if (cycle < 0 || cycle > INT_MAX) { + NL_SET_ERR_MSG(extack, "'cycle_time' is too big"); + return -EINVAL; +@@ -1174,6 +1169,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, + new->cycle_time = cycle; + } + ++ if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) { ++ NL_SET_ERR_MSG(extack, "'cycle_time' is too small"); ++ return -EINVAL; ++ } ++ + taprio_calculate_gate_durations(q, new); + + return 0; +diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +index 
387db0d9bfd15..74dba8c671889 100644 +--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json ++++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +@@ -154,6 +154,28 @@ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, ++ { ++ "id": "831f", ++ "name": "Add taprio Qdisc with too short cycle-time", ++ "category": [ ++ "qdisc", ++ "taprio" ++ ], ++ "plugins": { ++ "requires": "nsPlugin" ++ }, ++ "setup": [ ++ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device" ++ ], ++ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 2 queues 1@0 1@1 sched-entry S 01 200000 sched-entry S 02 200000 cycle-time 100 clockid CLOCK_TAI", ++ "expExitCode": "2", ++ "verifyCmd": "$TC qdisc show dev $ETH", ++ "matchPattern": "qdisc taprio 1: root refcnt", ++ "matchCount": "0", ++ "teardown": [ ++ "echo \"1\" > /sys/bus/netdevsim/del_device" ++ ] ++ }, + { + "id": "3e1e", + "name": "Add taprio Qdisc with an invalid cycle-time", +-- +2.43.0 + diff --git a/queue-6.8/net-sched-taprio-make-q-picos_per_byte-available-to-.patch b/queue-6.8/net-sched-taprio-make-q-picos_per_byte-available-to-.patch new file mode 100644 index 00000000000..172fee99014 --- /dev/null +++ b/queue-6.8/net-sched-taprio-make-q-picos_per_byte-available-to-.patch @@ -0,0 +1,95 @@ +From 01a00193d4698c1eacccea8ba4fa2987e0a4141a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 May 2024 18:39:54 +0300 +Subject: net/sched: taprio: make q->picos_per_byte available to + fill_sched_entry() + +From: Vladimir Oltean + +[ Upstream commit e634134180885574d1fe7aa162777ba41e7fcd5b ] + +In commit b5b73b26b3ca ("taprio: Fix allowing too small intervals"), a +comparison of user input against length_to_duration(q, ETH_ZLEN) was +introduced, to avoid RCU stalls due to frequent hrtimers. + +The implementation of length_to_duration() depends on q->picos_per_byte +being set for the link speed. 
The blamed commit in the Fixes: tag has +moved this too late, so the checks introduced above are ineffective. +The q->picos_per_byte is zero at parse_taprio_schedule() -> +parse_sched_list() -> parse_sched_entry() -> fill_sched_entry() time. + +Move the taprio_set_picos_per_byte() call as one of the first things in +taprio_change(), before the bulk of the netlink attribute parsing is +done. That's because it is needed there. + +Add a selftest to make sure the issue doesn't get reintroduced. + +Fixes: 09dbdf28f9f9 ("net/sched: taprio: fix calculation of maximum gate durations") +Signed-off-by: Vladimir Oltean +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20240527153955.553333-1-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/sch_taprio.c | 4 +++- + .../tc-testing/tc-tests/qdiscs/taprio.json | 22 +++++++++++++++++++ + 2 files changed, 25 insertions(+), 1 deletion(-) + +diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c +index ad99409c6325e..501ce71500658 100644 +--- a/net/sched/sch_taprio.c ++++ b/net/sched/sch_taprio.c +@@ -1871,6 +1871,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, + + q->flags = err; + ++ /* Needed for length_to_duration() during netlink attribute parsing */ ++ taprio_set_picos_per_byte(dev, q); ++ + err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags); + if (err < 0) + return err; +@@ -1930,7 +1933,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, + if (err < 0) + goto free_sched; + +- taprio_set_picos_per_byte(dev, q); + taprio_update_queue_max_sdu(q, new_admin, stab); + + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) +diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +index 2d603ef2e375c..387db0d9bfd15 100644 +--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json ++++ 
b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +@@ -132,6 +132,28 @@ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, ++ { ++ "id": "6f62", ++ "name": "Add taprio Qdisc with too short interval", ++ "category": [ ++ "qdisc", ++ "taprio" ++ ], ++ "plugins": { ++ "requires": "nsPlugin" ++ }, ++ "setup": [ ++ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device" ++ ], ++ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 2 queues 1@0 1@1 sched-entry S 01 300 sched-entry S 02 1700 clockid CLOCK_TAI", ++ "expExitCode": "2", ++ "verifyCmd": "$TC qdisc show dev $ETH", ++ "matchPattern": "qdisc taprio 1: root refcnt", ++ "matchCount": "0", ++ "teardown": [ ++ "echo \"1\" > /sys/bus/netdevsim/del_device" ++ ] ++ }, + { + "id": "3e1e", + "name": "Add taprio Qdisc with an invalid cycle-time", +-- +2.43.0 + diff --git a/queue-6.8/net-ti-icssg-prueth-fix-start-counter-for-ft1-filter.patch b/queue-6.8/net-ti-icssg-prueth-fix-start-counter-for-ft1-filter.patch new file mode 100644 index 00000000000..eeb5adb1781 --- /dev/null +++ b/queue-6.8/net-ti-icssg-prueth-fix-start-counter-for-ft1-filter.patch @@ -0,0 +1,39 @@ +From a6c6dfa2905e2fb0da9717ecc7ec4d2dd2d4cf6f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 May 2024 12:00:15 +0530 +Subject: net: ti: icssg-prueth: Fix start counter for ft1 filter + +From: MD Danish Anwar + +[ Upstream commit 56a5cf538c3f2d935b0d81040a8303b6e7fc5fd8 ] + +The start counter for FT1 filter is wrongly set to 0 in the driver. +FT1 is used for source address violation (SAV) check and source address +starts at Byte 6 not Byte 0. Fix this by changing start counter to +ETH_ALEN in icssg_ft1_set_mac_addr(). 
+ +Fixes: e9b4ece7d74b ("net: ti: icssg-prueth: Add Firmware config and classification APIs.") +Signed-off-by: MD Danish Anwar +Link: https://lore.kernel.org/r/20240527063015.263748-1-danishanwar@ti.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/ti/icssg/icssg_classifier.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/ti/icssg/icssg_classifier.c b/drivers/net/ethernet/ti/icssg/icssg_classifier.c +index 6df53ab17fbc5..902a2717785cb 100644 +--- a/drivers/net/ethernet/ti/icssg/icssg_classifier.c ++++ b/drivers/net/ethernet/ti/icssg/icssg_classifier.c +@@ -360,7 +360,7 @@ void icssg_ft1_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac_addr) + { + const u8 mask_addr[] = { 0, 0, 0, 0, 0, 0, }; + +- rx_class_ft1_set_start_len(miig_rt, slice, 0, 6); ++ rx_class_ft1_set_start_len(miig_rt, slice, ETH_ALEN, ETH_ALEN); + rx_class_ft1_set_da(miig_rt, slice, 0, mac_addr); + rx_class_ft1_set_da_mask(miig_rt, slice, 0, mask_addr); + rx_class_ft1_cfg_set_type(miig_rt, slice, 0, FT1_CFG_TYPE_EQ); +-- +2.43.0 + diff --git a/queue-6.8/net-usb-smsc95xx-fix-changing-led_sel-bit-value-upda.patch b/queue-6.8/net-usb-smsc95xx-fix-changing-led_sel-bit-value-upda.patch new file mode 100644 index 00000000000..b070e2f7858 --- /dev/null +++ b/queue-6.8/net-usb-smsc95xx-fix-changing-led_sel-bit-value-upda.patch @@ -0,0 +1,68 @@ +From f5c188f6e58d5e9d7d0fb2c2ba1520ac4f8972d9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 May 2024 14:23:14 +0530 +Subject: net: usb: smsc95xx: fix changing LED_SEL bit value updated from + EEPROM + +From: Parthiban Veerasooran + +[ Upstream commit 52a2f0608366a629d43dacd3191039c95fef74ba ] + +LED Select (LED_SEL) bit in the LED General Purpose IO Configuration +register is used to determine the functionality of external LED pins +(Speed Indicator, Link and Activity Indicator, Full Duplex Link +Indicator). 
The default value for this bit is 0 when no EEPROM is +present. If a EEPROM is present, the default value is the value of the +LED Select bit in the Configuration Flags of the EEPROM. A USB Reset or +Lite Reset (LRST) will cause this bit to be restored to the image value +last loaded from EEPROM, or to be set to 0 if no EEPROM is present. + +While configuring the dual purpose GPIO/LED pins to LED outputs in the +LED General Purpose IO Configuration register, the LED_SEL bit is changed +as 0 and resulting the configured value from the EEPROM is cleared. The +issue is fixed by using read-modify-write approach. + +Fixes: f293501c61c5 ("smsc95xx: configure LED outputs") +Signed-off-by: Parthiban Veerasooran +Reviewed-by: Simon Horman +Reviewed-by: Woojung Huh +Link: https://lore.kernel.org/r/20240523085314.167650-1-Parthiban.Veerasooran@microchip.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/usb/smsc95xx.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c +index cbea246664795..8e82184be5e7d 100644 +--- a/drivers/net/usb/smsc95xx.c ++++ b/drivers/net/usb/smsc95xx.c +@@ -879,7 +879,7 @@ static int smsc95xx_start_rx_path(struct usbnet *dev) + static int smsc95xx_reset(struct usbnet *dev) + { + struct smsc95xx_priv *pdata = dev->driver_priv; +- u32 read_buf, write_buf, burst_cap; ++ u32 read_buf, burst_cap; + int ret = 0, timeout; + + netif_dbg(dev, ifup, dev->net, "entering smsc95xx_reset\n"); +@@ -1003,10 +1003,13 @@ static int smsc95xx_reset(struct usbnet *dev) + return ret; + netif_dbg(dev, ifup, dev->net, "ID_REV = 0x%08x\n", read_buf); + ++ ret = smsc95xx_read_reg(dev, LED_GPIO_CFG, &read_buf); ++ if (ret < 0) ++ return ret; + /* Configure GPIO pins as LED outputs */ +- write_buf = LED_GPIO_CFG_SPD_LED | LED_GPIO_CFG_LNK_LED | +- LED_GPIO_CFG_FDX_LED; +- ret = smsc95xx_write_reg(dev, LED_GPIO_CFG, write_buf); ++ read_buf |= LED_GPIO_CFG_SPD_LED | 
LED_GPIO_CFG_LNK_LED | ++ LED_GPIO_CFG_FDX_LED; ++ ret = smsc95xx_write_reg(dev, LED_GPIO_CFG, read_buf); + if (ret < 0) + return ret; + +-- +2.43.0 + diff --git a/queue-6.8/netfilter-ipset-add-list-flush-to-cancel_gc.patch b/queue-6.8/netfilter-ipset-add-list-flush-to-cancel_gc.patch new file mode 100644 index 00000000000..22973bd6129 --- /dev/null +++ b/queue-6.8/netfilter-ipset-add-list-flush-to-cancel_gc.patch @@ -0,0 +1,39 @@ +From 4095b6d6cb29f5eacec1b81abe9a1f8b9eccd902 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 17 Apr 2024 18:51:41 +0500 +Subject: netfilter: ipset: Add list flush to cancel_gc + +From: Alexander Maltsev + +[ Upstream commit c1193d9bbbd379defe9be3c6de566de684de8a6f ] + +Flushing list in cancel_gc drops references to other lists right away, +without waiting for RCU to destroy list. Fixes race when referenced +ipsets can't be destroyed while referring list is scheduled for destroy. + +Fixes: 97f7cf1cd80e ("netfilter: ipset: fix performance regression in swap operation") +Signed-off-by: Alexander Maltsev +Acked-by: Jozsef Kadlecsik +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/ipset/ip_set_list_set.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c +index 6c3f28bc59b32..54e2a1dd7f5f5 100644 +--- a/net/netfilter/ipset/ip_set_list_set.c ++++ b/net/netfilter/ipset/ip_set_list_set.c +@@ -549,6 +549,9 @@ list_set_cancel_gc(struct ip_set *set) + + if (SET_WITH_TIMEOUT(set)) + timer_shutdown_sync(&map->gc); ++ ++ /* Flush list to drop references to other ipsets */ ++ list_set_flush(set); + } + + static const struct ip_set_type_variant set_variant = { +-- +2.43.0 + diff --git a/queue-6.8/netfilter-nfnetlink_queue-acquire-rcu_read_lock-in-i.patch b/queue-6.8/netfilter-nfnetlink_queue-acquire-rcu_read_lock-in-i.patch new file mode 100644 index 00000000000..5bdec2ce277 --- /dev/null +++ 
b/queue-6.8/netfilter-nfnetlink_queue-acquire-rcu_read_lock-in-i.patch @@ -0,0 +1,79 @@ +From 706a77d15b5ddc1f8d41972cdf60f7bc413a337b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 May 2024 13:23:39 +0000 +Subject: netfilter: nfnetlink_queue: acquire rcu_read_lock() in + instance_destroy_rcu() + +From: Eric Dumazet + +[ Upstream commit dc21c6cc3d6986d938efbf95de62473982c98dec ] + +syzbot reported that nf_reinject() could be called without rcu_read_lock() : + +WARNING: suspicious RCU usage +6.9.0-rc7-syzkaller-02060-g5c1672705a1a #0 Not tainted + +net/netfilter/nfnetlink_queue.c:263 suspicious rcu_dereference_check() usage! + +other info that might help us debug this: + +rcu_scheduler_active = 2, debug_locks = 1 +2 locks held by syz-executor.4/13427: + #0: ffffffff8e334f60 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire include/linux/rcupdate.h:329 [inline] + #0: ffffffff8e334f60 (rcu_callback){....}-{0:0}, at: rcu_do_batch kernel/rcu/tree.c:2190 [inline] + #0: ffffffff8e334f60 (rcu_callback){....}-{0:0}, at: rcu_core+0xa86/0x1830 kernel/rcu/tree.c:2471 + #1: ffff88801ca92958 (&inst->lock){+.-.}-{2:2}, at: spin_lock_bh include/linux/spinlock.h:356 [inline] + #1: ffff88801ca92958 (&inst->lock){+.-.}-{2:2}, at: nfqnl_flush net/netfilter/nfnetlink_queue.c:405 [inline] + #1: ffff88801ca92958 (&inst->lock){+.-.}-{2:2}, at: instance_destroy_rcu+0x30/0x220 net/netfilter/nfnetlink_queue.c:172 + +stack backtrace: +CPU: 0 PID: 13427 Comm: syz-executor.4 Not tainted 6.9.0-rc7-syzkaller-02060-g5c1672705a1a #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/02/2024 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 + lockdep_rcu_suspicious+0x221/0x340 kernel/locking/lockdep.c:6712 + nf_reinject net/netfilter/nfnetlink_queue.c:323 [inline] + nfqnl_reinject+0x6ec/0x1120 net/netfilter/nfnetlink_queue.c:397 + nfqnl_flush net/netfilter/nfnetlink_queue.c:410 [inline] + 
instance_destroy_rcu+0x1ae/0x220 net/netfilter/nfnetlink_queue.c:172 + rcu_do_batch kernel/rcu/tree.c:2196 [inline] + rcu_core+0xafd/0x1830 kernel/rcu/tree.c:2471 + handle_softirqs+0x2d6/0x990 kernel/softirq.c:554 + __do_softirq kernel/softirq.c:588 [inline] + invoke_softirq kernel/softirq.c:428 [inline] + __irq_exit_rcu+0xf4/0x1c0 kernel/softirq.c:637 + irq_exit_rcu+0x9/0x30 kernel/softirq.c:649 + instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline] + sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1043 + + + +Fixes: 9872bec773c2 ("[NETFILTER]: nfnetlink: use RCU for queue instances hash") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Acked-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nfnetlink_queue.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c +index 5cf38fc0a366a..29bfd996e4fc3 100644 +--- a/net/netfilter/nfnetlink_queue.c ++++ b/net/netfilter/nfnetlink_queue.c +@@ -169,7 +169,9 @@ instance_destroy_rcu(struct rcu_head *head) + struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, + rcu); + ++ rcu_read_lock(); + nfqnl_flush(inst, NULL, 0); ++ rcu_read_unlock(); + kfree(inst); + module_put(THIS_MODULE); + } +-- +2.43.0 + diff --git a/queue-6.8/netfilter-nft_fib-allow-from-forward-input-without-i.patch b/queue-6.8/netfilter-nft_fib-allow-from-forward-input-without-i.patch new file mode 100644 index 00000000000..c2bb9b10214 --- /dev/null +++ b/queue-6.8/netfilter-nft_fib-allow-from-forward-input-without-i.patch @@ -0,0 +1,45 @@ +From c713e81885ee9e0884c075b74516cd2902950dd4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 May 2024 10:25:05 -0400 +Subject: netfilter: nft_fib: allow from forward/input without iif selector + +From: Eric Garver + +[ Upstream commit e8ded22ef0f4831279c363c264cd41cd9d59ca9e ] + +This removes the restriction of needing iif 
selector in the +forward/input hooks for fib lookups when requested result is +oif/oifname. + +Removing this restriction allows "loose" lookups from the forward hooks. + +Fixes: be8be04e5ddb ("netfilter: nft_fib: reverse path filter for policy-based routing on iif") +Signed-off-by: Eric Garver +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_fib.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c +index 37cfe6dd712d8..b58f62195ff3e 100644 +--- a/net/netfilter/nft_fib.c ++++ b/net/netfilter/nft_fib.c +@@ -35,11 +35,9 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: +- hooks = (1 << NF_INET_PRE_ROUTING); +- if (priv->flags & NFTA_FIB_F_IIF) { +- hooks |= (1 << NF_INET_LOCAL_IN) | +- (1 << NF_INET_FORWARD); +- } ++ hooks = (1 << NF_INET_PRE_ROUTING) | ++ (1 << NF_INET_LOCAL_IN) | ++ (1 << NF_INET_FORWARD); + break; + case NFT_FIB_RESULT_ADDRTYPE: + if (priv->flags & NFTA_FIB_F_IIF) +-- +2.43.0 + diff --git a/queue-6.8/netfilter-nft_payload-restore-vlan-q-in-q-match-supp.patch b/queue-6.8/netfilter-nft_payload-restore-vlan-q-in-q-match-supp.patch new file mode 100644 index 00000000000..be89a8533e7 --- /dev/null +++ b/queue-6.8/netfilter-nft_payload-restore-vlan-q-in-q-match-supp.patch @@ -0,0 +1,74 @@ +From af1b9360f8746d8e5322eaf09d6a3a8f10988703 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 May 2024 23:02:24 +0200 +Subject: netfilter: nft_payload: restore vlan q-in-q match support + +From: Pablo Neira Ayuso + +[ Upstream commit aff5c01fa1284d606f8e7cbdaafeef2511bb46c1 ] + +Revert f6ae9f120dad ("netfilter: nft_payload: add C-VLAN support"). 
+ +f41f72d09ee1 ("netfilter: nft_payload: simplify vlan header handling") +already allows to match on inner vlan tags by subtract the vlan header +size to the payload offset which has been popped and stored in skbuff +metadata fields. + +Fixes: f6ae9f120dad ("netfilter: nft_payload: add C-VLAN support") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_payload.c | 23 +++++++---------------- + 1 file changed, 7 insertions(+), 16 deletions(-) + +diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c +index 0a689c8e0295d..a3cb5dbcb362c 100644 +--- a/net/netfilter/nft_payload.c ++++ b/net/netfilter/nft_payload.c +@@ -45,36 +45,27 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) + int mac_off = skb_mac_header(skb) - skb->data; + u8 *vlanh, *dst_u8 = (u8 *) d; + struct vlan_ethhdr veth; +- u8 vlan_hlen = 0; +- +- if ((skb->protocol == htons(ETH_P_8021AD) || +- skb->protocol == htons(ETH_P_8021Q)) && +- offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN) +- vlan_hlen += VLAN_HLEN; + + vlanh = (u8 *) &veth; +- if (offset < VLAN_ETH_HLEN + vlan_hlen) { ++ if (offset < VLAN_ETH_HLEN) { + u8 ethlen = len; + +- if (vlan_hlen && +- skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0) +- return false; +- else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth)) ++ if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth)) + return false; + +- if (offset + len > VLAN_ETH_HLEN + vlan_hlen) +- ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen; ++ if (offset + len > VLAN_ETH_HLEN) ++ ethlen -= offset + len - VLAN_ETH_HLEN; + +- memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen); ++ memcpy(dst_u8, vlanh + offset, ethlen); + + len -= ethlen; + if (len == 0) + return true; + + dst_u8 += ethlen; +- offset = ETH_HLEN + vlan_hlen; ++ offset = ETH_HLEN; + } else { +- offset -= VLAN_HLEN + vlan_hlen; ++ offset -= VLAN_HLEN; + } + + return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; 
+-- +2.43.0 + diff --git a/queue-6.8/netfilter-nft_payload-skbuff-vlan-metadata-mangle-su.patch b/queue-6.8/netfilter-nft_payload-skbuff-vlan-metadata-mangle-su.patch new file mode 100644 index 00000000000..71d1d14186a --- /dev/null +++ b/queue-6.8/netfilter-nft_payload-skbuff-vlan-metadata-mangle-su.patch @@ -0,0 +1,147 @@ +From 6d37f6b957bce5f41681f60d0d6a510870d6fc31 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 May 2024 22:50:34 +0200 +Subject: netfilter: nft_payload: skbuff vlan metadata mangle support + +From: Pablo Neira Ayuso + +[ Upstream commit 33c563ebf8d3deed7d8addd20d77398ac737ef9a ] + +Userspace assumes vlan header is present at a given offset, but vlan +offload allows to store this in metadata fields of the skbuff. Hence +mangling vlan results in a garbled packet. Handle this transparently by +adding a parser to the kernel. + +If vlan metadata is present and payload offset is over 12 bytes (source +and destination mac address fields), then subtract vlan header present +in vlan metadata, otherwise mangle vlan metadata based on offset and +length, extracting data from the source register. + +This is similar to: + + 8cfd23e67401 ("netfilter: nft_payload: work around vlan header stripping") + +to deal with vlan payload mangling. 
+ +Fixes: 7ec3f7b47b8d ("netfilter: nft_payload: add packet mangling support") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_payload.c | 72 +++++++++++++++++++++++++++++++++---- + 1 file changed, 65 insertions(+), 7 deletions(-) + +diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c +index a3cb5dbcb362c..0c43d748e23ae 100644 +--- a/net/netfilter/nft_payload.c ++++ b/net/netfilter/nft_payload.c +@@ -145,12 +145,12 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt) + return pkt->inneroff; + } + +-static bool nft_payload_need_vlan_copy(const struct nft_payload *priv) ++static bool nft_payload_need_vlan_adjust(u32 offset, u32 len) + { +- unsigned int len = priv->offset + priv->len; ++ unsigned int boundary = offset + len; + + /* data past ether src/dst requested, copy needed */ +- if (len > offsetof(struct ethhdr, h_proto)) ++ if (boundary > offsetof(struct ethhdr, h_proto)) + return true; + + return false; +@@ -174,7 +174,7 @@ void nft_payload_eval(const struct nft_expr *expr, + goto err; + + if (skb_vlan_tag_present(skb) && +- nft_payload_need_vlan_copy(priv)) { ++ nft_payload_need_vlan_adjust(priv->offset, priv->len)) { + if (!nft_payload_copy_vlan(dest, skb, + priv->offset, priv->len)) + goto err; +@@ -801,21 +801,79 @@ struct nft_payload_set { + u8 csum_flags; + }; + ++/* This is not struct vlan_hdr. 
*/ ++struct nft_payload_vlan_hdr { ++ __be16 h_vlan_proto; ++ __be16 h_vlan_TCI; ++}; ++ ++static bool ++nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u8 offset, u8 len, ++ int *vlan_hlen) ++{ ++ struct nft_payload_vlan_hdr *vlanh; ++ __be16 vlan_proto; ++ u16 vlan_tci; ++ ++ if (offset >= offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto)) { ++ *vlan_hlen = VLAN_HLEN; ++ return true; ++ } ++ ++ switch (offset) { ++ case offsetof(struct vlan_ethhdr, h_vlan_proto): ++ if (len == 2) { ++ vlan_proto = nft_reg_load_be16(src); ++ skb->vlan_proto = vlan_proto; ++ } else if (len == 4) { ++ vlanh = (struct nft_payload_vlan_hdr *)src; ++ __vlan_hwaccel_put_tag(skb, vlanh->h_vlan_proto, ++ ntohs(vlanh->h_vlan_TCI)); ++ } else { ++ return false; ++ } ++ break; ++ case offsetof(struct vlan_ethhdr, h_vlan_TCI): ++ if (len != 2) ++ return false; ++ ++ vlan_tci = ntohs(nft_reg_load_be16(src)); ++ skb->vlan_tci = vlan_tci; ++ break; ++ default: ++ return false; ++ } ++ ++ return true; ++} ++ + static void nft_payload_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) + { + const struct nft_payload_set *priv = nft_expr_priv(expr); +- struct sk_buff *skb = pkt->skb; + const u32 *src = ®s->data[priv->sreg]; +- int offset, csum_offset; ++ int offset, csum_offset, vlan_hlen = 0; ++ struct sk_buff *skb = pkt->skb; + __wsum fsum, tsum; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; +- offset = skb_mac_header(skb) - skb->data; ++ ++ if (skb_vlan_tag_present(skb) && ++ nft_payload_need_vlan_adjust(priv->offset, priv->len)) { ++ if (!nft_payload_set_vlan(src, skb, ++ priv->offset, priv->len, ++ &vlan_hlen)) ++ goto err; ++ ++ if (!vlan_hlen) ++ return; ++ } ++ ++ offset = skb_mac_header(skb) - skb->data - vlan_hlen; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); +-- +2.43.0 + diff --git 
a/queue-6.8/netfilter-tproxy-bail-out-if-ip-has-been-disabled-on.patch b/queue-6.8/netfilter-tproxy-bail-out-if-ip-has-been-disabled-on.patch new file mode 100644 index 00000000000..0f8ea63d314 --- /dev/null +++ b/queue-6.8/netfilter-tproxy-bail-out-if-ip-has-been-disabled-on.patch @@ -0,0 +1,45 @@ +From 2ff89b874f5f4a71197f3effa21f306407fe7b80 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 May 2024 12:27:15 +0200 +Subject: netfilter: tproxy: bail out if IP has been disabled on the device + +From: Florian Westphal + +[ Upstream commit 21a673bddc8fd4873c370caf9ae70ffc6d47e8d3 ] + +syzbot reports: +general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN PTI +KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] +[..] +RIP: 0010:nf_tproxy_laddr4+0xb7/0x340 net/ipv4/netfilter/nf_tproxy_ipv4.c:62 +Call Trace: + nft_tproxy_eval_v4 net/netfilter/nft_tproxy.c:56 [inline] + nft_tproxy_eval+0xa9a/0x1a00 net/netfilter/nft_tproxy.c:168 + +__in_dev_get_rcu() can return NULL, so check for this. 
+ +Reported-and-tested-by: syzbot+b94a6818504ea90d7661@syzkaller.appspotmail.com +Fixes: cc6eb4338569 ("tproxy: use the interface primary IP address as a default value for --on-ip") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c +index 69e3317996043..73e66a088e25e 100644 +--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c ++++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c +@@ -58,6 +58,8 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) + + laddr = 0; + indev = __in_dev_get_rcu(skb->dev); ++ if (!indev) ++ return daddr; + + in_dev_for_each_ifa_rcu(ifa, indev) { + if (ifa->ifa_flags & IFA_F_SECONDARY) +-- +2.43.0 + diff --git a/queue-6.8/netkit-fix-pkt_type-override-upon-netkit-pass-verdic.patch b/queue-6.8/netkit-fix-pkt_type-override-upon-netkit-pass-verdic.patch new file mode 100644 index 00000000000..96331f20f0e --- /dev/null +++ b/queue-6.8/netkit-fix-pkt_type-override-upon-netkit-pass-verdic.patch @@ -0,0 +1,104 @@ +From 8b4ba7e81807092d25ce35abbe0d4a8f75c3896a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 May 2024 18:36:17 +0200 +Subject: netkit: Fix pkt_type override upon netkit pass verdict + +From: Daniel Borkmann + +[ Upstream commit 3998d184267dfcff858aaa84d3de17429253629d ] + +When running Cilium connectivity test suite with netkit in L2 mode, we +found that compared to tcx a few tests were failing which pushed traffic +into an L7 proxy sitting in host namespace. The problem in particular is +around the invocation of eth_type_trans() in netkit. + +In case of tcx, this is run before the tcx ingress is triggered inside +host namespace and thus if the BPF program uses the bpf_skb_change_type() +helper the newly set type is retained. 
However, in case of netkit, the +late eth_type_trans() invocation overrides the earlier decision from the +BPF program which eventually leads to the test failure. + +Instead of eth_type_trans(), split out the relevant parts, meaning, reset +of mac header and call to eth_skb_pkt_type() before the BPF program is run +in order to have the same behavior as with tcx, and refactor a small helper +called eth_skb_pull_mac() which is run in case it's passed up the stack +where the mac header must be pulled. With this all connectivity tests pass. + +Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device") +Signed-off-by: Daniel Borkmann +Acked-by: Nikolay Aleksandrov +Link: https://lore.kernel.org/r/20240524163619.26001-2-daniel@iogearbox.net +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + drivers/net/netkit.c | 4 +++- + include/linux/etherdevice.h | 8 ++++++++ + net/ethernet/eth.c | 4 +--- + 3 files changed, 12 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c +index c24aa9d93f9f6..5cc794a2a789d 100644 +--- a/drivers/net/netkit.c ++++ b/drivers/net/netkit.c +@@ -55,6 +55,7 @@ static void netkit_prep_forward(struct sk_buff *skb, bool xnet) + skb_scrub_packet(skb, xnet); + skb->priority = 0; + nf_skip_egress(skb, true); ++ skb_reset_mac_header(skb); + } + + static struct netkit *netkit_priv(const struct net_device *dev) +@@ -78,6 +79,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) + skb_orphan_frags(skb, GFP_ATOMIC))) + goto drop; + netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer))); ++ eth_skb_pkt_type(skb, peer); + skb->dev = peer; + entry = rcu_dereference(nk->active); + if (entry) +@@ -85,7 +87,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) + switch (ret) { + case NETKIT_NEXT: + case NETKIT_PASS: +- skb->protocol = eth_type_trans(skb, skb->dev); ++ eth_skb_pull_mac(skb); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 
+ if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) { + dev_sw_netstats_tx_add(dev, 1, len); +diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h +index 297231854ada5..e44913a8200fd 100644 +--- a/include/linux/etherdevice.h ++++ b/include/linux/etherdevice.h +@@ -632,6 +632,14 @@ static inline void eth_skb_pkt_type(struct sk_buff *skb, + } + } + ++static inline struct ethhdr *eth_skb_pull_mac(struct sk_buff *skb) ++{ ++ struct ethhdr *eth = (struct ethhdr *)skb->data; ++ ++ skb_pull_inline(skb, ETH_HLEN); ++ return eth; ++} ++ + /** + * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame + * @skb: Buffer to pad +diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c +index 049c3adeb8504..4e3651101b866 100644 +--- a/net/ethernet/eth.c ++++ b/net/ethernet/eth.c +@@ -161,9 +161,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) + skb->dev = dev; + skb_reset_mac_header(skb); + +- eth = (struct ethhdr *)skb->data; +- skb_pull_inline(skb, ETH_HLEN); +- ++ eth = eth_skb_pull_mac(skb); + eth_skb_pkt_type(skb, dev); + + /* +-- +2.43.0 + diff --git a/queue-6.8/netkit-fix-setting-mac-address-in-l2-mode.patch b/queue-6.8/netkit-fix-setting-mac-address-in-l2-mode.patch new file mode 100644 index 00000000000..65d10b9902e --- /dev/null +++ b/queue-6.8/netkit-fix-setting-mac-address-in-l2-mode.patch @@ -0,0 +1,101 @@ +From 20ff0db450100a4ea2e177eeae631f5595587a15 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 May 2024 18:36:16 +0200 +Subject: netkit: Fix setting mac address in l2 mode + +From: Daniel Borkmann + +[ Upstream commit d6fe532b7499e4575f9647879b7a34625817fe7f ] + +When running Cilium connectivity test suite with netkit in L2 mode, we +found that it is expected to be able to specify a custom MAC address for +the devices, in particular, cilium-cni obtains the specified MAC address +by querying the endpoint and sets the MAC address of the interface inside +the Pod. 
Thus, fix the missing support in netkit for L2 mode. + +Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device") +Signed-off-by: Daniel Borkmann +Acked-by: Nikolay Aleksandrov +Acked-by: Stanislav Fomichev +Link: https://lore.kernel.org/r/20240524163619.26001-1-daniel@iogearbox.net +Signed-off-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + drivers/net/netkit.c | 26 +++++++++++++++++++++----- + 1 file changed, 21 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c +index 39171380ccf29..c24aa9d93f9f6 100644 +--- a/drivers/net/netkit.c ++++ b/drivers/net/netkit.c +@@ -155,6 +155,16 @@ static void netkit_set_multicast(struct net_device *dev) + /* Nothing to do, we receive whatever gets pushed to us! */ + } + ++static int netkit_set_macaddr(struct net_device *dev, void *sa) ++{ ++ struct netkit *nk = netkit_priv(dev); ++ ++ if (nk->mode != NETKIT_L2) ++ return -EOPNOTSUPP; ++ ++ return eth_mac_addr(dev, sa); ++} ++ + static void netkit_set_headroom(struct net_device *dev, int headroom) + { + struct netkit *nk = netkit_priv(dev), *nk2; +@@ -198,6 +208,7 @@ static const struct net_device_ops netkit_netdev_ops = { + .ndo_start_xmit = netkit_xmit, + .ndo_set_rx_mode = netkit_set_multicast, + .ndo_set_rx_headroom = netkit_set_headroom, ++ .ndo_set_mac_address = netkit_set_macaddr, + .ndo_get_iflink = netkit_get_iflink, + .ndo_get_peer_dev = netkit_peer_dev, + .ndo_get_stats64 = netkit_get_stats, +@@ -300,9 +311,11 @@ static int netkit_validate(struct nlattr *tb[], struct nlattr *data[], + + if (!attr) + return 0; +- NL_SET_ERR_MSG_ATTR(extack, attr, +- "Setting Ethernet address is not supported"); +- return -EOPNOTSUPP; ++ if (nla_len(attr) != ETH_ALEN) ++ return -EINVAL; ++ if (!is_valid_ether_addr(nla_data(attr))) ++ return -EADDRNOTAVAIL; ++ return 0; + } + + static struct rtnl_link_ops netkit_link_ops; +@@ -365,6 +378,9 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev, + strscpy(ifname, 
"nk%d", IFNAMSIZ); + ifname_assign_type = NET_NAME_ENUM; + } ++ if (mode != NETKIT_L2 && ++ (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS])) ++ return -EOPNOTSUPP; + + net = rtnl_link_get_net(src_net, tbp); + if (IS_ERR(net)) +@@ -379,7 +395,7 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev, + + netif_inherit_tso_max(peer, dev); + +- if (mode == NETKIT_L2) ++ if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS])) + eth_hw_addr_random(peer); + if (ifmp && dev->ifindex) + peer->ifindex = ifmp->ifi_index; +@@ -402,7 +418,7 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev, + if (err < 0) + goto err_configure_peer; + +- if (mode == NETKIT_L2) ++ if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS]) + eth_hw_addr_random(dev); + if (tb[IFLA_IFNAME]) + nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); +-- +2.43.0 + diff --git a/queue-6.8/nvme-fix-multipath-batched-completion-accounting.patch b/queue-6.8/nvme-fix-multipath-batched-completion-accounting.patch new file mode 100644 index 00000000000..5d6d3eddd61 --- /dev/null +++ b/queue-6.8/nvme-fix-multipath-batched-completion-accounting.patch @@ -0,0 +1,67 @@ +From 0e289ded592da7a9cb906e578793fa7f1d9e2759 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 May 2024 09:50:47 -0700 +Subject: nvme: fix multipath batched completion accounting + +From: Keith Busch + +[ Upstream commit 2fe7b422460d14b33027d8770f7be8d26bcb2639 ] + +Batched completions were missing the io stats accounting and bio trace +events. Move the common code to a helper and call it from the batched +and non-batched functions. 
+ +Fixes: d4d957b53d91ee ("nvme-multipath: support io stats on the mpath device") +Reviewed-by: Christoph Hellwig +Reviewed-by: Sagi Grimberg +Reviewed-by: Chaitanya Kulkarni +Reviewed-by: Hannes Reinecke +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/core.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index 3cc79817e4d75..fcf7ce19caea4 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -405,6 +405,14 @@ static inline void nvme_end_req_zoned(struct request *req) + } + } + ++static inline void __nvme_end_req(struct request *req) ++{ ++ nvme_end_req_zoned(req); ++ nvme_trace_bio_complete(req); ++ if (req->cmd_flags & REQ_NVME_MPATH) ++ nvme_mpath_end_request(req); ++} ++ + static inline void nvme_end_req(struct request *req) + { + blk_status_t status = nvme_error_status(nvme_req(req)->status); +@@ -415,10 +423,7 @@ static inline void nvme_end_req(struct request *req) + else + nvme_log_error(req); + } +- nvme_end_req_zoned(req); +- nvme_trace_bio_complete(req); +- if (req->cmd_flags & REQ_NVME_MPATH) +- nvme_mpath_end_request(req); ++ __nvme_end_req(req); + blk_mq_end_request(req, status); + } + +@@ -467,7 +472,7 @@ void nvme_complete_batch_req(struct request *req) + { + trace_nvme_complete_rq(req); + nvme_cleanup_cmd(req); +- nvme_end_req_zoned(req); ++ __nvme_end_req(req); + } + EXPORT_SYMBOL_GPL(nvme_complete_batch_req); + +-- +2.43.0 + diff --git a/queue-6.8/nvme-multipath-fix-io-accounting-on-failover.patch b/queue-6.8/nvme-multipath-fix-io-accounting-on-failover.patch new file mode 100644 index 00000000000..768dd67511c --- /dev/null +++ b/queue-6.8/nvme-multipath-fix-io-accounting-on-failover.patch @@ -0,0 +1,66 @@ +From 619788938c94a974f09e449de3c639469d28a4a5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 May 2024 11:02:28 -0700 +Subject: nvme-multipath: fix io accounting on failover + +From: 
Keith Busch + +[ Upstream commit a2e4c5f5f68dbd206f132bc709b98dea64afc3b8 ] + +There are io stats accounting that needs to be handled, so don't call +blk_mq_end_request() directly. Use the existing nvme_end_req() helper +that already handles everything. + +Fixes: d4d957b53d91ee ("nvme-multipath: support io stats on the mpath device") +Reviewed-by: Christoph Hellwig +Reviewed-by: Sagi Grimberg +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/core.c | 2 +- + drivers/nvme/host/multipath.c | 3 ++- + drivers/nvme/host/nvme.h | 1 + + 3 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index fcf7ce19caea4..2f51c4a978589 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -413,7 +413,7 @@ static inline void __nvme_end_req(struct request *req) + nvme_mpath_end_request(req); + } + +-static inline void nvme_end_req(struct request *req) ++void nvme_end_req(struct request *req) + { + blk_status_t status = nvme_error_status(nvme_req(req)->status); + +diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c +index 75386d3e0f981..e157c7983d466 100644 +--- a/drivers/nvme/host/multipath.c ++++ b/drivers/nvme/host/multipath.c +@@ -118,7 +118,8 @@ void nvme_failover_req(struct request *req) + blk_steal_bios(&ns->head->requeue_list, req); + spin_unlock_irqrestore(&ns->head->requeue_lock, flags); + +- blk_mq_end_request(req, 0); ++ nvme_req(req)->status = 0; ++ nvme_end_req(req); + kblockd_schedule_work(&ns->head->requeue_work); + } + +diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h +index 2a7bf574284f6..05d807fa0d3da 100644 +--- a/drivers/nvme/host/nvme.h ++++ b/drivers/nvme/host/nvme.h +@@ -766,6 +766,7 @@ static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl) + } + } + ++void nvme_end_req(struct request *req); + void nvme_complete_rq(struct request *req); + void nvme_complete_batch_req(struct request *req); + +-- +2.43.0 + 
diff --git a/queue-6.8/nvmet-fix-ns-enable-disable-possible-hang.patch b/queue-6.8/nvmet-fix-ns-enable-disable-possible-hang.patch new file mode 100644 index 00000000000..309900b15ea --- /dev/null +++ b/queue-6.8/nvmet-fix-ns-enable-disable-possible-hang.patch @@ -0,0 +1,59 @@ +From 6e2dbfc93c551596374d4a9b5b70493e11ee2f28 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 May 2024 23:20:28 +0300 +Subject: nvmet: fix ns enable/disable possible hang + +From: Sagi Grimberg + +[ Upstream commit f97914e35fd98b2b18fb8a092e0a0799f73afdfe ] + +When disabling an nvmet namespace, there is a period where the +subsys->lock is released, as the ns disable waits for backend IO to +complete, and the ns percpu ref to be properly killed. The original +intent was to avoid taking the subsystem lock for a prolonged period as +other processes may need to acquire it (for example new incoming +connections). + +However, it opens up a window where another process may come in and +enable the ns, (re)initializing the ns percpu_ref, causing the disable +sequence to hang. + +Solve this by taking the global nvmet_config_sem over the entire configfs +enable/disable sequence. 
+ +Fixes: a07b4970f464 ("nvmet: add a generic NVMe target") +Signed-off-by: Sagi Grimberg +Reviewed-by: Christoph Hellwig +Reviewed-by: Chaitanya Kulkarni +Signed-off-by: Keith Busch +Signed-off-by: Sasha Levin +--- + drivers/nvme/target/configfs.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c +index 3ef6bc655661d..303b49c604bd7 100644 +--- a/drivers/nvme/target/configfs.c ++++ b/drivers/nvme/target/configfs.c +@@ -650,10 +650,18 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item, + if (kstrtobool(page, &enable)) + return -EINVAL; + ++ /* ++ * take a global nvmet_config_sem because the disable routine has a ++ * window where it releases the subsys-lock, giving a chance to ++ * a parallel enable to concurrently execute causing the disable to ++ * have a misaccounting of the ns percpu_ref. ++ */ ++ down_write(&nvmet_config_sem); + if (enable) + ret = nvmet_ns_enable(ns); + else + nvmet_ns_disable(ns); ++ up_write(&nvmet_config_sem); + + return ret ? ret : count; + } +-- +2.43.0 + diff --git a/queue-6.8/octeontx2-pf-free-send-queue-buffers-incase-of-leaf-.patch b/queue-6.8/octeontx2-pf-free-send-queue-buffers-incase-of-leaf-.patch new file mode 100644 index 00000000000..98d0acd16f5 --- /dev/null +++ b/queue-6.8/octeontx2-pf-free-send-queue-buffers-incase-of-leaf-.patch @@ -0,0 +1,63 @@ +From dad40c052583e2678e701a557d0983541efeb7b8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 May 2024 13:06:26 +0530 +Subject: Octeontx2-pf: Free send queue buffers incase of leaf to inner + +From: Hariprasad Kelam + +[ Upstream commit 1684842147677a1279bcff95f8adb6de9a656e30 ] + +There are two type of classes. "Leaf classes" that are the +bottom of the class hierarchy. "Inner classes" that are neither +the root class nor leaf classes. QoS rules can only specify leaf +classes as targets for traffic. + + Root + / \ + / \ + 1 2 + /\ + / \ + 4 5 + classes 1,4 and 5 are leaf classes. 
+ class 2 is an inner class. + +When a leaf class is made an inner class, or vice versa, resources associated +with send queue (send queue buffers and transmit schedulers) are not +getting freed. + +Fixes: 5e6808b4c68d ("octeontx2-pf: Add support for HTB offload") +Signed-off-by: Hariprasad Kelam +Link: https://lore.kernel.org/r/20240523073626.4114-1-hkelam@marvell.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/octeontx2/nic/qos.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c +index 1723e9912ae07..6cddb4da85b71 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c +@@ -1407,7 +1407,10 @@ static int otx2_qos_leaf_to_inner(struct otx2_nic *pfvf, u16 classid, + otx2_qos_read_txschq_cfg(pfvf, node, old_cfg); + + /* delete the txschq nodes allocated for this node */ ++ otx2_qos_disable_sq(pfvf, qid); ++ otx2_qos_free_hw_node_schq(pfvf, node); + otx2_qos_free_sw_node_schq(pfvf, node); ++ pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ; + + /* mark this node as htb inner node */ + WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER); +@@ -1554,6 +1557,7 @@ static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force + dwrr_del_node = true; + + /* destroy the leaf node */ ++ otx2_qos_disable_sq(pfvf, qid); + otx2_qos_destroy_node(pfvf, node); + pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ; + +-- +2.43.0 + diff --git a/queue-6.8/powerpc-pseries-lparcfg-drop-error-message-from-gues.patch b/queue-6.8/powerpc-pseries-lparcfg-drop-error-message-from-gues.patch new file mode 100644 index 00000000000..19aaa9e7f8e --- /dev/null +++ b/queue-6.8/powerpc-pseries-lparcfg-drop-error-message-from-gues.patch @@ -0,0 +1,41 @@ +From 78b4194d20b8eece3bcdbb05e6f09933ccd568c1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 May 2024 14:29:54 -0500
+Subject: powerpc/pseries/lparcfg: drop error message from guest name lookup + +From: Nathan Lynch + +[ Upstream commit 12870ae3818e39ea65bf710f645972277b634f72 ] + +It's not an error or exceptional situation when the hosting +environment does not expose a name for the LP/guest via RTAS or the +device tree. This happens with qemu when run without the '-name' +option. The message also lacks a newline. Remove it. + +Signed-off-by: Nathan Lynch +Fixes: eddaa9a40275 ("powerpc/pseries: read the lpar name from the firmware") +Signed-off-by: Michael Ellerman +Link: https://msgid.link/20240524-lparcfg-updates-v2-1-62e2e9d28724@linux.ibm.com +Signed-off-by: Sasha Levin +--- + arch/powerpc/platforms/pseries/lparcfg.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c +index f04bfea1a97bd..11d5208817b9d 100644 +--- a/arch/powerpc/platforms/pseries/lparcfg.c ++++ b/arch/powerpc/platforms/pseries/lparcfg.c +@@ -357,8 +357,8 @@ static int read_dt_lpar_name(struct seq_file *m) + + static void read_lpar_name(struct seq_file *m) + { +- if (read_rtas_lpar_name(m) && read_dt_lpar_name(m)) +- pr_err_once("Error can't get the LPAR name"); ++ if (read_rtas_lpar_name(m)) ++ read_dt_lpar_name(m); + } + + #define SPLPAR_MAXLENGTH 1026*(sizeof(char)) +-- +2.43.0 + diff --git a/queue-6.8/powerpc-uaccess-use-yz-asm-constraint-for-ld.patch b/queue-6.8/powerpc-uaccess-use-yz-asm-constraint-for-ld.patch new file mode 100644 index 00000000000..d5ace9717ea --- /dev/null +++ b/queue-6.8/powerpc-uaccess-use-yz-asm-constraint-for-ld.patch @@ -0,0 +1,64 @@ +From afd9d483281107afc704e7da3df914f1d8662a56 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 May 2024 22:30:29 +1000 +Subject: powerpc/uaccess: Use YZ asm constraint for ld + +From: Michael Ellerman + +[ Upstream commit 50934945d54238d2d6d8db4b7c1d4c90d2696c57 ] + +The 'ld' instruction requires a 4-byte aligned displacement 
because it +is a DS-form instruction. But the "m" asm constraint doesn't enforce +that. + +Add a special case of __get_user_asm2_goto() so that the "YZ" constraint +can be used for "ld". + +The "Z" constraint is documented in the GCC manual PowerPC machine +constraints, and specifies a "memory operand accessed with indexed or +indirect addressing". "Y" is not documented in the manual but specifies +a "memory operand for a DS-form instruction". Using both allows the +compiler to generate a DS-form "ld" or X-form "ldx" as appropriate. + +The change has to be conditional on CONFIG_PPC_KERNEL_PREFIXED because +the "Y" constraint does not guarantee 4-byte alignment when prefixed +instructions are enabled. + +No build errors have been reported due to this, but the possibility is +there depending on compiler code generation decisions. + +Fixes: c20beffeec3c ("powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()") +Signed-off-by: Michael Ellerman +Link: https://msgid.link/20240529123029.146953-2-mpe@ellerman.id.au +Signed-off-by: Sasha Levin +--- + arch/powerpc/include/asm/uaccess.h | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h +index de10437fd2065..ac4f6e4ae5810 100644 +--- a/arch/powerpc/include/asm/uaccess.h ++++ b/arch/powerpc/include/asm/uaccess.h +@@ -165,8 +165,19 @@ do { \ + #endif + + #ifdef __powerpc64__ ++#ifdef CONFIG_PPC_KERNEL_PREFIXED + #define __get_user_asm2_goto(x, addr, label) \ + __get_user_asm_goto(x, addr, label, "ld") ++#else ++#define __get_user_asm2_goto(x, addr, label) \ ++ asm_goto_output( \ ++ "1: ld%U1%X1 %0, %1 # get_user\n" \ ++ EX_TABLE(1b, %l2) \ ++ : "=r" (x) \ ++ : DS_FORM_CONSTRAINT (*addr) \ ++ : \ ++ : label) ++#endif // CONFIG_PPC_KERNEL_PREFIXED + #else /* __powerpc64__ */ + #define __get_user_asm2_goto(x, addr, label) \ + asm_goto_output( \ +-- +2.43.0 + diff --git 
a/queue-6.8/riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch b/queue-6.8/riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch new file mode 100644 index 00000000000..9d36274d616 --- /dev/null +++ b/queue-6.8/riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch @@ -0,0 +1,63 @@ +From 08951e58f14bac2d5a1a31a8ed78a9f537343938 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 May 2024 11:43:23 +0300 +Subject: riscv: prevent pt_regs corruption for secondary idle threads + +From: Sergey Matyukevich + +[ Upstream commit a638b0461b58aa3205cd9d5f14d6f703d795b4af ] + +Top of the kernel thread stack should be reserved for pt_regs. However +this is not the case for the idle threads of the secondary boot harts. +Their stacks overlap with their pt_regs, so both may get corrupted. + +Similar issue has been fixed for the primary hart, see c7cdd96eca28 +("riscv: prevent stack corruption by reserving task_pt_regs(p) early"). +However that fix was not propagated to the secondary harts. The problem +has been noticed in some CPU hotplug tests with V enabled. The function +smp_callin stored several registers on stack, corrupting top of pt_regs +structure including status field. As a result, kernel attempted to save +or restore inexistent V context. 
+ +Fixes: 9a2451f18663 ("RISC-V: Avoid using per cpu array for ordered booting") +Fixes: 2875fe056156 ("RISC-V: Add cpu_ops and modify default booting method") +Signed-off-by: Sergey Matyukevich +Reviewed-by: Alexandre Ghiti +Link: https://lore.kernel.org/r/20240523084327.2013211-1-geomatsi@gmail.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/cpu_ops_sbi.c | 2 +- + arch/riscv/kernel/cpu_ops_spinwait.c | 3 +-- + 2 files changed, 2 insertions(+), 3 deletions(-) + +diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c +index 1cc7df740eddc..e6fbaaf549562 100644 +--- a/arch/riscv/kernel/cpu_ops_sbi.c ++++ b/arch/riscv/kernel/cpu_ops_sbi.c +@@ -72,7 +72,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) + /* Make sure tidle is updated */ + smp_mb(); + bdata->task_ptr = tidle; +- bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE; ++ bdata->stack_ptr = task_pt_regs(tidle); + /* Make sure boot data is updated */ + smp_mb(); + hsm_data = __pa(bdata); +diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c +index 613872b0a21ac..24869eb889085 100644 +--- a/arch/riscv/kernel/cpu_ops_spinwait.c ++++ b/arch/riscv/kernel/cpu_ops_spinwait.c +@@ -34,8 +34,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid, + + /* Make sure tidle is updated */ + smp_mb(); +- WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], +- task_stack_page(tidle) + THREAD_SIZE); ++ WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], task_pt_regs(tidle)); + WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle); + } + +-- +2.43.0 + diff --git a/queue-6.8/selftests-mptcp-add-ms-units-for-tc-netem-delay.patch b/queue-6.8/selftests-mptcp-add-ms-units-for-tc-netem-delay.patch new file mode 100644 index 00000000000..d15d9a1928e --- /dev/null +++ b/queue-6.8/selftests-mptcp-add-ms-units-for-tc-netem-delay.patch @@ -0,0 +1,66 @@ +From 2055a7b6f7581e8e00fce65d61a052a1821f4027 Mon 
Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Apr 2024 12:52:06 +0200 +Subject: selftests: mptcp: add ms units for tc-netem delay + +From: Geliang Tang + +[ Upstream commit 9109853a388b7b2b934f56f4ddb250d72e486555 ] + +'delay 1' in tc-netem is confusing, not sure if it's a delay of 1 second or +1 millisecond. This patch explicitly adds millisecond units to make these +commands clearer. + +Signed-off-by: Geliang Tang +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Signed-off-by: David S. Miller +Stable-dep-of: 38af56e6668b ("selftests: mptcp: join: mark 'fail' tests as flaky") +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/mptcp/mptcp_join.sh | 6 +++--- + tools/testing/selftests/net/mptcp/simult_flows.sh | 4 ++-- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh +index 24be952b4d4a1..a33e03da5070d 100755 +--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh +@@ -135,8 +135,8 @@ init_shapers() + { + local i + for i in $(seq 1 4); do +- tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1 +- tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1 ++ tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1ms ++ tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1ms + done + } + +@@ -3279,7 +3279,7 @@ fail_tests() + + # multiple subflows + if reset_with_fail "MP_FAIL MP_RST" 2; then +- tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5 ++ tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow +diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh +index f5d094836067b..9831d7655b987 100755 +--- 
a/tools/testing/selftests/net/mptcp/simult_flows.sh ++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh +@@ -218,8 +218,8 @@ run_test() + shift 4 + local msg=$* + +- [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1="" +- [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2="" ++ [ $delay1 -gt 0 ] && delay1="delay ${delay1}ms" || delay1="" ++ [ $delay2 -gt 0 ] && delay2="delay ${delay2}ms" || delay2="" + + for dev in ns1eth1 ns1eth2; do + tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1 +-- +2.43.0 + diff --git a/queue-6.8/selftests-mptcp-join-mark-fail-tests-as-flaky.patch b/queue-6.8/selftests-mptcp-join-mark-fail-tests-as-flaky.patch new file mode 100644 index 00000000000..fad3adc0f13 --- /dev/null +++ b/queue-6.8/selftests-mptcp-join-mark-fail-tests-as-flaky.patch @@ -0,0 +1,51 @@ +From 7aa071ef54821269ad76b47d4d257c8c9f9b9d24 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 May 2024 18:30:59 +0200 +Subject: selftests: mptcp: join: mark 'fail' tests as flaky + +From: Matthieu Baerts (NGI0) + +[ Upstream commit 38af56e6668b455f7dd0a8e2d9afe74100068e17 ] + +These tests are rarely unstable. It depends on the CI running the tests, +especially if it is also busy doing other tasks in parallel, and if a +debug kernel config is being used. + +It looks like this issue is sometimes present with the NetDev CI. While +this is being investigated, the tests are marked as flaky not to create +noises on such CIs. 
+ +Fixes: b6e074e171bc ("selftests: mptcp: add infinite map testcase") +Link: https://github.com/multipath-tcp/mptcp_net-next/issues/491 +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://lore.kernel.org/r/20240524-upstream-net-20240524-selftests-mptcp-flaky-v1-4-a352362f3f8e@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh +index a33e03da5070d..1324ef532d99f 100755 +--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh ++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh +@@ -3271,6 +3271,7 @@ fail_tests() + { + # single subflow + if reset_with_fail "Infinite map" 1; then ++ MPTCP_LIB_SUBTEST_FLAKY=1 + test_linkfail=128 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)" +@@ -3279,6 +3280,7 @@ fail_tests() + + # multiple subflows + if reset_with_fail "MP_FAIL MP_RST" 2; then ++ MPTCP_LIB_SUBTEST_FLAKY=1 + tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 +-- +2.43.0 + diff --git a/queue-6.8/selftests-mptcp-simult-flows-mark-unbalanced-tests-a.patch b/queue-6.8/selftests-mptcp-simult-flows-mark-unbalanced-tests-a.patch new file mode 100644 index 00000000000..0350cea3a5e --- /dev/null +++ b/queue-6.8/selftests-mptcp-simult-flows-mark-unbalanced-tests-a.patch @@ -0,0 +1,68 @@ +From 1283f5bcb5c29f47ea2cdf1db137e5fd8e266e28 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 May 2024 18:30:57 +0200 +Subject: selftests: mptcp: simult flows: mark 'unbalanced' tests as flaky + +From: Matthieu Baerts (NGI0) + +[ Upstream commit cc73a6577ae64247898269d138dee6b73ff710cc ] + +These tests are flaky since their introduction. 
This might be less or +not visible depending on the CI running the tests, especially if it is +also busy doing other tasks in parallel. + +A first analysis showed that the transfer can be slowed down when there +are some re-injections at the MPTCP level. Such re-injections can of +course happen, and disturb the transfer, but it looks strange to have +them in this lab. That could be caused by the kernel having access to +less CPU cycles -- e.g. when other activities are executed in parallel +-- or by a misinterpretation on the MPTCP packet scheduler side. + +While this is being investigated, the tests are marked as flaky not to +create noises in other CIs. + +Fixes: 219d04992b68 ("mptcp: push pending frames when subflow has free space") +Link: https://github.com/multipath-tcp/mptcp_net-next/issues/475 +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://lore.kernel.org/r/20240524-upstream-net-20240524-selftests-mptcp-flaky-v1-2-a352362f3f8e@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/mptcp/simult_flows.sh | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh +index 8f9ddb3ad4fe8..f5d094836067b 100755 +--- a/tools/testing/selftests/net/mptcp/simult_flows.sh ++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh +@@ -245,7 +245,7 @@ run_test() + do_transfer $small $large $time + lret=$? + mptcp_lib_result_code "${lret}" "${msg}" +- if [ $lret -ne 0 ]; then ++ if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then + ret=$lret + [ $bail -eq 0 ] || exit $ret + fi +@@ -255,7 +255,7 @@ run_test() + do_transfer $large $small $time + lret=$? + mptcp_lib_result_code "${lret}" "${msg}" +- if [ $lret -ne 0 ]; then ++ if [ $lret -ne 0 ] && !
mptcp_lib_subtest_is_flaky; then + ret=$lret + [ $bail -eq 0 ] || exit $ret + fi +@@ -288,7 +288,7 @@ run_test 10 10 0 0 "balanced bwidth" + run_test 10 10 1 25 "balanced bwidth with unbalanced delay" + + # we still need some additional infrastructure to pass the following test-cases +-run_test 10 3 0 0 "unbalanced bwidth" ++MPTCP_LIB_SUBTEST_FLAKY=1 run_test 10 3 0 0 "unbalanced bwidth" + run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" + run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" + +-- +2.43.0 + diff --git a/queue-6.8/series b/queue-6.8/series index 56de229cf60..0858ffc1ccd 100644 --- a/queue-6.8/series +++ b/queue-6.8/series @@ -256,3 +256,80 @@ cifs-set-zero_point-in-the-copy_file_range-and-remap.patch cifs-fix-missing-set-of-remote_i_size.patch tracing-probes-fix-error-check-in-parse_btf_field.patch tpm_tis_spi-account-for-spi-header-when-allocating-t.patch +netfilter-nfnetlink_queue-acquire-rcu_read_lock-in-i.patch +netfilter-ipset-add-list-flush-to-cancel_gc.patch +netfilter-nft_payload-restore-vlan-q-in-q-match-supp.patch +spi-don-t-mark-message-dma-mapped-when-no-transfer-i.patch +dma-mapping-benchmark-fix-up-kthread-related-error-h.patch +dma-mapping-benchmark-fix-node-id-validation.patch +dma-mapping-benchmark-handle-numa_no_node-correctly.patch +nvme-fix-multipath-batched-completion-accounting.patch +nvme-multipath-fix-io-accounting-on-failover.patch +nvmet-fix-ns-enable-disable-possible-hang.patch +drm-amd-display-enable-colorspace-property-for-mst-c.patch +net-phy-micrel-set-soft_reset-callback-to-genphy_sof.patch +net-mlx5-lag-do-bond-only-if-slaves-agree-on-roce-st.patch +net-mlx5-fix-mtmp-register-capability-offset-in-mcam.patch +net-mlx5-use-mlx5_ipsec_rx_status_destroy-to-correct.patch +net-mlx5e-fix-ipsec-tunnel-mode-offload-feature-chec.patch +net-mlx5e-use-rx_missed_errors-instead-of-rx_dropped.patch +net-mlx5e-fix-udp-gso-for-encapsulated-packets.patch 
+dma-buf-sw-sync-don-t-enable-irq-from-sync_print_obj.patch +bpf-fix-potential-integer-overflow-in-resolve_btfids.patch +netkit-fix-setting-mac-address-in-l2-mode.patch +netkit-fix-pkt_type-override-upon-netkit-pass-verdic.patch +alsa-jack-use-guard-for-locking.patch +alsa-core-remove-debugfs-at-disconnection.patch +alsa-hda-realtek-adjust-g814jzr-to-use-spi-init-for-.patch +enic-validate-length-of-nl-attributes-in-enic_set_vf.patch +af_unix-annotate-data-race-around-unix_sk-sk-addr.patch +af_unix-read-sk-sk_hash-under-bindlock-during-bind.patch +octeontx2-pf-free-send-queue-buffers-incase-of-leaf-.patch +net-usb-smsc95xx-fix-changing-led_sel-bit-value-upda.patch +asoc-cs42l43-only-restrict-44.1khz-for-the-asp.patch +bpf-allow-delete-from-sockmap-sockhash-only-if-updat.patch +tcp-reduce-accepted-window-in-new_syn_recv-state.patch +net-fec-add-fec_enet_deinit.patch +net-micrel-fix-lan8841_config_intr-after-getting-out.patch +idpf-don-t-enable-napi-and-interrupts-prior-to-alloc.patch +ice-fix-accounting-if-a-vlan-already-exists.patch +selftests-mptcp-simult-flows-mark-unbalanced-tests-a.patch +selftests-mptcp-add-ms-units-for-tc-netem-delay.patch +selftests-mptcp-join-mark-fail-tests-as-flaky.patch +drm-xe-add-dbg-messages-on-the-suspend-resume-functi.patch +drm-xe-check-pcode-init-status-only-on-root-gt-of-ro.patch +drm-xe-change-pcode-timeout-to-50msec-while-polling-.patch +drm-xe-only-use-reserved-bcs-instances-for-usm-migra.patch +alsa-seq-fix-missing-bank-setup-between-midi1-midi2-.patch +alsa-seq-don-t-clear-bank-selection-at-event-ump-mid.patch +net-ti-icssg-prueth-fix-start-counter-for-ft1-filter.patch +netfilter-nft_payload-skbuff-vlan-metadata-mangle-su.patch +netfilter-tproxy-bail-out-if-ip-has-been-disabled-on.patch +netfilter-nft_fib-allow-from-forward-input-without-i.patch +net-sched-taprio-make-q-picos_per_byte-available-to-.patch +net-sched-taprio-extend-minimum-interval-restriction.patch +kconfig-fix-comparison-to-constant-symbols-m-n.patch 
+drm-i915-guc-avoid-field_prep-warning.patch +drm-i915-gt-fix-ccs-id-s-calculation-for-ccs-mode-se.patch +kheaders-use-command-v-to-test-for-existence-of-cpio.patch +spi-stm32-don-t-warn-about-spurious-interrupts.patch +drm-amdgpu-adjust-logic-in-amdgpu_device_partner_ban.patch +ipv6-introduce-dst_rt6_info-helper.patch +inet-introduce-dst_rtable-helper.patch +net-fix-__dst_negative_advice-race.patch +net-dsa-microchip-fix-rgmii-error-in-ksz-dsa-driver.patch +e1000e-move-force-smbus-near-the-end-of-enable_ulp-f.patch +ice-fix-200g-phy-types-to-link-speed-mapping.patch +net-ena-reduce-lines-with-longer-column-width-bounda.patch +net-ena-fix-redundant-device-numa-node-override.patch +ipvlan-dont-use-skb-sk-in-ipvlan_process_v-4-6-_outb.patch +alsa-seq-fix-yet-another-spot-for-system-message-con.patch +powerpc-pseries-lparcfg-drop-error-message-from-gues.patch +powerpc-uaccess-use-yz-asm-constraint-for-ld.patch +drm-panel-sitronix-st7789v-fix-timing-for-jt240mhqs_.patch +drm-panel-sitronix-st7789v-tweak-timing-for-jt240mhq.patch +drm-panel-sitronix-st7789v-fix-display-size-for-jt24.patch +hwmon-intel-m10-bmc-hwmon-fix-multiplier-for-n6000-b.patch +hwmon-shtc1-fix-property-misspelling.patch +riscv-prevent-pt_regs-corruption-for-secondary-idle-.patch +alsa-seq-ump-fix-swapped-song-position-pointer-data.patch diff --git a/queue-6.8/spi-don-t-mark-message-dma-mapped-when-no-transfer-i.patch b/queue-6.8/spi-don-t-mark-message-dma-mapped-when-no-transfer-i.patch new file mode 100644 index 00000000000..a15e3cc5aa9 --- /dev/null +++ b/queue-6.8/spi-don-t-mark-message-dma-mapped-when-no-transfer-i.patch @@ -0,0 +1,48 @@ +From 5a023bf3dd5293b2ed8226db1f2dfa1389f60dd9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 May 2024 20:09:49 +0300 +Subject: spi: Don't mark message DMA mapped when no transfer in it is + +From: Andy Shevchenko + +[ Upstream commit 9f788ba457b45b0ce422943fcec9fa35c4587764 ] + +There is no need to set the DMA mapped flag of the message if it has 
+no mapped transfers. Moreover, it may give the code a chance to take +the wrong paths, i.e. to exercise DMA related APIs on unmapped data. +Make __spi_map_msg() to bail earlier on the above mentioned cases. + +Fixes: 99adef310f68 ("spi: Provide core support for DMA mapping transfers") +Signed-off-by: Andy Shevchenko +Link: https://msgid.link/r/20240522171018.3362521-2-andriy.shevchenko@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/spi/spi.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c +index a7194f29c2007..8bb0e6cb5a7b3 100644 +--- a/drivers/spi/spi.c ++++ b/drivers/spi/spi.c +@@ -1222,6 +1222,7 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) + else + rx_dev = ctlr->dev.parent; + ++ ret = -ENOMSG; + list_for_each_entry(xfer, &msg->transfers, transfer_list) { + /* The sync is done before each transfer. */ + unsigned long attrs = DMA_ATTR_SKIP_CPU_SYNC; +@@ -1251,6 +1252,9 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) + } + } + } ++ /* No transfer has been mapped, bail out with success */ ++ if (ret) ++ return 0; + + ctlr->cur_rx_dma_dev = rx_dev; + ctlr->cur_tx_dma_dev = tx_dev; +-- +2.43.0 + diff --git a/queue-6.8/spi-stm32-don-t-warn-about-spurious-interrupts.patch b/queue-6.8/spi-stm32-don-t-warn-about-spurious-interrupts.patch new file mode 100644 index 00000000000..b699c38aca3 --- /dev/null +++ b/queue-6.8/spi-stm32-don-t-warn-about-spurious-interrupts.patch @@ -0,0 +1,43 @@ +From 44730535ea991c2a581eedcda9d8a9b4e7bc4135 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 May 2024 12:52:42 +0200 +Subject: spi: stm32: Don't warn about spurious interrupts +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Uwe Kleine-König + +[ Upstream commit 95d7c452a26564ef0c427f2806761b857106d8c4 ] + +The dev_warn to notify about a spurious interrupt was 
introduced with +the reasoning that these are unexpected. However spurious interrupts +tend to trigger continuously and the error message on the serial console +prevents that the core's detection of spurious interrupts kicks in +(which disables the irq) and just floods the console. + +Fixes: c64e7efe46b7 ("spi: stm32: make spurious and overrun interrupts visible") +Signed-off-by: Uwe Kleine-König +Link: https://msgid.link/r/20240521105241.62400-2-u.kleine-koenig@pengutronix.de +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/spi/spi-stm32.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c +index e61302ef3c21a..43712f793ff0e 100644 +--- a/drivers/spi/spi-stm32.c ++++ b/drivers/spi/spi-stm32.c +@@ -1057,7 +1057,7 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) + mask |= STM32H7_SPI_SR_TXP | STM32H7_SPI_SR_RXP; + + if (!(sr & mask)) { +- dev_warn(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n", ++ dev_vdbg(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n", + sr, ier); + spin_unlock_irqrestore(&spi->lock, flags); + return IRQ_NONE; +-- +2.43.0 + diff --git a/queue-6.8/tcp-reduce-accepted-window-in-new_syn_recv-state.patch b/queue-6.8/tcp-reduce-accepted-window-in-new_syn_recv-state.patch new file mode 100644 index 00000000000..eb000502f0e --- /dev/null +++ b/queue-6.8/tcp-reduce-accepted-window-in-new_syn_recv-state.patch @@ -0,0 +1,121 @@ +From 6f58554ba4277ffe93c17f9850b6a247a48244ac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 May 2024 13:05:27 +0000 +Subject: tcp: reduce accepted window in NEW_SYN_RECV state + +From: Eric Dumazet + +[ Upstream commit f4dca95fc0f6350918f2e6727e35b41f7f86fcce ] + +Jason commit made checks against ACK sequence less strict +and can be exploited by attackers to establish spoofed flows +with less probes. + +Innocent users might use tcp_rmem[1] == 1,000,000,000, +or something more reasonable.
+ +An attacker can use a regular TCP connection to learn the server +initial tp->rcv_wnd, and use it to optimize the attack. + +If we make sure that only the announced window (smaller than 65535) +is used for ACK validation, we force an attacker to use +65537 packets to complete the 3WHS (assuming server ISN is unknown) + +Fixes: 378979e94e95 ("tcp: remove 64 KByte limit for initial tp->rcv_wnd value") +Link: https://datatracker.ietf.org/meeting/119/materials/slides-119-tcpm-ghost-acks-00 +Signed-off-by: Eric Dumazet +Acked-by: Neal Cardwell +Reviewed-by: Jason Xing +Link: https://lore.kernel.org/r/20240523130528.60376-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/request_sock.h | 12 ++++++++++++ + net/ipv4/tcp_ipv4.c | 7 +------ + net/ipv4/tcp_minisocks.c | 7 +++++-- + net/ipv6/tcp_ipv6.c | 7 +------ + 4 files changed, 19 insertions(+), 14 deletions(-) + +diff --git a/include/net/request_sock.h b/include/net/request_sock.h +index 144c39db9898a..b79b7c9011ebc 100644 +--- a/include/net/request_sock.h ++++ b/include/net/request_sock.h +@@ -238,4 +238,16 @@ static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) + return atomic_read(&queue->young); + } + ++/* RFC 7323 2.3 Using the Window Scale Option ++ * The window field (SEG.WND) of every outgoing segment, with the ++ * exception of <SYN> segments, MUST be right-shifted by ++ * Rcv.Wind.Shift bits. ++ * ++ * This means the SEG.WND carried in SYNACK can not exceed 65535. ++ * We use this property to harden TCP stack while in NEW_SYN_RECV state.
++ */ ++static inline u32 tcp_synack_window(const struct request_sock *req) ++{ ++ return min(req->rsk_rcv_wnd, 65535U); ++} + #endif /* _REQUEST_SOCK_H */ +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index abd47159d7e4d..0e4b5553ce927 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1143,14 +1143,9 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, + #endif + } + +- /* RFC 7323 2.3 +- * The window field (SEG.WND) of every outgoing segment, with the +- * exception of <SYN> segments, MUST be right-shifted by +- * Rcv.Wind.Shift bits: +- */ + tcp_v4_send_ack(sk, skb, seq, + tcp_rsk(req)->rcv_nxt, +- req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, ++ tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, + tcp_rsk_tsval(tcp_rsk(req)), + READ_ONCE(req->ts_recent), + 0, &key, +diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c +index 0ecc7311dc6ce..cb4e253d511f8 100644 +--- a/net/ipv4/tcp_minisocks.c ++++ b/net/ipv4/tcp_minisocks.c +@@ -783,8 +783,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, + + /* RFC793: "first check sequence number". */ + +- if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, +- tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) { ++ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, ++ TCP_SKB_CB(skb)->end_seq, ++ tcp_rsk(req)->rcv_nxt, ++ tcp_rsk(req)->rcv_nxt + ++ tcp_synack_window(req))) { + /* Out of window: send ACK and drop. */ + if (!(flg & TCP_FLAG_RST) && + !tcp_oow_rate_limited(sock_net(sk), skb, +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 57b25b1fc9d9d..012adcfcabeb5 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1267,15 +1267,10 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, + /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV + * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+ */ +- /* RFC 7323 2.3 +- * The window field (SEG.WND) of every outgoing segment, with the +- * exception of <SYN> segments, MUST be right-shifted by +- * Rcv.Wind.Shift bits: +- */ + tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? + tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + tcp_rsk(req)->rcv_nxt, +- req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, ++ tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, + tcp_rsk_tsval(tcp_rsk(req)), + READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, + &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0, +-- +2.43.0 +